diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Kconfig.locks | 2 | ||||
-rw-r--r-- | kernel/auditsc.c | 27 | ||||
-rw-r--r-- | kernel/compat.c | 47 | ||||
-rw-r--r-- | kernel/exit.c | 110 | ||||
-rw-r--r-- | kernel/extable.c | 10 | ||||
-rw-r--r-- | kernel/pm_qos_params.c | 2 | ||||
-rw-r--r-- | kernel/ptrace.c | 118 | ||||
-rw-r--r-- | kernel/rcutiny.c | 1 | ||||
-rw-r--r-- | kernel/rcutree.c | 1 | ||||
-rw-r--r-- | kernel/sched.c | 2 | ||||
-rw-r--r-- | kernel/signal.c | 678 | ||||
-rw-r--r-- | kernel/sys_ni.c | 9 | ||||
-rw-r--r-- | kernel/sysctl.c | 12 | ||||
-rw-r--r-- | kernel/watchdog.c | 52 |
14 files changed, 709 insertions, 362 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 88c92fb4461..5068e2a4e75 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -199,4 +199,4 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE config MUTEX_SPIN_ON_OWNER - def_bool SMP && !DEBUG_MUTEXES && !HAVE_DEFAULT_NO_SPIN_MUTEXES + def_bool SMP && !DEBUG_MUTEXES diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b33513a08be..00d79df03e7 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -443,17 +443,25 @@ static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree) /* Determine if any context name data matches a rule's watch data */ /* Compare a task_struct with an audit_rule. Return 1 on match, 0 - * otherwise. */ + * otherwise. + * + * If task_creation is true, this is an explicit indication that we are + * filtering a task rule at task creation time. This and tsk == current are + * the only situations where tsk->cred may be accessed without an rcu read lock. + */ static int audit_filter_rules(struct task_struct *tsk, struct audit_krule *rule, struct audit_context *ctx, struct audit_names *name, - enum audit_state *state) + enum audit_state *state, + bool task_creation) { - const struct cred *cred = get_task_cred(tsk); + const struct cred *cred; int i, j, need_sid = 1; u32 sid; + cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation); + for (i = 0; i < rule->field_count; i++) { struct audit_field *f = &rule->fields[i]; int result = 0; @@ -637,10 +645,8 @@ static int audit_filter_rules(struct task_struct *tsk, break; } - if (!result) { - put_cred(cred); + if (!result) return 0; - } } if (ctx) { @@ -656,7 +662,6 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_NEVER: *state = AUDIT_DISABLED; break; case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; } - put_cred(cred); return 1; } @@ -671,7 +676,8 @@ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key) rcu_read_lock(); list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { - if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state)) { + if (audit_filter_rules(tsk, &e->rule, NULL, NULL, + &state, true)) { if (state == AUDIT_RECORD_CONTEXT) *key = kstrdup(e->rule.filterkey, GFP_ATOMIC); rcu_read_unlock(); @@ -705,7 +711,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, list_for_each_entry_rcu(e, list, list) { if ((e->rule.mask[word] & bit) == bit && audit_filter_rules(tsk, &e->rule, ctx, NULL, - &state)) { + &state, false)) { rcu_read_unlock(); ctx->current_state = state; return state; @@ -743,7 +749,8 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) list_for_each_entry_rcu(e, list, list) { if ((e->rule.mask[word] & bit) == bit && - audit_filter_rules(tsk, &e->rule, ctx, n, &state)) { + audit_filter_rules(tsk, &e->rule, ctx, n, + &state, false)) { rcu_read_unlock(); ctx->current_state = state; return; diff --git a/kernel/compat.c b/kernel/compat.c index 38b1d2c1cbe..9214dcd087b 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -890,10 +890,9 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, { compat_sigset_t s32; sigset_t s; - int sig; struct timespec t; siginfo_t info; - long ret, timeout = 0; + long ret; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; @@ -901,51 +900,19 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t))) return -EFAULT; sigset_from_compat(&s, &s32); - sigdelsetmask(&s,sigmask(SIGKILL)|sigmask(SIGSTOP)); - signotset(&s); if (uts) { - if (get_compat_timespec (&t, uts)) + if (get_compat_timespec(&t, uts)) return -EFAULT; - if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 - || t.tv_sec < 0) - return -EINVAL; } - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &s, &info); - if (!sig) { - timeout = MAX_SCHEDULE_TIMEOUT; - if (uts) - timeout = timespec_to_jiffies(&t) - +(t.tv_sec || t.tv_nsec); - if (timeout) { - current->real_blocked = current->blocked; - sigandsets(¤t->blocked, ¤t->blocked, &s); - - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - timeout = schedule_timeout_interruptible(timeout); - - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &s, &info); - current->blocked = current->real_blocked; - siginitset(¤t->real_blocked, 0); - recalc_sigpending(); - } - } - spin_unlock_irq(¤t->sighand->siglock); + ret = do_sigtimedwait(&s, &info, uts ? &t : NULL); - if (sig) { - ret = sig; - if (uinfo) { - if (copy_siginfo_to_user32(uinfo, &info)) - ret = -EFAULT; - } - }else { - ret = timeout?-EINTR:-EAGAIN; + if (ret > 0 && uinfo) { + if (copy_siginfo_to_user32(uinfo, &info)) + ret = -EFAULT; } + return ret; } diff --git a/kernel/exit.c b/kernel/exit.c index 8dd87418154..20a40647152 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1377,11 +1377,23 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace) return NULL; } -/* - * Handle sys_wait4 work for one task in state TASK_STOPPED. We hold - * read_lock(&tasklist_lock) on entry. If we return zero, we still hold - * the lock and this task is uninteresting. If we return nonzero, we have - * released the lock and the system call should return. +/** + * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED + * @wo: wait options + * @ptrace: is the wait for ptrace + * @p: task to wait for + * + * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED. + * + * CONTEXT: + * read_lock(&tasklist_lock), which is released if return value is + * non-zero. Also, grabs and releases @p->sighand->siglock. + * + * RETURNS: + * 0 if wait condition didn't exist and search for other wait conditions + * should continue. Non-zero return, -errno on failure and @p's pid on + * success, implies that tasklist_lock is released and wait condition + * search should terminate. */ static int wait_task_stopped(struct wait_opts *wo, int ptrace, struct task_struct *p) @@ -1397,6 +1409,9 @@ static int wait_task_stopped(struct wait_opts *wo, if (!ptrace && !(wo->wo_flags & WUNTRACED)) return 0; + if (!task_stopped_code(p, ptrace)) + return 0; + exit_code = 0; spin_lock_irq(&p->sighand->siglock); @@ -1538,33 +1553,84 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, return 0; } - if (likely(!ptrace) && unlikely(task_ptrace(p))) { + /* dead body doesn't have much to contribute */ + if (p->exit_state == EXIT_DEAD) + return 0; + + /* slay zombie? */ + if (p->exit_state == EXIT_ZOMBIE) { + /* + * A zombie ptracee is only visible to its ptracer. + * Notification and reaping will be cascaded to the real + * parent when the ptracer detaches. + */ + if (likely(!ptrace) && unlikely(task_ptrace(p))) { + /* it will become visible, clear notask_error */ + wo->notask_error = 0; + return 0; + } + + /* we don't reap group leaders with subthreads */ + if (!delay_group_leader(p)) + return wait_task_zombie(wo, p); + /* - * This child is hidden by ptrace. - * We aren't allowed to see it now, but eventually we will. + * Allow access to stopped/continued state via zombie by + * falling through. Clearing of notask_error is complex. + * + * When !@ptrace: + * + * If WEXITED is set, notask_error should naturally be + * cleared. If not, subset of WSTOPPED|WCONTINUED is set, + * so, if there are live subthreads, there are events to + * wait for. If all subthreads are dead, it's still safe + * to clear - this function will be called again in finite + * amount time once all the subthreads are released and + * will then return without clearing. + * + * When @ptrace: + * + * Stopped state is per-task and thus can't change once the + * target task dies. Only continued and exited can happen. + * Clear notask_error if WCONTINUED | WEXITED. + */ + if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) + wo->notask_error = 0; + } else { + /* + * If @p is ptraced by a task in its real parent's group, + * hide group stop/continued state when looking at @p as + * the real parent; otherwise, a single stop can be + * reported twice as group and ptrace stops. + * + * If a ptracer wants to distinguish the two events for its + * own children, it should create a separate process which + * takes the role of real parent. + */ + if (likely(!ptrace) && task_ptrace(p) && + same_thread_group(p->parent, p->real_parent)) + return 0; + + /* + * @p is alive and it's gonna stop, continue or exit, so + * there always is something to wait for. */ wo->notask_error = 0; - return 0; } - if (p->exit_state == EXIT_DEAD) - return 0; - /* - * We don't reap group leaders with subthreads. + * Wait for stopped. Depending on @ptrace, different stopped state + * is used and the two don't interact with each other. */ - if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) - return wait_task_zombie(wo, p); + ret = wait_task_stopped(wo, ptrace, p); + if (ret) + return ret; /* - * It's stopped or running now, so it might - * later continue, exit, or stop again. + * Wait for continued. There's only one continued state and the + * ptracer can consume it which can confuse the real parent. Don't + * use WCONTINUED from ptracer. You don't need or want it. */ - wo->notask_error = 0; - - if (task_stopped_code(p, ptrace)) - return wait_task_stopped(wo, ptrace, p); - return wait_task_continued(wo, p); } diff --git a/kernel/extable.c b/kernel/extable.c index c2d625fcda7..5339705b824 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -72,6 +72,16 @@ int core_kernel_text(unsigned long addr) return 0; } +/** + * core_kernel_data - tell if addr points to kernel data + * @addr: address to test + * + * Returns true if @addr passed in is from the core kernel data + * section. + * + * Note: On some archs it may return true for core RODATA, and false + * for others. But will always be true for core RW data. + */ int core_kernel_data(unsigned long addr) { if (addr >= (unsigned long)_sdata && diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index 0da058bff8e..beb184689af 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -385,7 +385,7 @@ static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, s32 value; unsigned long flags; struct pm_qos_object *o; - struct pm_qos_request_list *pm_qos_req = filp->private_data;; + struct pm_qos_request_list *pm_qos_req = filp->private_data; if (!pm_qos_req) return -EINVAL; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index dc7ab65f3b3..7a81fc07134 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -38,35 +38,33 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) child->parent = new_parent; } -/* - * Turn a tracing stop into a normal stop now, since with no tracer there - * would be no way to wake it up with SIGCONT or SIGKILL. If there was a - * signal sent that would resume the child, but didn't because it was in - * TASK_TRACED, resume it now. - * Requires that irqs be disabled. - */ -static void ptrace_untrace(struct task_struct *child) -{ - spin_lock(&child->sighand->siglock); - if (task_is_traced(child)) { - /* - * If the group stop is completed or in progress, - * this thread was already counted as stopped. - */ - if (child->signal->flags & SIGNAL_STOP_STOPPED || - child->signal->group_stop_count) - __set_task_state(child, TASK_STOPPED); - else - signal_wake_up(child, 1); - } - spin_unlock(&child->sighand->siglock); -} - -/* - * unptrace a task: move it back to its original parent and - * remove it from the ptrace list. +/** + * __ptrace_unlink - unlink ptracee and restore its execution state + * @child: ptracee to be unlinked * - * Must be called with the tasklist lock write-held. + * Remove @child from the ptrace list, move it back to the original parent, + * and restore the execution state so that it conforms to the group stop + * state. + * + * Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer + * exiting. For PTRACE_DETACH, unless the ptracee has been killed between + * ptrace_check_attach() and here, it's guaranteed to be in TASK_TRACED. + * If the ptracer is exiting, the ptracee can be in any state. + * + * After detach, the ptracee should be in a state which conforms to the + * group stop. If the group is stopped or in the process of stopping, the + * ptracee should be put into TASK_STOPPED; otherwise, it should be woken + * up from TASK_TRACED. + * + * If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED, + * it goes through TRACED -> RUNNING -> STOPPED transition which is similar + * to but in the opposite direction of what happens while attaching to a + * stopped task. However, in this direction, the intermediate RUNNING + * state is not hidden even from the current ptracer and if it immediately + * re-attaches and performs a WNOHANG wait(2), it may fail. + * + * CONTEXT: + * write_lock_irq(tasklist_lock) */ void __ptrace_unlink(struct task_struct *child) { @@ -76,8 +74,27 @@ void __ptrace_unlink(struct task_struct *child) child->parent = child->real_parent; list_del_init(&child->ptrace_entry); - if (task_is_traced(child)) - ptrace_untrace(child); + spin_lock(&child->sighand->siglock); + + /* + * Reinstate GROUP_STOP_PENDING if group stop is in effect and + * @child isn't dead. + */ + if (!(child->flags & PF_EXITING) && + (child->signal->flags & SIGNAL_STOP_STOPPED || + child->signal->group_stop_count)) + child->group_stop |= GROUP_STOP_PENDING; + + /* + * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick + * @child in the butt. Note that @resume should be used iff @child + * is in TASK_TRACED; otherwise, we might unduly disrupt + * TASK_KILLABLE sleeps. + */ + if (child->group_stop & GROUP_STOP_PENDING || task_is_traced(child)) + signal_wake_up(child, task_is_traced(child)); + + spin_unlock(&child->sighand->siglock); } /* @@ -96,16 +113,14 @@ int ptrace_check_attach(struct task_struct *child, int kill) */ read_lock(&tasklist_lock); if ((child->ptrace & PT_PTRACED) && child->parent == current) { - ret = 0; /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). */ spin_lock_irq(&child->sighand->siglock); - if (task_is_stopped(child)) - child->state = TASK_TRACED; - else if (!task_is_traced(child) && !kill) - ret = -ESRCH; + WARN_ON_ONCE(task_is_stopped(child)); + if (task_is_traced(child) || kill) + ret = 0; spin_unlock_irq(&child->sighand->siglock); } read_unlock(&tasklist_lock); @@ -169,6 +184,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) static int ptrace_attach(struct task_struct *task) { + bool wait_trap = false; int retval; audit_ptrace(task); @@ -208,12 +224,42 @@ static int ptrace_attach(struct task_struct *task) __ptrace_link(task, current); send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); + spin_lock(&task->sighand->siglock); + + /* + * If the task is already STOPPED, set GROUP_STOP_PENDING and + * TRAPPING, and kick it so that it transits to TRACED. TRAPPING + * will be cleared if the child completes the transition or any + * event which clears the group stop states happens. We'll wait + * for the transition to complete before returning from this + * function. + * + * This hides STOPPED -> RUNNING -> TRACED transition from the + * attaching thread but a different thread in the same group can + * still observe the transient RUNNING state. IOW, if another + * thread's WNOHANG wait(2) on the stopped tracee races against + * ATTACH, the wait(2) may fail due to the transient RUNNING. + * + * The following task_is_stopped() test is safe as both transitions + * in and out of STOPPED are protected by siglock. + */ + if (task_is_stopped(task)) { + task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING; + signal_wake_up(task, 1); + wait_trap = true; + } + + spin_unlock(&task->sighand->siglock); + retval = 0; unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: mutex_unlock(&task->signal->cred_guard_mutex); out: + if (wait_trap) + wait_event(current->signal->wait_chldexit, + !(task->group_stop & GROUP_STOP_TRAPPING)); return retval; } @@ -316,8 +362,6 @@ static int ptrace_detach(struct task_struct *child, unsigned int data) if (child->ptrace) { child->exit_code = data; dead = __ptrace_detach(current, child); - if (!child->exit_state) - wake_up_state(child, TASK_TRACED | TASK_STOPPED); } write_unlock_irq(&tasklist_lock); diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 421abfd3641..7bbac7d0f5a 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -35,6 +35,7 @@ #include <linux/init.h> #include <linux/time.h> #include <linux/cpu.h> +#include <linux/prefetch.h> /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ static struct task_struct *rcu_kthread_task; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e486f7c3ffb..f07d2f03181 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -49,6 +49,7 @@ #include <linux/kernel_stat.h> #include <linux/wait.h> #include <linux/kthread.h> +#include <linux/prefetch.h> #include "rcutree.h" diff --git a/kernel/sched.c b/kernel/sched.c index c62acf45d3b..0516af41508 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2564,7 +2564,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) { struct rq *rq = cpu_rq(cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE) +#if defined(CONFIG_SMP) if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { ttwu_queue_remote(p, cpu); return; diff --git a/kernel/signal.c b/kernel/signal.c index 7165af5f1b1..ad5e818baac 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -124,7 +124,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) static int recalc_sigpending_tsk(struct task_struct *t) { - if (t->signal->group_stop_count > 0 || + if ((t->group_stop & GROUP_STOP_PENDING) || PENDING(&t->pending, &t->blocked) || PENDING(&t->signal->shared_pending, &t->blocked)) { set_tsk_thread_flag(t, TIF_SIGPENDING); @@ -223,6 +223,83 @@ static inline void print_dropped_signal(int sig) current->comm, current->pid, sig); } +/** + * task_clear_group_stop_trapping - clear group stop trapping bit + * @task: target task + * + * If GROUP_STOP_TRAPPING is set, a ptracer is waiting for us. Clear it + * and wake up the ptracer. Note that we don't need any further locking. + * @task->siglock guarantees that @task->parent points to the ptracer. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. + */ +static void task_clear_group_stop_trapping(struct task_struct *task) +{ + if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) { + task->group_stop &= ~GROUP_STOP_TRAPPING; + __wake_up_sync_key(&task->parent->signal->wait_chldexit, + TASK_UNINTERRUPTIBLE, 1, task); + } +} + +/** + * task_clear_group_stop_pending - clear pending group stop + * @task: target task + * + * Clear group stop states for @task. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. + */ +void task_clear_group_stop_pending(struct task_struct *task) +{ + task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME | + GROUP_STOP_DEQUEUED); +} + +/** + * task_participate_group_stop - participate in a group stop + * @task: task participating in a group stop + * + * @task has GROUP_STOP_PENDING set and is participating in a group stop. + * Group stop states are cleared and the group stop count is consumed if + * %GROUP_STOP_CONSUME was set. If the consumption completes the group + * stop, the appropriate %SIGNAL_* flags are set. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. + * + * RETURNS: + * %true if group stop completion should be notified to the parent, %false + * otherwise. + */ +static bool task_participate_group_stop(struct task_struct *task) +{ + struct signal_struct *sig = task->signal; + bool consume = task->group_stop & GROUP_STOP_CONSUME; + + WARN_ON_ONCE(!(task->group_stop & GROUP_STOP_PENDING)); + + task_clear_group_stop_pending(task); + + if (!consume) + return false; + + if (!WARN_ON_ONCE(sig->group_stop_count == 0)) + sig->group_stop_count--; + + /* + * Tell the caller to notify completion iff we are entering into a + * fresh group stop. Read comment in do_signal_stop() for details. + */ + if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) { + sig->flags = SIGNAL_STOP_STOPPED; + return true; + } + return false; +} + /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an @@ -527,7 +604,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) * is to alert stop-signal processing code when another * processor has come along and cleared the flag. */ - tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; + current->group_stop |= GROUP_STOP_DEQUEUED; } if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { /* @@ -592,7 +669,7 @@ static int rm_from_queue_full(sigset_t *mask, struct sigpending *s) if (sigisemptyset(&m)) return 0; - signandsets(&s->signal, &s->signal, mask); + sigandnsets(&s->signal, &s->signal, mask); list_for_each_entry_safe(q, n, &s->list, list) { if (sigismember(mask, q->info.si_signo)) { list_del_init(&q->list); @@ -727,34 +804,14 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) } else if (sig == SIGCONT) { unsigned int why; /* - * Remove all stop signals from all queues, - * and wake all threads. + * Remove all stop signals from all queues, wake all threads. */ rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending); t = p; do { - unsigned int state; + task_clear_group_stop_pending(t); rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); - /* - * If there is a handler for SIGCONT, we must make - * sure that no thread returns to user mode before - * we post the signal, in case it was the only - * thread eligible to run the signal handler--then - * it must not do anything between resuming and - * running the handler. With the TIF_SIGPENDING - * flag set, the thread will pause and acquire the - * siglock that we hold now and until we've queued - * the pending signal. - * - * Wake up the stopped thread _after_ setting - * TIF_SIGPENDING - */ - state = __TASK_STOPPED; - if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) { - set_tsk_thread_flag(t, TIF_SIGPENDING); - state |= TASK_INTERRUPTIBLE; - } - wake_up_state(t, state); + wake_up_state(t, __TASK_STOPPED); } while_each_thread(p, t); /* @@ -780,13 +837,6 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) signal->flags = why | SIGNAL_STOP_CONTINUED; signal->group_stop_count = 0; signal->group_exit_code = 0; - } else { - /* - * We are not stopped, but there could be a stop - * signal in the middle of being processed after - * being removed from the queue. Clear that too. - */ - signal->flags &= ~SIGNAL_STOP_DEQUEUED; } } @@ -875,6 +925,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) signal->group_stop_count = 0; t = p; do { + task_clear_group_stop_pending(t); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } while_each_thread(p, t); @@ -1109,6 +1160,7 @@ int zap_other_threads(struct task_struct *p) p->signal->group_stop_count = 0; while_each_thread(p, t) { + task_clear_group_stop_pending(t); count++; /* Don't bother with already dead threads */ @@ -1536,16 +1588,30 @@ int do_notify_parent(struct task_struct *tsk, int sig) return ret; } -static void do_notify_parent_cldstop(struct task_struct *tsk, int why) +/** + * do_notify_parent_cldstop - notify parent of stopped/continued state change + * @tsk: task reporting the state change + * @for_ptracer: the notification is for ptracer + * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report + * + * Notify @tsk's parent that the stopped/continued state has changed. If + * @for_ptracer is %false, @tsk's group leader notifies to its real parent. + * If %true, @tsk reports to @tsk->parent which should be the ptracer. + * + * CONTEXT: + * Must be called with tasklist_lock at least read locked. + */ +static void do_notify_parent_cldstop(struct task_struct *tsk, + bool for_ptracer, int why) { struct siginfo info; unsigned long flags; struct task_struct *parent; struct sighand_struct *sighand; - if (task_ptrace(tsk)) + if (for_ptracer) { parent = tsk->parent; - else { + } else { tsk = tsk->group_leader; parent = tsk->real_parent; } @@ -1621,6 +1687,15 @@ static int sigkill_pending(struct task_struct *tsk) } /* + * Test whether the target task of the usual cldstop notification - the + * real_parent of @child - is in the same group as the ptracer. + */ +static bool real_parent_is_ptracer(struct task_struct *child) +{ + return same_thread_group(child->parent, child->real_parent); +} + +/* * This must be called with current->sighand->siglock held. * * This should be the path for all ptrace stops. @@ -1631,10 +1706,12 @@ static int sigkill_pending(struct task_struct *tsk) * If we actually decide not to stop at all because the tracer * is gone, we keep current->exit_code unless clear_code. */ -static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) +static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) __releases(¤t->sighand->siglock) __acquires(¤t->sighand->siglock) { + bool gstop_done = false; + if (arch_ptrace_stop_needed(exit_code, info)) { /* * The arch code has something special to do before a @@ -1655,21 +1732,49 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) } /* - * If there is a group stop in progress, - * we must participate in the bookkeeping. + * If @why is CLD_STOPPED, we're trapping to participate in a group + * stop. Do the bookkeeping. Note that if SIGCONT was delievered + * while siglock was released for the arch hook, PENDING could be + * clear now. We act as if SIGCONT is received after TASK_TRACED + * is entered - ignore it. */ - if (current->signal->group_stop_count > 0) - --current->signal->group_stop_count; + if (why == CLD_STOPPED && (current->group_stop & GROUP_STOP_PENDING)) + gstop_done = task_participate_group_stop(current); current->last_siginfo = info; current->exit_code = exit_code; - /* Let the debugger run. */ - __set_current_state(TASK_TRACED); + /* + * TRACED should be visible before TRAPPING is cleared; otherwise, + * the tracer might fail do_wait(). + */ + set_current_state(TASK_TRACED); + + /* + * We're committing to trapping. Clearing GROUP_STOP_TRAPPING and + * transition to TASK_TRACED should be atomic with respect to + * siglock. This hsould be done after the arch hook as siglock is + * released and regrabbed across it. + */ + task_clear_group_stop_trapping(current); + spin_unlock_irq(¤t->sighand->siglock); read_lock(&tasklist_lock); if (may_ptrace_stop()) { - do_notify_parent_cldstop(current, CLD_TRAPPED); + /* + * Notify parents of the stop. + * + * While ptraced, there are two parents - the ptracer and + * the real_parent of the group_leader. The ptracer should + * know about every stop while the real parent is only + * interested in the completion of group stop. The states + * for the two don't interact with each other. Notify + * separately unless they're gonna be duplicates. + */ + do_notify_parent_cldstop(current, true, why); + if (gstop_done && !real_parent_is_ptracer(current)) + do_notify_parent_cldstop(current, false, why); + /* * Don't want to allow preemption here, because * sys_ptrace() needs this task to be inactive. @@ -1684,7 +1789,16 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) /* * By the time we got the lock, our tracer went away. * Don't drop the lock yet, another tracer may come. + * + * If @gstop_done, the ptracer went away between group stop + * completion and here. During detach, it would have set + * GROUP_STOP_PENDING on us and we'll re-enter TASK_STOPPED + * in do_signal_stop() on return, so notifying the real + * parent of the group stop completion is enough. */ + if (gstop_done) + do_notify_parent_cldstop(current, false, why); + __set_current_state(TASK_RUNNING); if (clear_code) current->exit_code = 0; @@ -1728,7 +1842,7 @@ void ptrace_notify(int exit_code) /* Let the debugger run. */ spin_lock_irq(¤t->sighand->siglock); - ptrace_stop(exit_code, 1, &info); + ptrace_stop(exit_code, CLD_TRAPPED, 1, &info); spin_unlock_irq(¤t->sighand->siglock); } @@ -1741,66 +1855,115 @@ void ptrace_notify(int exit_code) static int do_signal_stop(int signr) { struct signal_struct *sig = current->signal; - int notify; - if (!sig->group_stop_count) { + if (!(current->group_stop & GROUP_STOP_PENDING)) { + unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME; struct task_struct *t; - if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || + /* signr will be recorded in task->group_stop for retries */ + WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK); + + if (!likely(current->group_stop & GROUP_STOP_DEQUEUED) || unlikely(signal_group_exit(sig))) return 0; /* - * There is no group stop already in progress. - * We must initiate one now. + * There is no group stop already in progress. We must + * initiate one now. + * + * While ptraced, a task may be resumed while group stop is + * still in effect and then receive a stop signal and + * initiate another group stop. This deviates from the + * usual behavior as two consecutive stop signals can't + * cause two group stops when !ptraced. That is why we + * also check !task_is_stopped(t) below. + * + * The condition can be distinguished by testing whether + * SIGNAL_STOP_STOPPED is already set. Don't generate + * group_exit_code in such case. + * + * This is not necessary for SIGNAL_STOP_CONTINUED because + * an intervening stop signal is required to cause two + * continued events regardless of ptrace. */ - sig->group_exit_code = signr; + if (!(sig->flags & SIGNAL_STOP_STOPPED)) + sig->group_exit_code = signr; + else + WARN_ON_ONCE(!task_ptrace(current)); + current->group_stop &= ~GROUP_STOP_SIGMASK; + current->group_stop |= signr | gstop; sig->group_stop_count = 1; - for (t = next_thread(current); t != current; t = next_thread(t)) + for (t = next_thread(current); t != current; + t = next_thread(t)) { + t->group_stop &= ~GROUP_STOP_SIGMASK; /* * Setting state to TASK_STOPPED for a group * stop is always done with the siglock held, * so this check has no races. */ - if (!(t->flags & PF_EXITING) && - !task_is_stopped_or_traced(t)) { + if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) { + t->group_stop |= signr | gstop; sig->group_stop_count++; signal_wake_up(t, 0); } + } } - /* - * If there are no other threads in the group, or if there is - * a group stop in progress and we are the last to stop, report - * to the parent. When ptraced, every thread reports itself. - */ - notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0; - notify = tracehook_notify_jctl(notify, CLD_STOPPED); - /* - * tracehook_notify_jctl() can drop and reacquire siglock, so - * we keep ->group_stop_count != 0 before the call. If SIGCONT - * or SIGKILL comes in between ->group_stop_count == 0. - */ - if (sig->group_stop_count) { - if (!--sig->group_stop_count) - sig->flags = SIGNAL_STOP_STOPPED; - current->exit_code = sig->group_exit_code; +retry: + if (likely(!task_ptrace(current))) { + int notify = 0; + + /* + * If there are no other threads in the group, or if there + * is a group stop in progress and we are the last to stop, + * report to the parent. + */ + if (task_participate_group_stop(current)) + notify = CLD_STOPPED; + __set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); + + /* + * Notify the parent of the group stop completion. Because + * we're not holding either the siglock or tasklist_lock + * here, ptracer may attach inbetween; however, this is for + * group stop and should always be delivered to the real + * parent of the group leader. The new ptracer will get + * its notification when this task transitions into + * TASK_TRACED. + */ + if (notify) { + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current, false, notify); + read_unlock(&tasklist_lock); + } + + /* Now we don't run again until woken by SIGCONT or SIGKILL */ + schedule(); + + spin_lock_irq(¤t->sighand->siglock); + } else { + ptrace_stop(current->group_stop & GROUP_STOP_SIGMASK, + CLD_STOPPED, 0, NULL); + current->exit_code = 0; } - spin_unlock_irq(¤t->sighand->siglock); - if (notify) { - read_lock(&tasklist_lock); - do_notify_parent_cldstop(current, notify); - read_unlock(&tasklist_lock); + /* + * GROUP_STOP_PENDING could be set if another group stop has + * started since being woken up or ptrace wants us to transit + * between TASK_STOPPED and TRACED. Retry group stop. + */ + if (current->group_stop & GROUP_STOP_PENDING) { + WARN_ON_ONCE(!(current->group_stop & GROUP_STOP_SIGMASK)); + goto retry; } - /* Now we don't run again until woken by SIGCONT or SIGKILL */ - do { - schedule(); - } while (try_to_freeze()); + /* PTRACE_ATTACH might have raced with task killing, clear trapping */ + task_clear_group_stop_trapping(current); + + spin_unlock_irq(¤t->sighand->siglock); tracehook_finish_jctl(); - current->exit_code = 0; return 1; } @@ -1814,7 +1977,7 @@ static int ptrace_signal(int signr, siginfo_t *info, ptrace_signal_deliver(regs, cookie); /* Let the debugger run. */ - ptrace_stop(signr, 0, info); + ptrace_stop(signr, CLD_TRAPPED, 0, info); /* We're back. Did the debugger cancel the sig? */ signr = current->exit_code; @@ -1869,18 +2032,36 @@ relock: * the CLD_ si_code into SIGNAL_CLD_MASK bits. */ if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { - int why = (signal->flags & SIGNAL_STOP_CONTINUED) - ? CLD_CONTINUED : CLD_STOPPED; + struct task_struct *leader; + int why; + + if (signal->flags & SIGNAL_CLD_CONTINUED) + why = CLD_CONTINUED; + else + why = CLD_STOPPED; + signal->flags &= ~SIGNAL_CLD_MASK; - why = tracehook_notify_jctl(why, CLD_CONTINUED); spin_unlock_irq(&sighand->siglock); - if (why) { - read_lock(&tasklist_lock); - do_notify_parent_cldstop(current->group_leader, why); - read_unlock(&tasklist_lock); - } + /* + * Notify the parent that we're continuing. This event is + * always per-process and doesn't make whole lot of sense + * for ptracers, who shouldn't consume the state via + * wait(2) either, but, for backward compatibility, notify + * the ptracer of the group leader too unless it's gonna be + * a duplicate. + */ + read_lock(&tasklist_lock); + + do_notify_parent_cldstop(current, false, why); + + leader = current->group_leader; + if (task_ptrace(leader) && !real_parent_is_ptracer(leader)) + do_notify_parent_cldstop(leader, true, why); + + read_unlock(&tasklist_lock); + goto relock; } @@ -1897,8 +2078,8 @@ relock: if (unlikely(signr != 0)) ka = return_ka; else { - if (unlikely(signal->group_stop_count > 0) && - do_signal_stop(0)) + if (unlikely(current->group_stop & + GROUP_STOP_PENDING) && do_signal_stop(0)) goto relock; signr = dequeue_signal(current, ¤t->blocked, @@ -2017,10 +2198,42 @@ relock: return signr; } +/* + * It could be that complete_signal() picked us to notify about the + * group-wide signal. Other threads should be notified now to take + * the shared signals in @which since we will not. + */ +static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) +{ + sigset_t retarget; + struct task_struct *t; + + sigandsets(&retarget, &tsk->signal->shared_pending.signal, which); + if (sigisemptyset(&retarget)) + return; + + t = tsk; + while_each_thread(tsk, t) { + if (t->flags & PF_EXITING) + continue; + + if (!has_pending_signals(&retarget, &t->blocked)) + continue; + /* Remove the signals this thread can handle. */ + sigandsets(&retarget, &retarget, &t->blocked); + + if (!signal_pending(t)) + signal_wake_up(t, 0); + + if (sigisemptyset(&retarget)) + break; + } +} + void exit_signals(struct task_struct *tsk) { int group_stop = 0; - struct task_struct *t; + sigset_t unblocked; if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { tsk->flags |= PF_EXITING; @@ -2036,26 +2249,23 @@ void exit_signals(struct task_struct *tsk) if (!signal_pending(tsk)) goto out; - /* - * It could be that __group_complete_signal() choose us to - * notify about group-wide signal. Another thread should be - * woken now to take the signal since we will not. - */ - for (t = tsk; (t = next_thread(t)) != tsk; ) - if (!signal_pending(t) && !(t->flags & PF_EXITING)) - recalc_sigpending_and_wake(t); + unblocked = tsk->blocked; + signotset(&unblocked); + retarget_shared_pending(tsk, &unblocked); - if (unlikely(tsk->signal->group_stop_count) && - !--tsk->signal->group_stop_count) { - tsk->signal->flags = SIGNAL_STOP_STOPPED; - group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED); - } + if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) && + task_participate_group_stop(tsk)) + group_stop = CLD_STOPPED; out: spin_unlock_irq(&tsk->sighand->siglock); + /* + * If group stop has completed, deliver the notification. This + * should always go to the real parent of the group leader. + */ if (unlikely(group_stop)) { read_lock(&tasklist_lock); - do_notify_parent_cldstop(tsk, group_stop); + do_notify_parent_cldstop(tsk, false, group_stop); read_unlock(&tasklist_lock); } } @@ -2089,11 +2299,33 @@ long do_no_restart_syscall(struct restart_block *param) return -EINTR; } -/* - * We don't need to get the kernel lock - this is all local to this - * particular thread.. (and that's good, because this is _heavily_ - * used by various programs) +static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) +{ + if (signal_pending(tsk) && !thread_group_empty(tsk)) { + sigset_t newblocked; + /* A set of now blocked but previously unblocked signals. */ + sigandnsets(&newblocked, newset, ¤t->blocked); + retarget_shared_pending(tsk, &newblocked); + } + tsk->blocked = *newset; + recalc_sigpending(); +} + +/** + * set_current_blocked - change current->blocked mask + * @newset: new mask + * + * It is wrong to change ->blocked directly, this helper should be used + * to ensure the process can't miss a shared signal we are going to block. */ +void set_current_blocked(const sigset_t *newset) +{ + struct task_struct *tsk = current; + + spin_lock_irq(&tsk->sighand->siglock); + __set_task_blocked(tsk, newset); + spin_unlock_irq(&tsk->sighand->siglock); +} /* * This is also useful for kernel threads that want to temporarily @@ -2105,30 +2337,29 @@ long do_no_restart_syscall(struct restart_block *param) */ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) { - int error; + struct task_struct *tsk = current; + sigset_t newset; - spin_lock_irq(¤t->sighand->siglock); + /* Lockless, only current can change ->blocked, never from irq */ if (oldset) - *oldset = current->blocked; + *oldset = tsk->blocked; - error = 0; switch (how) { case SIG_BLOCK: - sigorsets(¤t->blocked, ¤t->blocked, set); + sigorsets(&newset, &tsk->blocked, set); break; case SIG_UNBLOCK: - signandsets(¤t->blocked, ¤t->blocked, set); + sigandnsets(&newset, &tsk->blocked, set); break; case SIG_SETMASK: - current->blocked = *set; + newset = *set; break; default: - error = -EINVAL; + return -EINVAL; } - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - return error; + set_current_blocked(&newset); + return 0; } /** @@ -2138,40 +2369,34 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) * @oset: previous value of signal mask if non-null * @sigsetsize: size of sigset_t type */ -SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, +SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset, sigset_t __user *, oset, size_t, sigsetsize) { - int error = -EINVAL; sigset_t old_set, new_set; + int error; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) - goto out; + return -EINVAL; - if (set) { - error = -EFAULT; - if (copy_from_user(&new_set, set, sizeof(*set))) - goto out; + old_set = current->blocked; + + if (nset) { + if (copy_from_user(&new_set, nset, sizeof(sigset_t))) + return -EFAULT; sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); - error = sigprocmask(how, &new_set, &old_set); + error = sigprocmask(how, &new_set, NULL); if (error) - goto out; - if (oset) - goto set_old; - } else if (oset) { - spin_lock_irq(¤t->sighand->siglock); - old_set = current->blocked; - spin_unlock_irq(¤t->sighand->siglock); + return error; + } - set_old: - error = -EFAULT; - if (copy_to_user(oset, &old_set, sizeof(*oset))) - goto out; + if (oset) { + if (copy_to_user(oset, &old_set, sizeof(sigset_t))) + return -EFAULT; } - error = 0; -out: - return error; + + return 0; } long do_sigpending(void __user *set, unsigned long sigsetsize) @@ -2284,6 +2509,66 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) #endif /** + * do_sigtimedwait - wait for queued signals specified in @which + * @which: queued signals to wait for + * @info: if non-null, the signal's siginfo is returned here + * @ts: upper bound on process time suspension + */ +int do_sigtimedwait(const sigset_t *which, siginfo_t *info, + const struct timespec *ts) +{ + struct task_struct *tsk = current; + long timeout = MAX_SCHEDULE_TIMEOUT; + sigset_t mask = *which; + int sig; + + if (ts) { + if (!timespec_valid(ts)) + return -EINVAL; + timeout = timespec_to_jiffies(ts); + /* + * We can be close to the next tick, add another one + * to ensure we will wait at least the time asked for. + */ + if (ts->tv_sec || ts->tv_nsec) + timeout++; + } + + /* + * Invert the set of allowed signals to get those we want to block. + */ + sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + signotset(&mask); + + spin_lock_irq(&tsk->sighand->siglock); + sig = dequeue_signal(tsk, &mask, info); + if (!sig && timeout) { + /* + * None ready, temporarily unblock those we're interested + * while we are sleeping in so that we'll be awakened when + * they arrive. Unblocking is always fine, we can avoid + * set_current_blocked(). + */ + tsk->real_blocked = tsk->blocked; + sigandsets(&tsk->blocked, &tsk->blocked, &mask); + recalc_sigpending(); + spin_unlock_irq(&tsk->sighand->siglock); + + timeout = schedule_timeout_interruptible(timeout); + + spin_lock_irq(&tsk->sighand->siglock); + __set_task_blocked(tsk, &tsk->real_blocked); + siginitset(&tsk->real_blocked, 0); + sig = dequeue_signal(tsk, &mask, info); + } + spin_unlock_irq(&tsk->sighand->siglock); + + if (sig) + return sig; + return timeout ? -EINTR : -EAGAIN; +} + +/** * sys_rt_sigtimedwait - synchronously wait for queued signals specified * in @uthese * @uthese: queued signals to wait for @@ -2295,11 +2580,10 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, siginfo_t __user *, uinfo, const struct timespec __user *, uts, size_t, sigsetsize) { - int ret, sig; sigset_t these; struct timespec ts; siginfo_t info; - long timeout = 0; + int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) @@ -2308,61 +2592,16 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, if (copy_from_user(&these, uthese, sizeof(these))) return -EFAULT; - /* - * Invert the set of allowed signals to get those we - * want to block. - */ - sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP)); - signotset(&these); - if (uts) { if (copy_from_user(&ts, uts, sizeof(ts))) return -EFAULT; - if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0 - || ts.tv_sec < 0) - return -EINVAL; } - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &these, &info); - if (!sig) { - timeout = MAX_SCHEDULE_TIMEOUT; - if (uts) - timeout = (timespec_to_jiffies(&ts) - + (ts.tv_sec || ts.tv_nsec)); - - if (timeout) { - /* - * None ready -- temporarily unblock those we're - * interested while we are sleeping in so that we'll - * be awakened when they arrive. - */ - current->real_blocked = current->blocked; - sigandsets(¤t->blocked, ¤t->blocked, &these); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - timeout = schedule_timeout_interruptible(timeout); - - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &these, &info); - current->blocked = current->real_blocked; - siginitset(¤t->real_blocked, 0); - recalc_sigpending(); - } - } - spin_unlock_irq(¤t->sighand->siglock); + ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL); - if (sig) { - ret = sig; - if (uinfo) { - if (copy_siginfo_to_user(uinfo, &info)) - ret = -EFAULT; - } - } else { - ret = -EAGAIN; - if (timeout) - ret = -EINTR; + if (ret > 0 && uinfo) { + if (copy_siginfo_to_user(uinfo, &info)) + ret = -EFAULT; } return ret; @@ -2650,60 +2889,51 @@ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) /** * sys_sigprocmask - examine and change blocked signals * @how: whether to add, remove, or set signals - * @set: signals to add or remove (if non-null) + * @nset: signals to add or remove (if non-null) * @oset: previous value of signal mask if non-null * * Some platforms have their own version with special arguments; * others support only sys_rt_sigprocmask. */ -SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, +SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, old_sigset_t __user *, oset) { - int error; old_sigset_t old_set, new_set; + sigset_t new_blocked; - if (set) { - error = -EFAULT; - if (copy_from_user(&new_set, set, sizeof(*set))) - goto out; + old_set = current->blocked.sig[0]; + + if (nset) { + if (copy_from_user(&new_set, nset, sizeof(*nset))) + return -EFAULT; new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)); - spin_lock_irq(¤t->sighand->siglock); - old_set = current->blocked.sig[0]; + new_blocked = current->blocked; - error = 0; switch (how) { - default: - error = -EINVAL; - break; case SIG_BLOCK: - sigaddsetmask(¤t->blocked, new_set); + sigaddsetmask(&new_blocked, new_set); break; case SIG_UNBLOCK: - sigdelsetmask(¤t->blocked, new_set); + sigdelsetmask(&new_blocked, new_set); break; case SIG_SETMASK: - current->blocked.sig[0] = new_set; + new_blocked.sig[0] = new_set; break; + default: + return -EINVAL; } - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - if (error) - goto out; - if (oset) - goto set_old; - } else if (oset) { - old_set = current->blocked.sig[0]; - set_old: - error = -EFAULT; + set_current_blocked(&new_blocked); + } + + if (oset) { if (copy_to_user(oset, &old_set, sizeof(*oset))) - goto out; + return -EFAULT; } - error = 0; -out: - return error; + + return 0; } #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 25cc41cd8f3..62cbc8877fe 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -46,7 +46,9 @@ cond_syscall(sys_getsockopt); cond_syscall(compat_sys_getsockopt); cond_syscall(sys_shutdown); cond_syscall(sys_sendmsg); +cond_syscall(sys_sendmmsg); cond_syscall(compat_sys_sendmsg); +cond_syscall(compat_sys_sendmmsg); cond_syscall(sys_recvmsg); cond_syscall(sys_recvmmsg); cond_syscall(compat_sys_recvmsg); @@ -69,15 +71,22 @@ cond_syscall(compat_sys_epoll_pwait); cond_syscall(sys_semget); cond_syscall(sys_semop); cond_syscall(sys_semtimedop); +cond_syscall(compat_sys_semtimedop); cond_syscall(sys_semctl); +cond_syscall(compat_sys_semctl); cond_syscall(sys_msgget); cond_syscall(sys_msgsnd); +cond_syscall(compat_sys_msgsnd); cond_syscall(sys_msgrcv); +cond_syscall(compat_sys_msgrcv); cond_syscall(sys_msgctl); +cond_syscall(compat_sys_msgctl); cond_syscall(sys_shmget); cond_syscall(sys_shmat); +cond_syscall(compat_sys_shmat); cond_syscall(sys_shmdt); cond_syscall(sys_shmctl); +cond_syscall(compat_sys_shmctl); cond_syscall(sys_mq_open); cond_syscall(sys_mq_unlink); cond_syscall(sys_mq_timedsend); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0bb32414b1..3dd0c46fa3b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -730,14 +730,16 @@ static struct ctl_table kern_table[] = { .data = &watchdog_enabled, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_dowatchdog_enabled, + .proc_handler = proc_dowatchdog, + .extra1 = &zero, + .extra2 = &one, }, { .procname = "watchdog_thresh", - .data = &softlockup_thresh, + .data = &watchdog_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dowatchdog_thresh, + .proc_handler = proc_dowatchdog, .extra1 = &neg_one, .extra2 = &sixty, }, @@ -755,7 +757,9 @@ static struct ctl_table kern_table[] = { .data = &watchdog_enabled, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_dowatchdog_enabled, + .proc_handler = proc_dowatchdog, + .extra1 = &zero, + .extra2 = &one, }, #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 14733d4d156..6e63097fa73 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -28,7 +28,7 @@ #include <linux/perf_event.h> int watchdog_enabled = 1; -int __read_mostly softlockup_thresh = 60; +int __read_mostly watchdog_thresh = 10; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); @@ -91,6 +91,17 @@ static int __init nosoftlockup_setup(char *str) __setup("nosoftlockup", nosoftlockup_setup); /* */ +/* + * Hard-lockup warnings should be triggered after just a few seconds. Soft- + * lockups can have false positives under extreme conditions. So we generally + * want a higher threshold for soft lockups than for hard lockups. So we couple + * the thresholds with a factor: we make the soft threshold twice the amount of + * time the hard threshold is. + */ +static int get_softlockup_thresh() +{ + return watchdog_thresh * 2; +} /* * Returns seconds, approximately. We don't need nanosecond @@ -105,12 +116,12 @@ static unsigned long get_timestamp(int this_cpu) static unsigned long get_sample_period(void) { /* - * convert softlockup_thresh from seconds to ns + * convert watchdog_thresh from seconds to ns * the divide by 5 is to give hrtimer 5 chances to * increment before the hardlockup detector generates * a warning */ - return softlockup_thresh / 5 * NSEC_PER_SEC; + return get_softlockup_thresh() * (NSEC_PER_SEC / 5); } /* Commands for resetting the watchdog */ @@ -182,7 +193,7 @@ static int is_softlockup(unsigned long touch_ts) unsigned long now = get_timestamp(smp_processor_id()); /* Warn about unreasonable delays: */ - if (time_after(now, touch_ts + softlockup_thresh)) + if (time_after(now, touch_ts + get_softlockup_thresh())) return now - touch_ts; return 0; @@ -359,7 +370,7 @@ static int watchdog_nmi_enable(int cpu) /* Try to register using hardware perf events */ wd_attr = &wd_hw_attr; - wd_attr->sample_period = hw_nmi_get_sample_period(); + wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); if (!IS_ERR(event)) { printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); @@ -501,28 +512,25 @@ static void watchdog_disable_all_cpus(void) /* sysctl functions */ #ifdef CONFIG_SYSCTL /* - * proc handler for /proc/sys/kernel/nmi_watchdog + * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh */ -int proc_dowatchdog_enabled(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) +int proc_dowatchdog(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { - proc_dointvec(table, write, buffer, length, ppos); + int ret; - if (write) { - if (watchdog_enabled) - watchdog_enable_all_cpus(); - else - watchdog_disable_all_cpus(); - } - return 0; -} + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + goto out; -int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - return proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (watchdog_enabled && watchdog_thresh) + watchdog_enable_all_cpus(); + else + watchdog_disable_all_cpus(); + +out: + return ret; } #endif /* CONFIG_SYSCTL */ |