Diffstat (limited to 'kernel')
40 files changed, 1160 insertions, 749 deletions
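
The bulk of this series converts numeric pid_t plumbing to reference-counted struct pid handles (do_wait(), sys_waitid(), sys_wait4(), the itimer and rt-mutex debug paths), removes the obsolete fastcall annotation throughout kernel/, and splits the pid-namespace code out of kernel/pid.c into kernel/pid_namespace.c. As a reading aid, here is a minimal sketch — not part of the patch — of how the reworked sys_wait4() below maps its user-visible pid argument onto the (enum pid_type, struct pid *) pair that do_wait() now matches against. The helper name wait4_decode is invented for illustration; the body is condensed from the kernel/exit.c hunks.

#include <linux/pid.h>
#include <linux/sched.h>

/*
 * Illustration only, condensed from the sys_wait4() hunk below:
 * how a wait4-style upid selects which children do_wait() may reap.
 */
static void wait4_decode(pid_t upid, enum pid_type *type, struct pid **pid)
{
	if (upid == -1) {
		*type = PIDTYPE_MAX;		/* any child at all */
		*pid = NULL;
	} else if (upid < 0) {
		*type = PIDTYPE_PGID;		/* children in process group -upid */
		*pid = find_get_pid(-upid);
	} else if (upid == 0) {
		*type = PIDTYPE_PGID;		/* children in the caller's own group */
		*pid = get_pid(task_pgrp(current));
	} else {
		*type = PIDTYPE_PID;		/* exactly the child with this pid */
		*pid = find_get_pid(upid);
	}
	/* The caller must put_pid(*pid) after do_wait(), as sys_wait4() does. */
}

With the pid pinned by its reference count, do_wait() can compare struct pid pointers directly (see the new task_pid_type()/eligible_child() below) instead of translating every candidate task's pid number through the caller's pid namespace on each pass.
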
diff --git a/kernel/Makefile b/kernel/Makefile index 685697c0a18..6c584c55a6e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -4,12 +4,12 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ exit.o itimer.o time.o softirq.o resource.o \ - sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ + sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o \ - utsname.o notifier.o ksysfs.o pm_qos_params.o + notifier.o ksysfs.o pm_qos_params.o obj-$(CONFIG_SYSCTL) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -42,6 +42,9 @@ obj-$(CONFIG_CGROUPS) += cgroup.o obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o +obj-$(CONFIG_UTS_NS) += utsname.o +obj-$(CONFIG_USER_NS) += user_namespace.o +obj-$(CONFIG_PID_NS) += pid_namespace.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o obj-$(CONFIG_STOP_MACHINE) += stop_machine.o @@ -88,3 +91,11 @@ quiet_cmd_ikconfiggz = IKCFG $@ targets += config_data.h $(obj)/config_data.h: $(obj)/config_data.gz FORCE $(call if_changed,ikconfiggz) + +$(obj)/time.o: $(obj)/timeconst.h + +quiet_cmd_timeconst = TIMEC $@ + cmd_timeconst = $(PERL) $< $(CONFIG_HZ) > $@ +targets += timeconst.h +$(obj)/timeconst.h: $(src)/timeconst.pl FORCE + $(call if_changed,timeconst) diff --git a/kernel/cpu.c b/kernel/cpu.c index e0d3a4f56ec..2eff3f63abe 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -389,7 +389,7 @@ int disable_nonboot_cpus(void) return error; } -void enable_nonboot_cpus(void) +void __ref enable_nonboot_cpus(void) { int cpu, error; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 67b2bfe2781..3e296ed81d4 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2255,13 +2255,14 @@ const struct file_operations proc_cpuset_operations = { #endif /* CONFIG_PROC_PID_CPUSET */ /* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. 
*/ -char *cpuset_task_status_allowed(struct task_struct *task, char *buffer) -{ - buffer += sprintf(buffer, "Cpus_allowed:\t"); - buffer += cpumask_scnprintf(buffer, PAGE_SIZE, task->cpus_allowed); - buffer += sprintf(buffer, "\n"); - buffer += sprintf(buffer, "Mems_allowed:\t"); - buffer += nodemask_scnprintf(buffer, PAGE_SIZE, task->mems_allowed); - buffer += sprintf(buffer, "\n"); - return buffer; +void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) +{ + seq_printf(m, "Cpus_allowed:\t"); + m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count, + task->cpus_allowed); + seq_printf(m, "\n"); + seq_printf(m, "Mems_allowed:\t"); + m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count, + task->mems_allowed); + seq_printf(m, "\n"); } diff --git a/kernel/exit.c b/kernel/exit.c index eb9934a82fc..3b893e78ce6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -293,26 +293,27 @@ static void reparent_to_kthreadd(void) switch_uid(INIT_USER); } -void __set_special_pids(pid_t session, pid_t pgrp) +void __set_special_pids(struct pid *pid) { struct task_struct *curr = current->group_leader; + pid_t nr = pid_nr(pid); - if (task_session_nr(curr) != session) { + if (task_session(curr) != pid) { detach_pid(curr, PIDTYPE_SID); - set_task_session(curr, session); - attach_pid(curr, PIDTYPE_SID, find_pid(session)); + attach_pid(curr, PIDTYPE_SID, pid); + set_task_session(curr, nr); } - if (task_pgrp_nr(curr) != pgrp) { + if (task_pgrp(curr) != pid) { detach_pid(curr, PIDTYPE_PGID); - set_task_pgrp(curr, pgrp); - attach_pid(curr, PIDTYPE_PGID, find_pid(pgrp)); + attach_pid(curr, PIDTYPE_PGID, pid); + set_task_pgrp(curr, nr); } } -static void set_special_pids(pid_t session, pid_t pgrp) +static void set_special_pids(struct pid *pid) { write_lock_irq(&tasklist_lock); - __set_special_pids(session, pgrp); + __set_special_pids(pid); write_unlock_irq(&tasklist_lock); } @@ -383,7 +384,11 @@ void daemonize(const char *name, ...) */ current->flags |= PF_NOFREEZE; - set_special_pids(1, 1); + if (current->nsproxy != &init_nsproxy) { + get_nsproxy(&init_nsproxy); + switch_task_namespaces(current, &init_nsproxy); + } + set_special_pids(&init_struct_pid); proc_clear_tty(current); /* Block and flush all signals */ @@ -398,11 +403,6 @@ void daemonize(const char *name, ...) current->fs = fs; atomic_inc(&fs->count); - if (current->nsproxy != init_task.nsproxy) { - get_nsproxy(init_task.nsproxy); - switch_task_namespaces(current, init_task.nsproxy); - } - exit_files(current); current->files = init_task.files; atomic_inc(¤t->files->count); @@ -458,7 +458,7 @@ struct files_struct *get_files_struct(struct task_struct *task) return files; } -void fastcall put_files_struct(struct files_struct *files) +void put_files_struct(struct files_struct *files) { struct fdtable *fdt; @@ -745,24 +745,6 @@ static void exit_notify(struct task_struct *tsk) struct task_struct *t; struct pid *pgrp; - if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT) - && !thread_group_empty(tsk)) { - /* - * This occurs when there was a race between our exit - * syscall and a group signal choosing us as the one to - * wake up. It could be that we are the only thread - * alerted to check for pending signals, but another thread - * should be woken now to take the signal since we will not. - * Now we'll wake all the threads in the group just to make - * sure someone gets all the pending signals. 
- */ - spin_lock_irq(&tsk->sighand->siglock); - for (t = next_thread(tsk); t != tsk; t = next_thread(t)) - if (!signal_pending(t) && !(t->flags & PF_EXITING)) - recalc_sigpending_and_wake(t); - spin_unlock_irq(&tsk->sighand->siglock); - } - /* * This does two things: * @@ -905,7 +887,7 @@ static inline void exit_child_reaper(struct task_struct *tsk) zap_pid_ns_processes(tsk->nsproxy->pid_ns); } -fastcall NORET_TYPE void do_exit(long code) +NORET_TYPE void do_exit(long code) { struct task_struct *tsk = current; int group_dead; @@ -947,7 +929,7 @@ fastcall NORET_TYPE void do_exit(long code) schedule(); } - tsk->flags |= PF_EXITING; + exit_signals(tsk); /* sets PF_EXITING */ /* * tsk->flags are checked in the futex code to protect against * an exiting task cleaning up the robust pi futexes. @@ -1108,20 +1090,23 @@ asmlinkage void sys_exit_group(int error_code) do_group_exit((error_code & 0xff) << 8); } -static int eligible_child(pid_t pid, int options, struct task_struct *p) +static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) +{ + struct pid *pid = NULL; + if (type == PIDTYPE_PID) + pid = task->pids[type].pid; + else if (type < PIDTYPE_MAX) + pid = task->group_leader->pids[type].pid; + return pid; +} + +static int eligible_child(enum pid_type type, struct pid *pid, int options, + struct task_struct *p) { int err; - struct pid_namespace *ns; - ns = current->nsproxy->pid_ns; - if (pid > 0) { - if (task_pid_nr_ns(p, ns) != pid) - return 0; - } else if (!pid) { - if (task_pgrp_nr_ns(p, ns) != task_pgrp_vnr(current)) - return 0; - } else if (pid != -1) { - if (task_pgrp_nr_ns(p, ns) != -pid) + if (type < PIDTYPE_MAX) { + if (task_pid_type(p, type) != pid) return 0; } @@ -1140,18 +1125,16 @@ static int eligible_child(pid_t pid, int options, struct task_struct *p) if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) && !(options & __WALL)) return 0; - /* - * Do not consider thread group leaders that are - * in a non-empty thread group: - */ - if (delay_group_leader(p)) - return 2; err = security_task_wait(p); - if (err) - return err; + if (likely(!err)) + return 1; - return 1; + if (type != PIDTYPE_PID) + return 0; + /* This child was explicitly requested, abort */ + read_unlock(&tasklist_lock); + return err; } static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, @@ -1191,20 +1174,13 @@ static int wait_task_zombie(struct task_struct *p, int noreap, { unsigned long state; int retval, status, traced; - struct pid_namespace *ns; - - ns = current->nsproxy->pid_ns; + pid_t pid = task_pid_vnr(p); if (unlikely(noreap)) { - pid_t pid = task_pid_nr_ns(p, ns); uid_t uid = p->uid; int exit_code = p->exit_code; int why, status; - if (unlikely(p->exit_state != EXIT_ZOMBIE)) - return 0; - if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) - return 0; get_task_struct(p); read_unlock(&tasklist_lock); if ((exit_code & 0x7f) == 0) { @@ -1315,11 +1291,11 @@ static int wait_task_zombie(struct task_struct *p, int noreap, retval = put_user(status, &infop->si_status); } if (!retval && infop) - retval = put_user(task_pid_nr_ns(p, ns), &infop->si_pid); + retval = put_user(pid, &infop->si_pid); if (!retval && infop) retval = put_user(p->uid, &infop->si_uid); if (!retval) - retval = task_pid_nr_ns(p, ns); + retval = pid; if (traced) { write_lock_irq(&tasklist_lock); @@ -1351,21 +1327,38 @@ static int wait_task_zombie(struct task_struct *p, int noreap, * the lock and this task is uninteresting. 
If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, +static int wait_task_stopped(struct task_struct *p, int noreap, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { - int retval, exit_code; + int retval, exit_code, why; + uid_t uid = 0; /* unneeded, required by compiler */ pid_t pid; - if (!p->exit_code) - return 0; - if (delayed_group_leader && !(p->ptrace & PT_PTRACED) && - p->signal->group_stop_count > 0) + exit_code = 0; + spin_lock_irq(&p->sighand->siglock); + + if (unlikely(!task_is_stopped_or_traced(p))) + goto unlock_sig; + + if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0) /* * A group stop is in progress and this is the group leader. * We won't report until all threads have stopped. */ + goto unlock_sig; + + exit_code = p->exit_code; + if (!exit_code) + goto unlock_sig; + + if (!noreap) + p->exit_code = 0; + + uid = p->uid; +unlock_sig: + spin_unlock_irq(&p->sighand->siglock); + if (!exit_code) return 0; /* @@ -1375,65 +1368,15 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, * keep holding onto the tasklist_lock while we call getrusage and * possibly take page faults for user memory. */ - pid = task_pid_nr_ns(p, current->nsproxy->pid_ns); get_task_struct(p); + pid = task_pid_vnr(p); + why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); - if (unlikely(noreap)) { - uid_t uid = p->uid; - int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; - - exit_code = p->exit_code; - if (unlikely(!exit_code) || unlikely(p->exit_state)) - goto bail_ref; + if (unlikely(noreap)) return wait_noreap_copyout(p, pid, uid, why, exit_code, infop, ru); - } - - write_lock_irq(&tasklist_lock); - - /* - * This uses xchg to be atomic with the thread resuming and setting - * it. It must also be done with the write lock held to prevent a - * race with the EXIT_ZOMBIE case. - */ - exit_code = xchg(&p->exit_code, 0); - if (unlikely(p->exit_state)) { - /* - * The task resumed and then died. Let the next iteration - * catch it in EXIT_ZOMBIE. Note that exit_code might - * already be zero here if it resumed and did _exit(0). - * The task itself is dead and won't touch exit_code again; - * other processors in this function are locked out. - */ - p->exit_code = exit_code; - exit_code = 0; - } - if (unlikely(exit_code == 0)) { - /* - * Another thread in this function got to it first, or it - * resumed, or it resumed and then died. - */ - write_unlock_irq(&tasklist_lock); -bail_ref: - put_task_struct(p); - /* - * We are returning to the wait loop without having successfully - * removed the process and having released the lock. We cannot - * continue, since the "p" task pointer is potentially stale. - * - * Return -EAGAIN, and do_wait() will restart the loop from the - * beginning. Do _not_ re-acquire the lock. - */ - return -EAGAIN; - } - - /* move to end of parent's list to avoid starvation */ - remove_parent(p); - add_parent(p); - - write_unlock_irq(&tasklist_lock); retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; if (!retval && stat_addr) @@ -1443,15 +1386,13 @@ bail_ref: if (!retval && infop) retval = put_user(0, &infop->si_errno); if (!retval && infop) - retval = put_user((short)((p->ptrace & PT_PTRACED) - ? 
CLD_TRAPPED : CLD_STOPPED), - &infop->si_code); + retval = put_user(why, &infop->si_code); if (!retval && infop) retval = put_user(exit_code, &infop->si_status); if (!retval && infop) retval = put_user(pid, &infop->si_pid); if (!retval && infop) - retval = put_user(p->uid, &infop->si_uid); + retval = put_user(uid, &infop->si_uid); if (!retval) retval = pid; put_task_struct(p); @@ -1473,7 +1414,6 @@ static int wait_task_continued(struct task_struct *p, int noreap, int retval; pid_t pid; uid_t uid; - struct pid_namespace *ns; if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) return 0; @@ -1488,8 +1428,7 @@ static int wait_task_continued(struct task_struct *p, int noreap, p->signal->flags &= ~SIGNAL_STOP_CONTINUED; spin_unlock_irq(&p->sighand->siglock); - ns = current->nsproxy->pid_ns; - pid = task_pid_nr_ns(p, ns); + pid = task_pid_vnr(p); uid = p->uid; get_task_struct(p); read_unlock(&tasklist_lock); @@ -1500,7 +1439,7 @@ static int wait_task_continued(struct task_struct *p, int noreap, if (!retval && stat_addr) retval = put_user(0xffff, stat_addr); if (!retval) - retval = task_pid_nr_ns(p, ns); + retval = pid; } else { retval = wait_noreap_copyout(p, pid, uid, CLD_CONTINUED, SIGCONT, @@ -1511,101 +1450,63 @@ static int wait_task_continued(struct task_struct *p, int noreap, return retval; } - -static inline int my_ptrace_child(struct task_struct *p) -{ - if (!(p->ptrace & PT_PTRACED)) - return 0; - if (!(p->ptrace & PT_ATTACHED)) - return 1; - /* - * This child was PTRACE_ATTACH'd. We should be seeing it only if - * we are the attacher. If we are the real parent, this is a race - * inside ptrace_attach. It is waiting for the tasklist_lock, - * which we have to switch the parent links, but has already set - * the flags in p->ptrace. - */ - return (p->parent != p->real_parent); -} - -static long do_wait(pid_t pid, int options, struct siginfo __user *infop, - int __user *stat_addr, struct rusage __user *ru) +static long do_wait(enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, int __user *stat_addr, + struct rusage __user *ru) { DECLARE_WAITQUEUE(wait, current); struct task_struct *tsk; int flag, retval; - int allowed, denied; add_wait_queue(¤t->signal->wait_chldexit,&wait); repeat: + /* If there is nothing that can match our critier just get out */ + retval = -ECHILD; + if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) + goto end; + /* * We will set this flag if we see any child that might later * match our criteria, even if we are not able to reap it yet. */ - flag = 0; - allowed = denied = 0; + flag = retval = 0; current->state = TASK_INTERRUPTIBLE; read_lock(&tasklist_lock); tsk = current; do { struct task_struct *p; - int ret; list_for_each_entry(p, &tsk->children, sibling) { - ret = eligible_child(pid, options, p); + int ret = eligible_child(type, pid, options, p); if (!ret) continue; if (unlikely(ret < 0)) { - denied = ret; - continue; - } - allowed = 1; - - if (task_is_stopped_or_traced(p)) { + retval = ret; + } else if (task_is_stopped_or_traced(p)) { /* * It's stopped now, so it might later * continue, exit, or stop again. - * - * When we hit the race with PTRACE_ATTACH, we - * will not report this child. But the race - * means it has not yet been moved to our - * ptrace_children list, so we need to set the - * flag here to avoid a spurious ECHILD when - * the race happens with the only child. 
*/ flag = 1; + if (!(p->ptrace & PT_PTRACED) && + !(options & WUNTRACED)) + continue; - if (!my_ptrace_child(p)) { - if (task_is_traced(p)) - continue; - if (!(options & WUNTRACED)) - continue; - } - - retval = wait_task_stopped(p, ret == 2, + retval = wait_task_stopped(p, (options & WNOWAIT), infop, stat_addr, ru); - if (retval == -EAGAIN) - goto repeat; - if (retval != 0) /* He released the lock. */ - goto end; - } else if (p->exit_state == EXIT_ZOMBIE) { + } else if (p->exit_state == EXIT_ZOMBIE && + !delay_group_leader(p)) { /* - * Eligible but we cannot release it yet: + * We don't reap group leaders with subthreads. */ - if (ret == 2) - goto check_continued; if (!likely(options & WEXITED)) continue; retval = wait_task_zombie(p, (options & WNOWAIT), infop, stat_addr, ru); - /* He released the lock. */ - if (retval != 0) - goto end; } else if (p->exit_state != EXIT_DEAD) { -check_continued: /* * It's running now, so it might later * exit, stop, or stop and then continue. @@ -1616,17 +1517,20 @@ check_continued: retval = wait_task_continued(p, (options & WNOWAIT), infop, stat_addr, ru); - if (retval != 0) /* He released the lock. */ - goto end; } + if (retval != 0) /* tasklist_lock released */ + goto end; } if (!flag) { list_for_each_entry(p, &tsk->ptrace_children, - ptrace_list) { - if (!eligible_child(pid, options, p)) + ptrace_list) { + flag = eligible_child(type, pid, options, p); + if (!flag) continue; - flag = 1; - break; + if (likely(flag > 0)) + break; + retval = flag; + goto end; } } if (options & __WNOTHREAD) @@ -1634,10 +1538,9 @@ check_continued: tsk = next_thread(tsk); BUG_ON(tsk->signal != current->signal); } while (tsk != current); - read_unlock(&tasklist_lock); + if (flag) { - retval = 0; if (options & WNOHANG) goto end; retval = -ERESTARTSYS; @@ -1647,14 +1550,12 @@ check_continued: goto repeat; } retval = -ECHILD; - if (unlikely(denied) && !allowed) - retval = denied; end: current->state = TASK_RUNNING; remove_wait_queue(¤t->signal->wait_chldexit,&wait); if (infop) { if (retval > 0) - retval = 0; + retval = 0; else { /* * For a WNOHANG return, clear out all the fields @@ -1678,10 +1579,12 @@ end: return retval; } -asmlinkage long sys_waitid(int which, pid_t pid, +asmlinkage long sys_waitid(int which, pid_t upid, struct siginfo __user *infop, int options, struct rusage __user *ru) { + struct pid *pid = NULL; + enum pid_type type; long ret; if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED)) @@ -1691,37 +1594,58 @@ asmlinkage long sys_waitid(int which, pid_t pid, switch (which) { case P_ALL: - pid = -1; + type = PIDTYPE_MAX; break; case P_PID: - if (pid <= 0) + type = PIDTYPE_PID; + if (upid <= 0) return -EINVAL; break; case P_PGID: - if (pid <= 0) + type = PIDTYPE_PGID; + if (upid <= 0) return -EINVAL; - pid = -pid; break; default: return -EINVAL; } - ret = do_wait(pid, options, infop, NULL, ru); + if (type < PIDTYPE_MAX) + pid = find_get_pid(upid); + ret = do_wait(type, pid, options, infop, NULL, ru); + put_pid(pid); /* avoid REGPARM breakage on x86: */ prevent_tail_call(ret); return ret; } -asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, +asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr, int options, struct rusage __user *ru) { + struct pid *pid = NULL; + enum pid_type type; long ret; if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| __WNOTHREAD|__WCLONE|__WALL)) return -EINVAL; - ret = do_wait(pid, options | WEXITED, NULL, stat_addr, ru); + + if (upid == -1) + type = PIDTYPE_MAX; + else if (upid < 0) { + type = PIDTYPE_PGID; + pid = 
find_get_pid(-upid); + } else if (upid == 0) { + type = PIDTYPE_PGID; + pid = get_pid(task_pgrp(current)); + } else /* upid > 0 */ { + type = PIDTYPE_PID; + pid = find_get_pid(upid); + } + + ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru); + put_pid(pid); /* avoid REGPARM breakage on x86: */ prevent_tail_call(ret); diff --git a/kernel/fork.c b/kernel/fork.c index b2ef8e4fad7..4363a4eb84e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -390,7 +390,7 @@ struct mm_struct * mm_alloc(void) * is dropped: either by a lazy thread or by * mmput. Free the page directory and the mm. */ -void fastcall __mmdrop(struct mm_struct *mm) +void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); mm_free_pgd(mm); @@ -909,7 +909,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; - sig->tsk = tsk; sig->it_virt_expires = cputime_zero; sig->it_virt_incr = cputime_zero; @@ -1338,6 +1337,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (clone_flags & CLONE_NEWPID) p->nsproxy->pid_ns->child_reaper = p; + p->signal->leader_pid = pid; p->signal->tty = current->signal->tty; set_task_pgrp(p, task_pgrp_nr(current)); set_task_session(p, task_session_nr(current)); @@ -1488,13 +1488,7 @@ long do_fork(unsigned long clone_flags, if (!IS_ERR(p)) { struct completion vfork; - /* - * this is enough to call pid_nr_ns here, but this if - * improves optimisation of regular fork() - */ - nr = (clone_flags & CLONE_NEWPID) ? - task_pid_nr_ns(p, current->nsproxy->pid_ns) : - task_pid_vnr(p); + nr = task_pid_vnr(p); if (clone_flags & CLONE_PARENT_SETTID) put_user(nr, parent_tidptr); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 44019ce30a1..cc54c627635 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -286,7 +286,7 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq) * Note: The caller is expected to handle the ack, clear, mask and * unmask issues if necessary. */ -void fastcall +void handle_simple_irq(unsigned int irq, struct irq_desc *desc) { struct irqaction *action; @@ -327,7 +327,7 @@ out_unlock: * it after the associated handler has acknowledged the device, so the * interrupt line is back to inactive. */ -void fastcall +void handle_level_irq(unsigned int irq, struct irq_desc *desc) { unsigned int cpu = smp_processor_id(); @@ -375,7 +375,7 @@ out_unlock: * for modern forms of interrupt handlers, which handle the flow * details in hardware, transparently. */ -void fastcall +void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) { unsigned int cpu = smp_processor_id(); @@ -434,7 +434,7 @@ out: * the handler was running. If all pending interrupts are handled, the * loop is left. 
*/ -void fastcall +void handle_edge_irq(unsigned int irq, struct irq_desc *desc) { const unsigned int cpu = smp_processor_id(); @@ -505,7 +505,7 @@ out_unlock: * * Per CPU interrupts on SMP machines without locking requirements */ -void fastcall +void handle_percpu_irq(unsigned int irq, struct irq_desc *desc) { irqreturn_t action_ret; @@ -589,3 +589,39 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, set_irq_chip(irq, chip); __set_irq_handler(irq, handle, 0, name); } + +void __init set_irq_noprobe(unsigned int irq) +{ + struct irq_desc *desc; + unsigned long flags; + + if (irq >= NR_IRQS) { + printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq); + + return; + } + + desc = irq_desc + irq; + + spin_lock_irqsave(&desc->lock, flags); + desc->status |= IRQ_NOPROBE; + spin_unlock_irqrestore(&desc->lock, flags); +} + +void __init set_irq_probe(unsigned int irq) +{ + struct irq_desc *desc; + unsigned long flags; + + if (irq >= NR_IRQS) { + printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq); + + return; + } + + desc = irq_desc + irq; + + spin_lock_irqsave(&desc->lock, flags); + desc->status &= ~IRQ_NOPROBE; + spin_unlock_irqrestore(&desc->lock, flags); +} diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index dc335ad2752..5fa6198e913 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -25,7 +25,7 @@ * * Handles spurious and unhandled IRQ's. It also prints a debugmessage. */ -void fastcall +void handle_bad_irq(unsigned int irq, struct irq_desc *desc) { print_irq_desc(irq, desc); @@ -163,7 +163,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) * This is the original x86 implementation which is used for every * interrupt type. */ -fastcall unsigned int __do_IRQ(unsigned int irq) +unsigned int __do_IRQ(unsigned int irq) { struct irq_desc *desc = irq_desc + irq; struct irqaction *action; diff --git a/kernel/itimer.c b/kernel/itimer.c index 2fab344dbf5..ab982747d9b 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -132,7 +132,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) struct signal_struct *sig = container_of(timer, struct signal_struct, real_timer); - send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk); + kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid); return HRTIMER_NORESTART; } diff --git a/kernel/module.c b/kernel/module.c index bd60278ee70..4202da97a1d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -46,6 +46,7 @@ #include <asm/semaphore.h> #include <asm/cacheflush.h> #include <linux/license.h> +#include <asm/sections.h> #if 0 #define DEBUGP printk @@ -290,7 +291,7 @@ static unsigned long __find_symbol(const char *name, } } DEBUGP("Failed to find symbol %s\n", name); - return 0; + return -ENOENT; } /* Search for module by name: must hold module_mutex. 
*/ @@ -343,9 +344,6 @@ static inline unsigned int block_size(int val) return val; } -/* Created by linker magic */ -extern char __per_cpu_start[], __per_cpu_end[]; - static void *percpu_modalloc(unsigned long size, unsigned long align, const char *name) { @@ -783,7 +781,7 @@ void __symbol_put(const char *symbol) const unsigned long *crc; preempt_disable(); - if (!__find_symbol(symbol, &owner, &crc, 1)) + if (IS_ERR_VALUE(__find_symbol(symbol, &owner, &crc, 1))) BUG(); module_put(owner); preempt_enable(); @@ -929,7 +927,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, const unsigned long *crc; struct module *owner; - if (!__find_symbol("struct_module", &owner, &crc, 1)) + if (IS_ERR_VALUE(__find_symbol("struct_module", + &owner, &crc, 1))) BUG(); return check_version(sechdrs, versindex, "struct_module", mod, crc); @@ -978,12 +977,12 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, ret = __find_symbol(name, &owner, &crc, !(mod->taints & TAINT_PROPRIETARY_MODULE)); - if (ret) { + if (!IS_ERR_VALUE(ret)) { /* use_module can fail due to OOM, or module initialization or unloading */ if (!check_version(sechdrs, versindex, name, mod, crc) || !use_module(mod, owner)) - ret = 0; + ret = -EINVAL; } return ret; } @@ -1371,7 +1370,9 @@ void *__symbol_get(const char *symbol) preempt_disable(); value = __find_symbol(symbol, &owner, &crc, 1); - if (value && strong_try_module_get(owner) != 0) + if (IS_ERR_VALUE(value)) + value = 0; + else if (strong_try_module_get(owner)) value = 0; preempt_enable(); @@ -1391,14 +1392,16 @@ static int verify_export_symbols(struct module *mod) const unsigned long *crc; for (i = 0; i < mod->num_syms; i++) - if (__find_symbol(mod->syms[i].name, &owner, &crc, 1)) { + if (!IS_ERR_VALUE(__find_symbol(mod->syms[i].name, + &owner, &crc, 1))) { name = mod->syms[i].name; ret = -ENOEXEC; goto dup; } for (i = 0; i < mod->num_gpl_syms; i++) - if (__find_symbol(mod->gpl_syms[i].name, &owner, &crc, 1)) { + if (!IS_ERR_VALUE(__find_symbol(mod->gpl_syms[i].name, + &owner, &crc, 1))) { name = mod->gpl_syms[i].name; ret = -ENOEXEC; goto dup; @@ -1448,7 +1451,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, strtab + sym[i].st_name, mod); /* Ok if resolved. */ - if (sym[i].st_value != 0) + if (!IS_ERR_VALUE(sym[i].st_value)) break; /* Ok if weak. */ if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) @@ -2250,7 +2253,7 @@ static const char *get_ksymbol(struct module *mod, /* For kallsyms to ask for address resolution. NULL means not found. Careful * not to lock to avoid deadlock on oopses, simply disable preemption. */ -char *module_address_lookup(unsigned long addr, +const char *module_address_lookup(unsigned long addr, unsigned long *size, unsigned long *offset, char **modname, @@ -2275,7 +2278,7 @@ char *module_address_lookup(unsigned long addr, ret = namebuf; } preempt_enable(); - return (char *)ret; + return ret; } int lookup_module_symbol_name(unsigned long addr, char *symname) diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index d17436cdea1..3aaa06c561d 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -107,7 +107,7 @@ void debug_mutex_init(struct mutex *lock, const char *name, * use of the mutex is forbidden. The mutex must not be locked when * this function is called. 
*/ -void fastcall mutex_destroy(struct mutex *lock) +void mutex_destroy(struct mutex *lock) { DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)); lock->magic = NULL; diff --git a/kernel/mutex.c b/kernel/mutex.c index d9ec9b66625..d046a345d36 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -58,7 +58,7 @@ EXPORT_SYMBOL(__mutex_init); * We also put the fastpath first in the kernel image, to make sure the * branch is predicted by the CPU as default-untaken. */ -static void fastcall noinline __sched +static void noinline __sched __mutex_lock_slowpath(atomic_t *lock_count); /*** @@ -82,7 +82,7 @@ __mutex_lock_slowpath(atomic_t *lock_count); * * This function is similar to (but not equivalent to) down(). */ -void inline fastcall __sched mutex_lock(struct mutex *lock) +void inline __sched mutex_lock(struct mutex *lock) { might_sleep(); /* @@ -95,8 +95,7 @@ void inline fastcall __sched mutex_lock(struct mutex *lock) EXPORT_SYMBOL(mutex_lock); #endif -static void fastcall noinline __sched -__mutex_unlock_slowpath(atomic_t *lock_count); +static noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); /*** * mutex_unlock - release the mutex @@ -109,7 +108,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count); * * This function is similar to (but not equivalent to) up(). */ -void fastcall __sched mutex_unlock(struct mutex *lock) +void __sched mutex_unlock(struct mutex *lock) { /* * The unlocking fastpath is the 0->1 transition from 'locked' @@ -234,7 +233,7 @@ EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); /* * Release the lock, slowpath: */ -static fastcall inline void +static inline void __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) { struct mutex *lock = container_of(lock_count, struct mutex, count); @@ -271,7 +270,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) /* * Release the lock, slowpath: */ -static fastcall noinline void +static noinline void __mutex_unlock_slowpath(atomic_t *lock_count) { __mutex_unlock_common_slowpath(lock_count, 1); @@ -282,10 +281,10 @@ __mutex_unlock_slowpath(atomic_t *lock_count) * Here come the less common (and hence less performance-critical) APIs: * mutex_lock_interruptible() and mutex_trylock(). */ -static int fastcall noinline __sched +static noinline int __sched __mutex_lock_killable_slowpath(atomic_t *lock_count); -static noinline int fastcall __sched +static noinline int __sched __mutex_lock_interruptible_slowpath(atomic_t *lock_count); /*** @@ -299,7 +298,7 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count); * * This function is similar to (but not equivalent to) down_interruptible(). 
*/ -int fastcall __sched mutex_lock_interruptible(struct mutex *lock) +int __sched mutex_lock_interruptible(struct mutex *lock) { might_sleep(); return __mutex_fastpath_lock_retval @@ -308,7 +307,7 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock) EXPORT_SYMBOL(mutex_lock_interruptible); -int fastcall __sched mutex_lock_killable(struct mutex *lock) +int __sched mutex_lock_killable(struct mutex *lock) { might_sleep(); return __mutex_fastpath_lock_retval @@ -316,7 +315,7 @@ int fastcall __sched mutex_lock_killable(struct mutex *lock) } EXPORT_SYMBOL(mutex_lock_killable); -static void fastcall noinline __sched +static noinline void __sched __mutex_lock_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); @@ -324,7 +323,7 @@ __mutex_lock_slowpath(atomic_t *lock_count) __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); } -static int fastcall noinline __sched +static noinline int __sched __mutex_lock_killable_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); @@ -332,7 +331,7 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count) return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_); } -static noinline int fastcall __sched +static noinline int __sched __mutex_lock_interruptible_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); @@ -381,7 +380,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) * This function must not be used in interrupt context. The * mutex must be released by the same task that acquired it. */ -int fastcall __sched mutex_trylock(struct mutex *lock) +int __sched mutex_trylock(struct mutex *lock) { return __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 79f871bc0ef..f5d332cf8c6 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -21,6 +21,7 @@ #include <linux/utsname.h> #include <linux/pid_namespace.h> #include <net/net_namespace.h> +#include <linux/ipc_namespace.h> static struct kmem_cache *nsproxy_cachep; diff --git a/kernel/params.c b/kernel/params.c index e28c70628bb..afc46a23eb6 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -180,12 +180,12 @@ int parse_args(const char *name, #define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \ int param_set_##name(const char *val, struct kernel_param *kp) \ { \ - char *endp; \ tmptype l; \ + int ret; \ \ if (!val) return -EINVAL; \ - l = strtolfn(val, &endp, 0); \ - if (endp == val || ((type)l != l)) \ + ret = strtolfn(val, 0, &l); \ + if (ret == -EINVAL || ((type)l != l)) \ return -EINVAL; \ *((type *)kp->arg) = l; \ return 0; \ @@ -195,13 +195,13 @@ int parse_args(const char *name, return sprintf(buffer, format, *((type *)kp->arg)); \ } -STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, simple_strtoul); -STANDARD_PARAM_DEF(short, short, "%hi", long, simple_strtol); -STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, simple_strtoul); -STANDARD_PARAM_DEF(int, int, "%i", long, simple_strtol); -STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, simple_strtoul); -STANDARD_PARAM_DEF(long, long, "%li", long, simple_strtol); -STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, simple_strtoul); +STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, strict_strtoul); +STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol); +STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, 
strict_strtoul); +STANDARD_PARAM_DEF(int, int, "%i", long, strict_strtol); +STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul); +STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol); +STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul); int param_set_charp(const char *val, struct kernel_param *kp) { diff --git a/kernel/pid.c b/kernel/pid.c index 3b30bccdfcd..477691576b3 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -41,7 +41,6 @@ static struct hlist_head *pid_hash; static int pidhash_shift; struct pid init_struct_pid = INIT_STRUCT_PID; -static struct kmem_cache *pid_ns_cachep; int pid_max = PID_MAX_DEFAULT; @@ -112,7 +111,7 @@ EXPORT_SYMBOL(is_container_init); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); -static fastcall void free_pidmap(struct pid_namespace *pid_ns, int pid) +static void free_pidmap(struct pid_namespace *pid_ns, int pid) { struct pidmap *map = pid_ns->pidmap + pid / BITS_PER_PAGE; int offset = pid & BITS_PER_PAGE_MASK; @@ -181,7 +180,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) return -1; } -static int next_pidmap(struct pid_namespace *pid_ns, int last) +int next_pidmap(struct pid_namespace *pid_ns, int last) { int offset; struct pidmap *map, *end; @@ -199,7 +198,7 @@ static int next_pidmap(struct pid_namespace *pid_ns, int last) return -1; } -fastcall void put_pid(struct pid *pid) +void put_pid(struct pid *pid) { struct pid_namespace *ns; @@ -221,7 +220,7 @@ static void delayed_put_pid(struct rcu_head *rhp) put_pid(pid); } -fastcall void free_pid(struct pid *pid) +void free_pid(struct pid *pid) { /* We can be called with write_lock_irq(&tasklist_lock) held */ int i; @@ -287,7 +286,7 @@ out_free: goto out; } -struct pid * fastcall find_pid_ns(int nr, struct pid_namespace *ns) +struct pid *find_pid_ns(int nr, struct pid_namespace *ns) { struct hlist_node *elem; struct upid *pnr; @@ -317,7 +316,7 @@ EXPORT_SYMBOL_GPL(find_pid); /* * attach_pid() must be called with the tasklist_lock write-held. 
*/ -int fastcall attach_pid(struct task_struct *task, enum pid_type type, +int attach_pid(struct task_struct *task, enum pid_type type, struct pid *pid) { struct pid_link *link; @@ -329,7 +328,7 @@ int fastcall attach_pid(struct task_struct *task, enum pid_type type, return 0; } -void fastcall detach_pid(struct task_struct *task, enum pid_type type) +void detach_pid(struct task_struct *task, enum pid_type type) { struct pid_link *link; struct pid *pid; @@ -349,7 +348,7 @@ void fastcall detach_pid(struct task_struct *task, enum pid_type type) } /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ -void fastcall transfer_pid(struct task_struct *old, struct task_struct *new, +void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type type) { new->pids[type].pid = old->pids[type].pid; @@ -357,7 +356,7 @@ void fastcall transfer_pid(struct task_struct *old, struct task_struct *new, old->pids[type].pid = NULL; } -struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type) +struct task_struct *pid_task(struct pid *pid, enum pid_type type) { struct task_struct *result = NULL; if (pid) { @@ -409,7 +408,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type) return pid; } -struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type) +struct task_struct *get_pid_task(struct pid *pid, enum pid_type type) { struct task_struct *result; rcu_read_lock(); @@ -444,6 +443,12 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) return nr; } +pid_t pid_vnr(struct pid *pid) +{ + return pid_nr_ns(pid, current->nsproxy->pid_ns); +} +EXPORT_SYMBOL_GPL(pid_vnr); + pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return pid_nr_ns(task_pid(tsk), ns); @@ -488,180 +493,6 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) } EXPORT_SYMBOL_GPL(find_get_pid); -struct pid_cache { - int nr_ids; - char name[16]; - struct kmem_cache *cachep; - struct list_head list; -}; - -static LIST_HEAD(pid_caches_lh); -static DEFINE_MUTEX(pid_caches_mutex); - -/* - * creates the kmem cache to allocate pids from. 
- * @nr_ids: the number of numerical ids this pid will have to carry - */ - -static struct kmem_cache *create_pid_cachep(int nr_ids) -{ - struct pid_cache *pcache; - struct kmem_cache *cachep; - - mutex_lock(&pid_caches_mutex); - list_for_each_entry (pcache, &pid_caches_lh, list) - if (pcache->nr_ids == nr_ids) - goto out; - - pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL); - if (pcache == NULL) - goto err_alloc; - - snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); - cachep = kmem_cache_create(pcache->name, - sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (cachep == NULL) - goto err_cachep; - - pcache->nr_ids = nr_ids; - pcache->cachep = cachep; - list_add(&pcache->list, &pid_caches_lh); -out: - mutex_unlock(&pid_caches_mutex); - return pcache->cachep; - -err_cachep: - kfree(pcache); -err_alloc: - mutex_unlock(&pid_caches_mutex); - return NULL; -} - -#ifdef CONFIG_PID_NS -static struct pid_namespace *create_pid_namespace(int level) -{ - struct pid_namespace *ns; - int i; - - ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL); - if (ns == NULL) - goto out; - - ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!ns->pidmap[0].page) - goto out_free; - - ns->pid_cachep = create_pid_cachep(level + 1); - if (ns->pid_cachep == NULL) - goto out_free_map; - - kref_init(&ns->kref); - ns->last_pid = 0; - ns->child_reaper = NULL; - ns->level = level; - - set_bit(0, ns->pidmap[0].page); - atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); - - for (i = 1; i < PIDMAP_ENTRIES; i++) { - ns->pidmap[i].page = 0; - atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); - } - - return ns; - -out_free_map: - kfree(ns->pidmap[0].page); -out_free: - kmem_cache_free(pid_ns_cachep, ns); -out: - return ERR_PTR(-ENOMEM); -} - -static void destroy_pid_namespace(struct pid_namespace *ns) -{ - int i; - - for (i = 0; i < PIDMAP_ENTRIES; i++) - kfree(ns->pidmap[i].page); - kmem_cache_free(pid_ns_cachep, ns); -} - -struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) -{ - struct pid_namespace *new_ns; - - BUG_ON(!old_ns); - new_ns = get_pid_ns(old_ns); - if (!(flags & CLONE_NEWPID)) - goto out; - - new_ns = ERR_PTR(-EINVAL); - if (flags & CLONE_THREAD) - goto out_put; - - new_ns = create_pid_namespace(old_ns->level + 1); - if (!IS_ERR(new_ns)) - new_ns->parent = get_pid_ns(old_ns); - -out_put: - put_pid_ns(old_ns); -out: - return new_ns; -} - -void free_pid_ns(struct kref *kref) -{ - struct pid_namespace *ns, *parent; - - ns = container_of(kref, struct pid_namespace, kref); - - parent = ns->parent; - destroy_pid_namespace(ns); - - if (parent != NULL) - put_pid_ns(parent); -} -#endif /* CONFIG_PID_NS */ - -void zap_pid_ns_processes(struct pid_namespace *pid_ns) -{ - int nr; - int rc; - - /* - * The last thread in the cgroup-init thread group is terminating. - * Find remaining pid_ts in the namespace, signal and wait for them - * to exit. - * - * Note: This signals each threads in the namespace - even those that - * belong to the same thread group, To avoid this, we would have - * to walk the entire tasklist looking a processes in this - * namespace, but that could be unnecessarily expensive if the - * pid namespace has just a few processes. Or we need to - * maintain a tasklist for each pid namespace. 
- * - */ - read_lock(&tasklist_lock); - nr = next_pidmap(pid_ns, 1); - while (nr > 0) { - kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr); - nr = next_pidmap(pid_ns, nr); - } - read_unlock(&tasklist_lock); - - do { - clear_thread_flag(TIF_SIGPENDING); - rc = sys_wait4(-1, NULL, __WALL, NULL); - } while (rc != -ECHILD); - - - /* Child reaper for the pid namespace is going away */ - pid_ns->child_reaper = NULL; - return; -} - /* * The pid hash table is scaled according to the amount of memory in the * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or @@ -694,9 +525,6 @@ void __init pidmap_init(void) set_bit(0, init_pid_ns.pidmap[0].page); atomic_dec(&init_pid_ns.pidmap[0].nr_free); - init_pid_ns.pid_cachep = create_pid_cachep(1); - if (init_pid_ns.pid_cachep == NULL) - panic("Can't create pid_1 cachep\n"); - - pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); + init_pid_ns.pid_cachep = KMEM_CACHE(pid, + SLAB_HWCACHE_ALIGN | SLAB_PANIC); } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c new file mode 100644 index 00000000000..6d792b66d85 --- /dev/null +++ b/kernel/pid_namespace.c @@ -0,0 +1,197 @@ +/* + * Pid namespaces + * + * Authors: + * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc. + * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM + * Many thanks to Oleg Nesterov for comments and help + * + */ + +#include <linux/pid.h> +#include <linux/pid_namespace.h> +#include <linux/syscalls.h> +#include <linux/err.h> + +#define BITS_PER_PAGE (PAGE_SIZE*8) + +struct pid_cache { + int nr_ids; + char name[16]; + struct kmem_cache *cachep; + struct list_head list; +}; + +static LIST_HEAD(pid_caches_lh); +static DEFINE_MUTEX(pid_caches_mutex); +static struct kmem_cache *pid_ns_cachep; + +/* + * creates the kmem cache to allocate pids from. 
+ * @nr_ids: the number of numerical ids this pid will have to carry + */ + +static struct kmem_cache *create_pid_cachep(int nr_ids) +{ + struct pid_cache *pcache; + struct kmem_cache *cachep; + + mutex_lock(&pid_caches_mutex); + list_for_each_entry(pcache, &pid_caches_lh, list) + if (pcache->nr_ids == nr_ids) + goto out; + + pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL); + if (pcache == NULL) + goto err_alloc; + + snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); + cachep = kmem_cache_create(pcache->name, + sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (cachep == NULL) + goto err_cachep; + + pcache->nr_ids = nr_ids; + pcache->cachep = cachep; + list_add(&pcache->list, &pid_caches_lh); +out: + mutex_unlock(&pid_caches_mutex); + return pcache->cachep; + +err_cachep: + kfree(pcache); +err_alloc: + mutex_unlock(&pid_caches_mutex); + return NULL; +} + +static struct pid_namespace *create_pid_namespace(int level) +{ + struct pid_namespace *ns; + int i; + + ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL); + if (ns == NULL) + goto out; + + ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!ns->pidmap[0].page) + goto out_free; + + ns->pid_cachep = create_pid_cachep(level + 1); + if (ns->pid_cachep == NULL) + goto out_free_map; + + kref_init(&ns->kref); + ns->last_pid = 0; + ns->child_reaper = NULL; + ns->level = level; + + set_bit(0, ns->pidmap[0].page); + atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); + + for (i = 1; i < PIDMAP_ENTRIES; i++) { + ns->pidmap[i].page = 0; + atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); + } + + return ns; + +out_free_map: + kfree(ns->pidmap[0].page); +out_free: + kmem_cache_free(pid_ns_cachep, ns); +out: + return ERR_PTR(-ENOMEM); +} + +static void destroy_pid_namespace(struct pid_namespace *ns) +{ + int i; + + for (i = 0; i < PIDMAP_ENTRIES; i++) + kfree(ns->pidmap[i].page); + kmem_cache_free(pid_ns_cachep, ns); +} + +struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) +{ + struct pid_namespace *new_ns; + + BUG_ON(!old_ns); + new_ns = get_pid_ns(old_ns); + if (!(flags & CLONE_NEWPID)) + goto out; + + new_ns = ERR_PTR(-EINVAL); + if (flags & CLONE_THREAD) + goto out_put; + + new_ns = create_pid_namespace(old_ns->level + 1); + if (!IS_ERR(new_ns)) + new_ns->parent = get_pid_ns(old_ns); + +out_put: + put_pid_ns(old_ns); +out: + return new_ns; +} + +void free_pid_ns(struct kref *kref) +{ + struct pid_namespace *ns, *parent; + + ns = container_of(kref, struct pid_namespace, kref); + + parent = ns->parent; + destroy_pid_namespace(ns); + + if (parent != NULL) + put_pid_ns(parent); +} + +void zap_pid_ns_processes(struct pid_namespace *pid_ns) +{ + int nr; + int rc; + + /* + * The last thread in the cgroup-init thread group is terminating. + * Find remaining pid_ts in the namespace, signal and wait for them + * to exit. + * + * Note: This signals each threads in the namespace - even those that + * belong to the same thread group, To avoid this, we would have + * to walk the entire tasklist looking a processes in this + * namespace, but that could be unnecessarily expensive if the + * pid namespace has just a few processes. Or we need to + * maintain a tasklist for each pid namespace. 
+ * + */ + read_lock(&tasklist_lock); + nr = next_pidmap(pid_ns, 1); + while (nr > 0) { + kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr); + nr = next_pidmap(pid_ns, nr); + } + read_unlock(&tasklist_lock); + + do { + clear_thread_flag(TIF_SIGPENDING); + rc = sys_wait4(-1, NULL, __WALL, NULL); + } while (rc != -ECHILD); + + + /* Child reaper for the pid namespace is going away */ + pid_ns->child_reaper = NULL; + return; +} + +static __init int pid_namespaces_init(void) +{ + pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); + return 0; +} + +__initcall(pid_namespaces_init); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 0b7c82ac467..2eae91f954c 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -20,7 +20,7 @@ static int check_clock(const clockid_t which_clock) return 0; read_lock(&tasklist_lock); - p = find_task_by_pid(pid); + p = find_task_by_vpid(pid); if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? same_thread_group(p, current) : thread_group_leader(p))) { error = -EINVAL; @@ -305,7 +305,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) */ struct task_struct *p; rcu_read_lock(); - p = find_task_by_pid(pid); + p = find_task_by_vpid(pid); if (p) { if (CPUCLOCK_PERTHREAD(which_clock)) { if (same_thread_group(p, current)) { @@ -354,7 +354,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) if (pid == 0) { p = current; } else { - p = find_task_by_pid(pid); + p = find_task_by_vpid(pid); if (p && !same_thread_group(p, current)) p = NULL; } @@ -362,7 +362,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) if (pid == 0) { p = current->group_leader; } else { - p = find_task_by_pid(pid); + p = find_task_by_vpid(pid); if (p && !thread_group_leader(p)) p = NULL; } diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 122d5c787fe..ce268966007 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -404,7 +404,7 @@ static struct task_struct * good_sigevent(sigevent_t * event) struct task_struct *rtn = current->group_leader; if ((event->sigev_notify & SIGEV_THREAD_ID ) && - (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) || + (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || !same_thread_group(rtn, current) || (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) return NULL; diff --git a/kernel/printk.c b/kernel/printk.c index 4a090621f37..bee36100f11 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -32,7 +32,6 @@ #include <linux/security.h> #include <linux/bootmem.h> #include <linux/syscalls.h> -#include <linux/jiffies.h> #include <asm/uaccess.h> @@ -567,19 +566,6 @@ static int printk_time = 0; #endif module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); -static int __init printk_time_setup(char *str) -{ - if (*str) - return 0; - printk_time = 1; - printk(KERN_NOTICE "The 'time' option is deprecated and " - "is scheduled for removal in early 2008\n"); - printk(KERN_NOTICE "Use 'printk.time=<value>' instead\n"); - return 1; -} - -__setup("time", printk_time_setup); - /* Check if we have any console registered that can be called early in boot. */ static int have_callable_console(void) { @@ -1265,6 +1251,7 @@ void tty_write_message(struct tty_struct *tty, char *msg) return; } +#if defined CONFIG_PRINTK /* * printk rate limiting, lifted from the networking subsystem. 
* @@ -1334,3 +1321,4 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies, return false; } EXPORT_SYMBOL(printk_timed_ratelimit); +#endif diff --git a/kernel/profile.c b/kernel/profile.c index e64c2da11c0..3b7a1b05512 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -20,7 +20,6 @@ #include <linux/mm.h> #include <linux/cpumask.h> #include <linux/cpu.h> -#include <linux/profile.h> #include <linux/highmem.h> #include <linux/mutex.h> #include <asm/sections.h> diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 628b03ab88a..fdb34e86f92 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -99,10 +99,12 @@ int ptrace_check_attach(struct task_struct *child, int kill) * be changed by us so it's not changing right after this. */ read_lock(&tasklist_lock); - if ((child->ptrace & PT_PTRACED) && child->parent == current && - (!(child->ptrace & PT_ATTACHED) || child->real_parent != current) - && child->signal != NULL) { + if ((child->ptrace & PT_PTRACED) && child->parent == current) { ret = 0; + /* + * child->sighand can't be NULL, release_task() + * does ptrace_unlink() before __exit_signal(). + */ spin_lock_irq(&child->sighand->siglock); if (task_is_stopped(child)) child->state = TASK_TRACED; @@ -200,8 +202,7 @@ repeat: goto bad; /* Go */ - task->ptrace |= PT_PTRACED | ((task->real_parent != current) - ? PT_ATTACHED : 0); + task->ptrace |= PT_PTRACED; if (capable(CAP_SYS_PTRACE)) task->ptrace |= PT_PTRACE_CAP; diff --git a/kernel/resource.c b/kernel/resource.c index 2eb553d9b51..82aea814d40 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -228,7 +228,7 @@ int release_resource(struct resource *old) EXPORT_SYMBOL(release_resource); -#ifdef CONFIG_MEMORY_HOTPLUG +#if defined(CONFIG_MEMORY_HOTPLUG) && !defined(CONFIG_ARCH_HAS_WALK_MEMORY) /* * Finds the lowest memory reosurce exists within [res->start.res->end) * the caller must specify res->start, res->end, res->flags. diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index 56d73cb8826..5fcb4fe645e 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c @@ -130,7 +130,7 @@ void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter, task = rt_mutex_owner(act_waiter->lock); if (task && task != current) { - act_waiter->deadlock_task_pid = task->pid; + act_waiter->deadlock_task_pid = get_pid(task_pid(task)); act_waiter->deadlock_lock = lock; } } @@ -142,9 +142,12 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) if (!waiter->deadlock_lock || !rt_trace_on) return; - task = find_task_by_pid(waiter->deadlock_task_pid); - if (!task) + rcu_read_lock(); + task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID); + if (!task) { + rcu_read_unlock(); return; + } TRACE_OFF_NOLOCK(); @@ -173,6 +176,7 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) current->comm, task_pid_nr(current)); dump_stack(); debug_show_all_locks(); + rcu_read_unlock(); printk("[ turning off deadlock detection." "Please report this trace. 
]\n\n"); @@ -203,10 +207,12 @@ void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) memset(waiter, 0x11, sizeof(*waiter)); plist_node_init(&waiter->list_entry, MAX_PRIO); plist_node_init(&waiter->pi_list_entry, MAX_PRIO); + waiter->deadlock_task_pid = NULL; } void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) { + put_pid(waiter->deadlock_task_pid); TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); TRACE_WARN_ON(waiter->task); diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index 2d3b83593ca..e124bf5800e 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h @@ -51,7 +51,7 @@ struct rt_mutex_waiter { struct rt_mutex *lock; #ifdef CONFIG_DEBUG_RT_MUTEXES unsigned long ip; - pid_t deadlock_task_pid; + struct pid *deadlock_task_pid; struct rt_mutex *deadlock_lock; #endif }; diff --git a/kernel/sched.c b/kernel/sched.c index 9474b23c28b..3eedd526090 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1893,13 +1893,13 @@ out: return success; } -int fastcall wake_up_process(struct task_struct *p) +int wake_up_process(struct task_struct *p) { return try_to_wake_up(p, TASK_ALL, 0); } EXPORT_SYMBOL(wake_up_process); -int fastcall wake_up_state(struct task_struct *p, unsigned int state) +int wake_up_state(struct task_struct *p, unsigned int state) { return try_to_wake_up(p, state, 0); } @@ -1986,7 +1986,7 @@ void sched_fork(struct task_struct *p, int clone_flags) * that must be done for every newly created context, then puts the task * on the runqueue and wakes it. */ -void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) +void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) { unsigned long flags; struct rq *rq; @@ -3753,7 +3753,7 @@ void scheduler_tick(void) #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) -void fastcall add_preempt_count(int val) +void add_preempt_count(int val) { /* * Underflow? @@ -3769,7 +3769,7 @@ void fastcall add_preempt_count(int val) } EXPORT_SYMBOL(add_preempt_count); -void fastcall sub_preempt_count(int val) +void sub_preempt_count(int val) { /* * Underflow? @@ -4067,7 +4067,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, * @nr_exclusive: how many wake-one or wake-many threads to wake up * @key: is directly passed to the wakeup function */ -void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, +void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, void *key) { unsigned long flags; @@ -4081,7 +4081,7 @@ EXPORT_SYMBOL(__wake_up); /* * Same as __wake_up but called with the spinlock in wait_queue_head_t held. */ -void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode) +void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) { __wake_up_common(q, mode, 1, 0, NULL); } @@ -4099,7 +4099,7 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode) * * On UP it can prevent extra preemption. 
*/ -void fastcall +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) { unsigned long flags; diff --git a/kernel/signal.c b/kernel/signal.c index 5d30ff56184..2c1f08defac 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1018,7 +1018,7 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) } /* - * kill_pgrp_info() sends a signal to a process group: this is what the tty + * __kill_pgrp_info() sends a signal to a process group: this is what the tty * control characters do (^C, ^Z etc) */ @@ -1037,30 +1037,28 @@ int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) return success ? 0 : retval; } -int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) -{ - int retval; - - read_lock(&tasklist_lock); - retval = __kill_pgrp_info(sig, info, pgrp); - read_unlock(&tasklist_lock); - - return retval; -} - int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) { - int error; + int error = -ESRCH; struct task_struct *p; rcu_read_lock(); if (unlikely(sig_needs_tasklist(sig))) read_lock(&tasklist_lock); +retry: p = pid_task(pid, PIDTYPE_PID); - error = -ESRCH; - if (p) + if (p) { error = group_send_sig_info(sig, info, p); + if (unlikely(error == -ESRCH)) + /* + * The task was unhashed in between, try again. + * If it is dead, pid_task() will return NULL, + * if we race with de_thread() it will find the + * new leader. + */ + goto retry; + } if (unlikely(sig_needs_tasklist(sig))) read_unlock(&tasklist_lock); @@ -1125,14 +1123,22 @@ EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); static int kill_something_info(int sig, struct siginfo *info, int pid) { int ret; - rcu_read_lock(); - if (!pid) { - ret = kill_pgrp_info(sig, info, task_pgrp(current)); - } else if (pid == -1) { + + if (pid > 0) { + rcu_read_lock(); + ret = kill_pid_info(sig, info, find_vpid(pid)); + rcu_read_unlock(); + return ret; + } + + read_lock(&tasklist_lock); + if (pid != -1) { + ret = __kill_pgrp_info(sig, info, + pid ? find_vpid(-pid) : task_pgrp(current)); + } else { int retval = 0, count = 0; struct task_struct * p; - read_lock(&tasklist_lock); for_each_process(p) { if (p->pid > 1 && !same_thread_group(p, current)) { int err = group_send_sig_info(sig, info, p); @@ -1141,14 +1147,10 @@ static int kill_something_info(int sig, struct siginfo *info, int pid) retval = err; } } - read_unlock(&tasklist_lock); ret = count ? retval : -ESRCH; - } else if (pid < 0) { - ret = kill_pgrp_info(sig, info, find_vpid(-pid)); - } else { - ret = kill_pid_info(sig, info, find_vpid(pid)); } - rcu_read_unlock(); + read_unlock(&tasklist_lock); + return ret; } @@ -1196,20 +1198,6 @@ send_sig(int sig, struct task_struct *p, int priv) return send_sig_info(sig, __si_special(priv), p); } -/* - * This is the entry point for "process-wide" signals. - * They will go to an appropriate thread in the thread group. 
- */ -int -send_group_sig_info(int sig, struct siginfo *info, struct task_struct *p) -{ - int ret; - read_lock(&tasklist_lock); - ret = group_send_sig_info(sig, info, p); - read_unlock(&tasklist_lock); - return ret; -} - void force_sig(int sig, struct task_struct *p) { @@ -1237,7 +1225,13 @@ force_sigsegv(int sig, struct task_struct *p) int kill_pgrp(struct pid *pid, int sig, int priv) { - return kill_pgrp_info(sig, __si_special(priv), pid); + int ret; + + read_lock(&tasklist_lock); + ret = __kill_pgrp_info(sig, __si_special(priv), pid); + read_unlock(&tasklist_lock); + + return ret; } EXPORT_SYMBOL(kill_pgrp); @@ -1556,11 +1550,6 @@ static inline int may_ptrace_stop(void) { if (!likely(current->ptrace & PT_PTRACED)) return 0; - - if (unlikely(current->parent == current->real_parent && - (current->ptrace & PT_ATTACHED))) - return 0; - /* * Are we in the middle of do_coredump? * If so and our tracer is also part of the coredump stopping @@ -1596,10 +1585,10 @@ static int sigkill_pending(struct task_struct *tsk) * That makes it a way to test a stopped process for * being ptrace-stopped vs being job-control-stopped. * - * If we actually decide not to stop at all because the tracer is gone, - * we leave nostop_code in current->exit_code. + * If we actually decide not to stop at all because the tracer + * is gone, we keep current->exit_code unless clear_code. */ -static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info) +static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) { int killed = 0; @@ -1643,11 +1632,12 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info) } else { /* * By the time we got the lock, our tracer went away. - * Don't stop here. + * Don't drop the lock yet, another tracer may come. */ + __set_current_state(TASK_RUNNING); + if (clear_code) + current->exit_code = 0; read_unlock(&tasklist_lock); - set_current_state(TASK_RUNNING); - current->exit_code = nostop_code; } /* @@ -1680,7 +1670,7 @@ void ptrace_notify(int exit_code) /* Let the debugger run. */ spin_lock_irq(&current->sighand->siglock); - ptrace_stop(exit_code, 0, &info); + ptrace_stop(exit_code, 1, &info); spin_unlock_irq(&current->sighand->siglock); } @@ -1743,7 +1733,7 @@ static int do_signal_stop(int signr) * stop is always done with the siglock held, * so this check has no races. */ - if (!t->exit_state && + if (!(t->flags & PF_EXITING) && !task_is_stopped_or_traced(t)) { stop_count++; signal_wake_up(t, 0); @@ -1787,7 +1777,7 @@ relock: ptrace_signal_deliver(regs, cookie); /* Let the debugger run. */ - ptrace_stop(signr, signr, info); + ptrace_stop(signr, 0, info); /* We're back. Did the debugger cancel the sig? */ signr = current->exit_code; @@ -1904,6 +1894,48 @@ relock: return signr; } +void exit_signals(struct task_struct *tsk) +{ + int group_stop = 0; + struct task_struct *t; + + if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { + tsk->flags |= PF_EXITING; + return; + } + + spin_lock_irq(&tsk->sighand->siglock); + /* + * From now this task is not visible for group-wide signals, + * see wants_signal(), do_signal_stop(). + */ + tsk->flags |= PF_EXITING; + if (!signal_pending(tsk)) + goto out; + + /* It could be that __group_complete_signal() chose us to + * notify about a group-wide signal. Another thread should be + * woken now to take the signal since we will not. 
+ */ + for (t = tsk; (t = next_thread(t)) != tsk; ) + if (!signal_pending(t) && !(t->flags & PF_EXITING)) + recalc_sigpending_and_wake(t); + + if (unlikely(tsk->signal->group_stop_count) && + !--tsk->signal->group_stop_count) { + tsk->signal->flags = SIGNAL_STOP_STOPPED; + group_stop = 1; + } +out: + spin_unlock_irq(&tsk->sighand->siglock); + + if (unlikely(group_stop)) { + read_lock(&tasklist_lock); + do_notify_parent_cldstop(tsk, CLD_STOPPED); + read_unlock(&tasklist_lock); + } +} + EXPORT_SYMBOL(recalc_sigpending); EXPORT_SYMBOL_GPL(dequeue_signal); EXPORT_SYMBOL(flush_signals); diff --git a/kernel/softirq.c b/kernel/softirq.c index d7837d45419..5b3aea5f471 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -320,7 +320,7 @@ void irq_exit(void) /* * This function must run with irqs disabled! */ -inline fastcall void raise_softirq_irqoff(unsigned int nr) +inline void raise_softirq_irqoff(unsigned int nr) { __raise_softirq_irqoff(nr); @@ -337,7 +337,7 @@ inline fastcall void raise_softirq_irqoff(unsigned int nr) wakeup_softirqd(); } -void fastcall raise_softirq(unsigned int nr) +void raise_softirq(unsigned int nr) { unsigned long flags; @@ -363,7 +363,7 @@ struct tasklet_head static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL }; static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL }; -void fastcall __tasklet_schedule(struct tasklet_struct *t) +void __tasklet_schedule(struct tasklet_struct *t) { unsigned long flags; @@ -376,7 +376,7 @@ void fastcall __tasklet_schedule(struct tasklet_struct *t) EXPORT_SYMBOL(__tasklet_schedule); -void fastcall __tasklet_hi_schedule(struct tasklet_struct *t) +void __tasklet_hi_schedule(struct tasklet_struct *t) { unsigned long flags; diff --git a/kernel/sys.c b/kernel/sys.c index e3c08d4324d..a626116af5d 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -916,8 +916,8 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) { struct task_struct *p; struct task_struct *group_leader = current->group_leader; - int err = -EINVAL; - struct pid_namespace *ns; + struct pid *pgrp; + int err; if (!pid) pid = task_pid_vnr(group_leader); @@ -929,12 +929,10 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) /* From this point forward we keep holding onto the tasklist lock * so that our parent does not change from under us. 
-DaveM */ - ns = current->nsproxy->pid_ns; - write_lock_irq(&tasklist_lock); err = -ESRCH; - p = find_task_by_pid_ns(pid, ns); + p = find_task_by_vpid(pid); if (!p) goto out; @@ -942,7 +940,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) if (!thread_group_leader(p)) goto out; - if (p->real_parent->tgid == group_leader->tgid) { + if (same_thread_group(p->real_parent, group_leader)) { err = -EPERM; if (task_session(p) != task_session(group_leader)) goto out; @@ -959,10 +957,12 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) if (p->signal->leader) goto out; + pgrp = task_pid(p); if (pgid != pid) { struct task_struct *g; - g = find_task_by_pid_type_ns(PIDTYPE_PGID, pgid, ns); + pgrp = find_vpid(pgid); + g = pid_task(pgrp, PIDTYPE_PGID); if (!g || task_session(g) != task_session(group_leader)) goto out; } @@ -971,13 +971,10 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) if (err) goto out; - if (task_pgrp_nr_ns(p, ns) != pgid) { - struct pid *pid; - + if (task_pgrp(p) != pgrp) { detach_pid(p, PIDTYPE_PGID); - pid = find_vpid(pgid); - attach_pid(p, PIDTYPE_PGID, pid); - set_task_pgrp(p, pid_nr(pid)); + attach_pid(p, PIDTYPE_PGID, pgrp); + set_task_pgrp(p, pid_nr(pgrp)); } err = 0; @@ -994,17 +991,14 @@ asmlinkage long sys_getpgid(pid_t pid) else { int retval; struct task_struct *p; - struct pid_namespace *ns; - - ns = current->nsproxy->pid_ns; read_lock(&tasklist_lock); - p = find_task_by_pid_ns(pid, ns); + p = find_task_by_vpid(pid); retval = -ESRCH; if (p) { retval = security_task_getpgid(p); if (!retval) - retval = task_pgrp_nr_ns(p, ns); + retval = task_pgrp_vnr(p); } read_unlock(&tasklist_lock); return retval; @@ -1028,19 +1022,16 @@ asmlinkage long sys_getsid(pid_t pid) else { int retval; struct task_struct *p; - struct pid_namespace *ns; - - ns = current->nsproxy->pid_ns; - read_lock(&tasklist_lock); - p = find_task_by_pid_ns(pid, ns); + rcu_read_lock(); + p = find_task_by_vpid(pid); retval = -ESRCH; if (p) { retval = security_task_getsid(p); if (!retval) - retval = task_session_nr_ns(p, ns); + retval = task_session_vnr(p); } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return retval; } } @@ -1048,35 +1039,29 @@ asmlinkage long sys_getsid(pid_t pid) asmlinkage long sys_setsid(void) { struct task_struct *group_leader = current->group_leader; - pid_t session; + struct pid *sid = task_pid(group_leader); + pid_t session = pid_vnr(sid); int err = -EPERM; write_lock_irq(&tasklist_lock); - /* Fail if I am already a session leader */ if (group_leader->signal->leader) goto out; - session = group_leader->pid; /* Fail if a process group id already exists that equals the * proposed session id. - * - * Don't check if session id == 1 because kernel threads use this - * session id and so the check will always fail and make it so - * init cannot successfully call setsid. 
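sys_setsid() now checks for an existing process group directly against the leader's struct pid, which makes the removed session-id-1 special case unnecessary. For reference, the userspace contract being implemented (standalone illustration, not part of the patch):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();
	if (child == 0) {
		/*
		 * The forked child is not a process group leader, so
		 * setsid() succeeds: it becomes the leader of a new
		 * session and a new group, both numbered getpid().
		 */
		pid_t sid = setsid();
		printf("sid=%d pgrp=%d pid=%d\n",
		       (int)sid, (int)getpgrp(), (int)getpid());
		/*
		 * Calling setsid() again now fails with EPERM; that
		 * is the signal->leader test in the kernel code.
		 */
		_exit(0);
	}
	return 0;
}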
*/ - if (session > 1 && find_task_by_pid_type_ns(PIDTYPE_PGID, - session, &init_pid_ns)) + if (pid_task(sid, PIDTYPE_PGID)) goto out; group_leader->signal->leader = 1; - __set_special_pids(session, session); + __set_special_pids(sid); spin_lock(&group_leader->sighand->siglock); group_leader->signal->tty = NULL; spin_unlock(&group_leader->sighand->siglock); - err = task_pgrp_vnr(group_leader); + err = session; out: write_unlock_irq(&tasklist_lock); return err; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8c98d8147d8..d41ef6b4cf7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -37,7 +37,6 @@ #include <linux/highuid.h> #include <linux/writeback.h> #include <linux/hugetlb.h> -#include <linux/security.h> #include <linux/initrd.h> #include <linux/times.h> #include <linux/limits.h> @@ -74,8 +73,6 @@ extern int suid_dumpable; extern char core_pattern[]; extern int pid_max; extern int min_free_kbytes; -extern int printk_ratelimit_jiffies; -extern int printk_ratelimit_burst; extern int pid_max_min, pid_max_max; extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; @@ -491,14 +488,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, - { - .ctl_name = KERN_PRINTK, - .procname = "printk", - .data = &console_loglevel, - .maxlen = 4*sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, #ifdef CONFIG_KMOD { .ctl_name = KERN_MODPROBE, @@ -645,6 +634,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#if defined CONFIG_PRINTK + { + .ctl_name = KERN_PRINTK, + .procname = "printk", + .data = &console_loglevel, + .maxlen = 4*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = KERN_PRINTK_RATELIMIT, .procname = "printk_ratelimit", @@ -662,6 +660,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif { .ctl_name = KERN_NGROUPS_MAX, .procname = "ngroups_max", @@ -982,7 +981,7 @@ static struct ctl_table vm_table[] = { .data = &nr_overcommit_huge_pages, .maxlen = sizeof(nr_overcommit_huge_pages), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &hugetlb_overcommit_handler, }, #endif { @@ -2488,7 +2487,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp pid_t tmp; int r; - tmp = pid_nr_ns(cad_pid, current->nsproxy->pid_ns); + tmp = pid_vnr(cad_pid); r = __do_proc_dointvec(&tmp, table, write, filp, buffer, lenp, ppos, NULL, NULL); diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 006365b69ea..c09350d564f 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -8,10 +8,10 @@ struct trans_ctl_table { int ctl_name; const char *procname; - struct trans_ctl_table *child; + const struct trans_ctl_table *child; }; -static struct trans_ctl_table trans_random_table[] = { +static const struct trans_ctl_table trans_random_table[] = { { RANDOM_POOLSIZE, "poolsize" }, { RANDOM_ENTROPY_COUNT, "entropy_avail" }, { RANDOM_READ_THRESH, "read_wakeup_threshold" }, @@ -21,13 +21,13 @@ static struct trans_ctl_table trans_random_table[] = { {} }; -static struct trans_ctl_table trans_pty_table[] = { +static const struct trans_ctl_table trans_pty_table[] = { { PTY_MAX, "max" }, { PTY_NR, "nr" }, {} }; -static struct trans_ctl_table trans_kern_table[] = { +static const struct trans_ctl_table trans_kern_table[] = { { KERN_OSTYPE, "ostype" }, { KERN_OSRELEASE, "osrelease" }, /* KERN_OSREV not used */ @@ -107,7 +107,7 @@ static struct trans_ctl_table 
trans_kern_table[] = { {} }; -static struct trans_ctl_table trans_vm_table[] = { +static const struct trans_ctl_table trans_vm_table[] = { { VM_OVERCOMMIT_MEMORY, "overcommit_memory" }, { VM_PAGE_CLUSTER, "page-cluster" }, { VM_DIRTY_BACKGROUND, "dirty_background_ratio" }, @@ -139,7 +139,7 @@ static struct trans_ctl_table trans_vm_table[] = { {} }; -static struct trans_ctl_table trans_net_core_table[] = { +static const struct trans_ctl_table trans_net_core_table[] = { { NET_CORE_WMEM_MAX, "wmem_max" }, { NET_CORE_RMEM_MAX, "rmem_max" }, { NET_CORE_WMEM_DEFAULT, "wmem_default" }, @@ -165,14 +165,14 @@ static struct trans_ctl_table trans_net_core_table[] = { {}, }; -static struct trans_ctl_table trans_net_unix_table[] = { +static const struct trans_ctl_table trans_net_unix_table[] = { /* NET_UNIX_DESTROY_DELAY unused */ /* NET_UNIX_DELETE_DELAY unused */ { NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen" }, {} }; -static struct trans_ctl_table trans_net_ipv4_route_table[] = { +static const struct trans_ctl_table trans_net_ipv4_route_table[] = { { NET_IPV4_ROUTE_FLUSH, "flush" }, { NET_IPV4_ROUTE_MIN_DELAY, "min_delay" }, { NET_IPV4_ROUTE_MAX_DELAY, "max_delay" }, @@ -195,7 +195,7 @@ static struct trans_ctl_table trans_net_ipv4_route_table[] = { {} }; -static struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = { +static const struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = { { NET_IPV4_CONF_FORWARDING, "forwarding" }, { NET_IPV4_CONF_MC_FORWARDING, "mc_forwarding" }, @@ -222,14 +222,14 @@ static struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = { {} }; -static struct trans_ctl_table trans_net_ipv4_conf_table[] = { +static const struct trans_ctl_table trans_net_ipv4_conf_table[] = { { NET_PROTO_CONF_ALL, "all", trans_net_ipv4_conf_vars_table }, { NET_PROTO_CONF_DEFAULT, "default", trans_net_ipv4_conf_vars_table }, { 0, NULL, trans_net_ipv4_conf_vars_table }, {} }; -static struct trans_ctl_table trans_net_neigh_vars_table[] = { +static const struct trans_ctl_table trans_net_neigh_vars_table[] = { { NET_NEIGH_MCAST_SOLICIT, "mcast_solicit" }, { NET_NEIGH_UCAST_SOLICIT, "ucast_solicit" }, { NET_NEIGH_APP_SOLICIT, "app_solicit" }, @@ -251,13 +251,13 @@ static struct trans_ctl_table trans_net_neigh_vars_table[] = { {} }; -static struct trans_ctl_table trans_net_neigh_table[] = { +static const struct trans_ctl_table trans_net_neigh_table[] = { { NET_PROTO_CONF_DEFAULT, "default", trans_net_neigh_vars_table }, { 0, NULL, trans_net_neigh_vars_table }, {} }; -static struct trans_ctl_table trans_net_ipv4_netfilter_table[] = { +static const struct trans_ctl_table trans_net_ipv4_netfilter_table[] = { { NET_IPV4_NF_CONNTRACK_MAX, "ip_conntrack_max" }, { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, "ip_conntrack_tcp_timeout_syn_sent" }, @@ -294,7 +294,7 @@ static struct trans_ctl_table trans_net_ipv4_netfilter_table[] = { {} }; -static struct trans_ctl_table trans_net_ipv4_table[] = { +static const struct trans_ctl_table trans_net_ipv4_table[] = { { NET_IPV4_FORWARD, "ip_forward" }, { NET_IPV4_DYNADDR, "ip_dynaddr" }, @@ -393,13 +393,13 @@ static struct trans_ctl_table trans_net_ipv4_table[] = { {} }; -static struct trans_ctl_table trans_net_ipx_table[] = { +static const struct trans_ctl_table trans_net_ipx_table[] = { { NET_IPX_PPROP_BROADCASTING, "ipx_pprop_broadcasting" }, /* NET_IPX_FORWARDING unused */ {} }; -static struct trans_ctl_table trans_net_atalk_table[] = { +static const struct trans_ctl_table trans_net_atalk_table[] = { { NET_ATALK_AARP_EXPIRY_TIME, "aarp-expiry-time" }, { 
NET_ATALK_AARP_TICK_TIME, "aarp-tick-time" }, { NET_ATALK_AARP_RETRANSMIT_LIMIT, "aarp-retransmit-limit" }, @@ -407,7 +407,7 @@ static struct trans_ctl_table trans_net_atalk_table[] = { {}, }; -static struct trans_ctl_table trans_net_netrom_table[] = { +static const struct trans_ctl_table trans_net_netrom_table[] = { { NET_NETROM_DEFAULT_PATH_QUALITY, "default_path_quality" }, { NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER, "obsolescence_count_initialiser" }, { NET_NETROM_NETWORK_TTL_INITIALISER, "network_ttl_initialiser" }, @@ -423,7 +423,7 @@ static struct trans_ctl_table trans_net_netrom_table[] = { {} }; -static struct trans_ctl_table trans_net_ax25_param_table[] = { +static const struct trans_ctl_table trans_net_ax25_param_table[] = { { NET_AX25_IP_DEFAULT_MODE, "ip_default_mode" }, { NET_AX25_DEFAULT_MODE, "ax25_default_mode" }, { NET_AX25_BACKOFF_TYPE, "backoff_type" }, @@ -441,12 +441,12 @@ static struct trans_ctl_table trans_net_ax25_param_table[] = { {} }; -static struct trans_ctl_table trans_net_ax25_table[] = { +static const struct trans_ctl_table trans_net_ax25_table[] = { { 0, NULL, trans_net_ax25_param_table }, {} }; -static struct trans_ctl_table trans_net_bridge_table[] = { +static const struct trans_ctl_table trans_net_bridge_table[] = { { NET_BRIDGE_NF_CALL_ARPTABLES, "bridge-nf-call-arptables" }, { NET_BRIDGE_NF_CALL_IPTABLES, "bridge-nf-call-iptables" }, { NET_BRIDGE_NF_CALL_IP6TABLES, "bridge-nf-call-ip6tables" }, @@ -455,7 +455,7 @@ static struct trans_ctl_table trans_net_bridge_table[] = { {} }; -static struct trans_ctl_table trans_net_rose_table[] = { +static const struct trans_ctl_table trans_net_rose_table[] = { { NET_ROSE_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" }, { NET_ROSE_CALL_REQUEST_TIMEOUT, "call_request_timeout" }, { NET_ROSE_RESET_REQUEST_TIMEOUT, "reset_request_timeout" }, @@ -469,7 +469,7 @@ static struct trans_ctl_table trans_net_rose_table[] = { {} }; -static struct trans_ctl_table trans_net_ipv6_conf_var_table[] = { +static const struct trans_ctl_table trans_net_ipv6_conf_var_table[] = { { NET_IPV6_FORWARDING, "forwarding" }, { NET_IPV6_HOP_LIMIT, "hop_limit" }, { NET_IPV6_MTU, "mtu" }, @@ -497,14 +497,14 @@ static struct trans_ctl_table trans_net_ipv6_conf_var_table[] = { {} }; -static struct trans_ctl_table trans_net_ipv6_conf_table[] = { +static const struct trans_ctl_table trans_net_ipv6_conf_table[] = { { NET_PROTO_CONF_ALL, "all", trans_net_ipv6_conf_var_table }, { NET_PROTO_CONF_DEFAULT, "default", trans_net_ipv6_conf_var_table }, { 0, NULL, trans_net_ipv6_conf_var_table }, {} }; -static struct trans_ctl_table trans_net_ipv6_route_table[] = { +static const struct trans_ctl_table trans_net_ipv6_route_table[] = { { NET_IPV6_ROUTE_FLUSH, "flush" }, { NET_IPV6_ROUTE_GC_THRESH, "gc_thresh" }, { NET_IPV6_ROUTE_MAX_SIZE, "max_size" }, @@ -518,12 +518,12 @@ static struct trans_ctl_table trans_net_ipv6_route_table[] = { {} }; -static struct trans_ctl_table trans_net_ipv6_icmp_table[] = { +static const struct trans_ctl_table trans_net_ipv6_icmp_table[] = { { NET_IPV6_ICMP_RATELIMIT, "ratelimit" }, {} }; -static struct trans_ctl_table trans_net_ipv6_table[] = { +static const struct trans_ctl_table trans_net_ipv6_table[] = { { NET_IPV6_CONF, "conf", trans_net_ipv6_conf_table }, { NET_IPV6_NEIGH, "neigh", trans_net_neigh_table }, { NET_IPV6_ROUTE, "route", trans_net_ipv6_route_table }, @@ -538,7 +538,7 @@ static struct trans_ctl_table trans_net_ipv6_table[] = { {} }; -static struct trans_ctl_table trans_net_x25_table[] = { +static const struct 
trans_ctl_table trans_net_x25_table[] = { { NET_X25_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" }, { NET_X25_CALL_REQUEST_TIMEOUT, "call_request_timeout" }, { NET_X25_RESET_REQUEST_TIMEOUT, "reset_request_timeout" }, @@ -548,13 +548,13 @@ static struct trans_ctl_table trans_net_x25_table[] = { {} }; -static struct trans_ctl_table trans_net_tr_table[] = { +static const struct trans_ctl_table trans_net_tr_table[] = { { NET_TR_RIF_TIMEOUT, "rif_timeout" }, {} }; -static struct trans_ctl_table trans_net_decnet_conf_vars[] = { +static const struct trans_ctl_table trans_net_decnet_conf_vars[] = { { NET_DECNET_CONF_DEV_FORWARDING, "forwarding" }, { NET_DECNET_CONF_DEV_PRIORITY, "priority" }, { NET_DECNET_CONF_DEV_T2, "t2" }, @@ -562,12 +562,12 @@ static struct trans_ctl_table trans_net_decnet_conf_vars[] = { {} }; -static struct trans_ctl_table trans_net_decnet_conf[] = { +static const struct trans_ctl_table trans_net_decnet_conf[] = { { 0, NULL, trans_net_decnet_conf_vars }, {} }; -static struct trans_ctl_table trans_net_decnet_table[] = { +static const struct trans_ctl_table trans_net_decnet_table[] = { { NET_DECNET_CONF, "conf", trans_net_decnet_conf }, { NET_DECNET_NODE_ADDRESS, "node_address" }, { NET_DECNET_NODE_NAME, "node_name" }, @@ -585,7 +585,7 @@ static struct trans_ctl_table trans_net_decnet_table[] = { {} }; -static struct trans_ctl_table trans_net_sctp_table[] = { +static const struct trans_ctl_table trans_net_sctp_table[] = { { NET_SCTP_RTO_INITIAL, "rto_initial" }, { NET_SCTP_RTO_MIN, "rto_min" }, { NET_SCTP_RTO_MAX, "rto_max" }, @@ -606,7 +606,7 @@ static struct trans_ctl_table trans_net_sctp_table[] = { {} }; -static struct trans_ctl_table trans_net_llc_llc2_timeout_table[] = { +static const struct trans_ctl_table trans_net_llc_llc2_timeout_table[] = { { NET_LLC2_ACK_TIMEOUT, "ack" }, { NET_LLC2_P_TIMEOUT, "p" }, { NET_LLC2_REJ_TIMEOUT, "rej" }, @@ -614,23 +614,23 @@ static struct trans_ctl_table trans_net_llc_llc2_timeout_table[] = { {} }; -static struct trans_ctl_table trans_net_llc_station_table[] = { +static const struct trans_ctl_table trans_net_llc_station_table[] = { { NET_LLC_STATION_ACK_TIMEOUT, "ack_timeout" }, {} }; -static struct trans_ctl_table trans_net_llc_llc2_table[] = { +static const struct trans_ctl_table trans_net_llc_llc2_table[] = { { NET_LLC2, "timeout", trans_net_llc_llc2_timeout_table }, {} }; -static struct trans_ctl_table trans_net_llc_table[] = { +static const struct trans_ctl_table trans_net_llc_table[] = { { NET_LLC2, "llc2", trans_net_llc_llc2_table }, { NET_LLC_STATION, "station", trans_net_llc_station_table }, {} }; -static struct trans_ctl_table trans_net_netfilter_table[] = { +static const struct trans_ctl_table trans_net_netfilter_table[] = { { NET_NF_CONNTRACK_MAX, "nf_conntrack_max" }, { NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, "nf_conntrack_tcp_timeout_syn_sent" }, { NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, "nf_conntrack_tcp_timeout_syn_recv" }, @@ -667,12 +667,12 @@ static struct trans_ctl_table trans_net_netfilter_table[] = { {} }; -static struct trans_ctl_table trans_net_dccp_table[] = { +static const struct trans_ctl_table trans_net_dccp_table[] = { { NET_DCCP_DEFAULT, "default" }, {} }; -static struct trans_ctl_table trans_net_irda_table[] = { +static const struct trans_ctl_table trans_net_irda_table[] = { { NET_IRDA_DISCOVERY, "discovery" }, { NET_IRDA_DEVNAME, "devname" }, { NET_IRDA_DEBUG, "debug" }, @@ -690,7 +690,7 @@ static struct trans_ctl_table trans_net_irda_table[] = { {} }; -static struct trans_ctl_table 
trans_net_table[] = { +static const struct trans_ctl_table trans_net_table[] = { { NET_CORE, "core", trans_net_core_table }, /* NET_ETHER not used */ /* NET_802 not used */ @@ -716,7 +716,7 @@ static struct trans_ctl_table trans_net_table[] = { {} }; -static struct trans_ctl_table trans_fs_quota_table[] = { +static const struct trans_ctl_table trans_fs_quota_table[] = { { FS_DQ_LOOKUPS, "lookups" }, { FS_DQ_DROPS, "drops" }, { FS_DQ_READS, "reads" }, @@ -729,7 +729,7 @@ static struct trans_ctl_table trans_fs_quota_table[] = { {} }; -static struct trans_ctl_table trans_fs_xfs_table[] = { +static const struct trans_ctl_table trans_fs_xfs_table[] = { { XFS_RESTRICT_CHOWN, "restrict_chown" }, { XFS_SGID_INHERIT, "irix_sgid_inherit" }, { XFS_SYMLINK_MODE, "irix_symlink_mode" }, @@ -750,24 +750,24 @@ static struct trans_ctl_table trans_fs_xfs_table[] = { {} }; -static struct trans_ctl_table trans_fs_ocfs2_nm_table[] = { +static const struct trans_ctl_table trans_fs_ocfs2_nm_table[] = { { 1, "hb_ctl_path" }, {} }; -static struct trans_ctl_table trans_fs_ocfs2_table[] = { +static const struct trans_ctl_table trans_fs_ocfs2_table[] = { { 1, "nm", trans_fs_ocfs2_nm_table }, {} }; -static struct trans_ctl_table trans_inotify_table[] = { +static const struct trans_ctl_table trans_inotify_table[] = { { INOTIFY_MAX_USER_INSTANCES, "max_user_instances" }, { INOTIFY_MAX_USER_WATCHES, "max_user_watches" }, { INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" }, {} }; -static struct trans_ctl_table trans_fs_table[] = { +static const struct trans_ctl_table trans_fs_table[] = { { FS_NRINODE, "inode-nr" }, { FS_STATINODE, "inode-state" }, /* FS_MAXINODE unused */ @@ -793,11 +793,11 @@ static struct trans_ctl_table trans_fs_table[] = { {} }; -static struct trans_ctl_table trans_debug_table[] = { +static const struct trans_ctl_table trans_debug_table[] = { {} }; -static struct trans_ctl_table trans_cdrom_table[] = { +static const struct trans_ctl_table trans_cdrom_table[] = { { DEV_CDROM_INFO, "info" }, { DEV_CDROM_AUTOCLOSE, "autoclose" }, { DEV_CDROM_AUTOEJECT, "autoeject" }, @@ -807,12 +807,12 @@ static struct trans_ctl_table trans_cdrom_table[] = { {} }; -static struct trans_ctl_table trans_ipmi_table[] = { +static const struct trans_ctl_table trans_ipmi_table[] = { { DEV_IPMI_POWEROFF_POWERCYCLE, "poweroff_powercycle" }, {} }; -static struct trans_ctl_table trans_mac_hid_files[] = { +static const struct trans_ctl_table trans_mac_hid_files[] = { /* DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES unused */ /* DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES unused */ { DEV_MAC_HID_MOUSE_BUTTON_EMULATION, "mouse_button_emulation" }, @@ -822,35 +822,35 @@ static struct trans_ctl_table trans_mac_hid_files[] = { {} }; -static struct trans_ctl_table trans_raid_table[] = { +static const struct trans_ctl_table trans_raid_table[] = { { DEV_RAID_SPEED_LIMIT_MIN, "speed_limit_min" }, { DEV_RAID_SPEED_LIMIT_MAX, "speed_limit_max" }, {} }; -static struct trans_ctl_table trans_scsi_table[] = { +static const struct trans_ctl_table trans_scsi_table[] = { { DEV_SCSI_LOGGING_LEVEL, "logging_level" }, {} }; -static struct trans_ctl_table trans_parport_default_table[] = { +static const struct trans_ctl_table trans_parport_default_table[] = { { DEV_PARPORT_DEFAULT_TIMESLICE, "timeslice" }, { DEV_PARPORT_DEFAULT_SPINTIME, "spintime" }, {} }; -static struct trans_ctl_table trans_parport_device_table[] = { +static const struct trans_ctl_table trans_parport_device_table[] = { { DEV_PARPORT_DEVICE_TIMESLICE, "timeslice" }, {} }; -static struct 
trans_ctl_table trans_parport_devices_table[] = { +static const struct trans_ctl_table trans_parport_devices_table[] = { { DEV_PARPORT_DEVICES_ACTIVE, "active" }, { 0, NULL, trans_parport_device_table }, {} }; -static struct trans_ctl_table trans_parport_parport_table[] = { +static const struct trans_ctl_table trans_parport_parport_table[] = { { DEV_PARPORT_SPINTIME, "spintime" }, { DEV_PARPORT_BASE_ADDR, "base-addr" }, { DEV_PARPORT_IRQ, "irq" }, @@ -864,13 +864,13 @@ static struct trans_ctl_table trans_parport_parport_table[] = { { DEV_PARPORT_AUTOPROBE + 4, "autoprobe3" }, {} }; -static struct trans_ctl_table trans_parport_table[] = { +static const struct trans_ctl_table trans_parport_table[] = { { DEV_PARPORT_DEFAULT, "default", trans_parport_default_table }, { 0, NULL, trans_parport_parport_table }, {} }; -static struct trans_ctl_table trans_dev_table[] = { +static const struct trans_ctl_table trans_dev_table[] = { { DEV_CDROM, "cdrom", trans_cdrom_table }, /* DEV_HWMON unused */ { DEV_PARPORT, "parport", trans_parport_table }, @@ -881,19 +881,19 @@ static struct trans_ctl_table trans_dev_table[] = { {} }; -static struct trans_ctl_table trans_bus_isa_table[] = { +static const struct trans_ctl_table trans_bus_isa_table[] = { { BUS_ISA_MEM_BASE, "membase" }, { BUS_ISA_PORT_BASE, "portbase" }, { BUS_ISA_PORT_SHIFT, "portshift" }, {} }; -static struct trans_ctl_table trans_bus_table[] = { +static const struct trans_ctl_table trans_bus_table[] = { { CTL_BUS_ISA, "isa", trans_bus_isa_table }, {} }; -static struct trans_ctl_table trans_arlan_conf_table0[] = { +static const struct trans_ctl_table trans_arlan_conf_table0[] = { { 1, "spreadingCode" }, { 2, "channelNumber" }, { 3, "scramblingDisable" }, @@ -964,7 +964,7 @@ static struct trans_ctl_table trans_arlan_conf_table0[] = { {} }; -static struct trans_ctl_table trans_arlan_conf_table1[] = { +static const struct trans_ctl_table trans_arlan_conf_table1[] = { { 1, "spreadingCode" }, { 2, "channelNumber" }, { 3, "scramblingDisable" }, @@ -1035,7 +1035,7 @@ static struct trans_ctl_table trans_arlan_conf_table1[] = { {} }; -static struct trans_ctl_table trans_arlan_conf_table2[] = { +static const struct trans_ctl_table trans_arlan_conf_table2[] = { { 1, "spreadingCode" }, { 2, "channelNumber" }, { 3, "scramblingDisable" }, @@ -1106,7 +1106,7 @@ static struct trans_ctl_table trans_arlan_conf_table2[] = { {} }; -static struct trans_ctl_table trans_arlan_conf_table3[] = { +static const struct trans_ctl_table trans_arlan_conf_table3[] = { { 1, "spreadingCode" }, { 2, "channelNumber" }, { 3, "scramblingDisable" }, @@ -1177,7 +1177,7 @@ static struct trans_ctl_table trans_arlan_conf_table3[] = { {} }; -static struct trans_ctl_table trans_arlan_table[] = { +static const struct trans_ctl_table trans_arlan_table[] = { { 1, "arlan0", trans_arlan_conf_table0 }, { 2, "arlan1", trans_arlan_conf_table1 }, { 3, "arlan2", trans_arlan_conf_table2 }, @@ -1185,13 +1185,13 @@ static struct trans_ctl_table trans_arlan_table[] = { {} }; -static struct trans_ctl_table trans_s390dbf_table[] = { +static const struct trans_ctl_table trans_s390dbf_table[] = { { 5678 /* CTL_S390DBF_STOPPABLE */, "debug_stoppable" }, { 5679 /* CTL_S390DBF_ACTIVE */, "debug_active" }, {} }; -static struct trans_ctl_table trans_sunrpc_table[] = { +static const struct trans_ctl_table trans_sunrpc_table[] = { { CTL_RPCDEBUG, "rpc_debug" }, { CTL_NFSDEBUG, "nfs_debug" }, { CTL_NFSDDEBUG, "nfsd_debug" }, @@ -1203,7 +1203,7 @@ static struct trans_ctl_table trans_sunrpc_table[] = { {} }; -static 
struct trans_ctl_table trans_pm_table[] = { +static const struct trans_ctl_table trans_pm_table[] = { { 1 /* CTL_PM_SUSPEND */, "suspend" }, { 2 /* CTL_PM_CMODE */, "cmode" }, { 3 /* CTL_PM_P0 */, "p0" }, @@ -1211,13 +1211,13 @@ static struct trans_ctl_table trans_pm_table[] = { {} }; -static struct trans_ctl_table trans_frv_table[] = { +static const struct trans_ctl_table trans_frv_table[] = { { 1, "cache-mode" }, { 2, "pin-cxnr" }, {} }; -static struct trans_ctl_table trans_root_table[] = { +static const struct trans_ctl_table trans_root_table[] = { { CTL_KERN, "kernel", trans_kern_table }, { CTL_VM, "vm", trans_vm_table }, { CTL_NET, "net", trans_net_table }, @@ -1261,15 +1261,14 @@ static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) return table; } -static struct trans_ctl_table *sysctl_binary_lookup(struct ctl_table *table) +static const struct trans_ctl_table *sysctl_binary_lookup(struct ctl_table *table) { struct ctl_table *test; - struct trans_ctl_table *ref; - int depth, cur_depth; + const struct trans_ctl_table *ref; + int cur_depth; - depth = sysctl_depth(table); + cur_depth = sysctl_depth(table); - cur_depth = depth; ref = trans_root_table; repeat: test = sysctl_parent(table, cur_depth); @@ -1437,7 +1436,7 @@ static void sysctl_check_leaf(struct nsproxy *namespaces, static void sysctl_check_bin_path(struct ctl_table *table, const char **fail) { - struct trans_ctl_table *ref; + const struct trans_ctl_table *ref; ref = sysctl_binary_lookup(table); if (table->ctl_name && !ref) diff --git a/kernel/time.c b/kernel/time.c index 33af3e55570..a5ec013b6c8 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -39,6 +39,8 @@ #include <asm/uaccess.h> #include <asm/unistd.h> +#include "timeconst.h" + /* * The timezone where the local system is located. Used as a default by some * programs who obtain this value by using gettimeofday. @@ -93,7 +95,8 @@ asmlinkage long sys_stime(time_t __user *tptr) #endif /* __ARCH_WANT_SYS_TIME */ -asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __user *tz) +asmlinkage long sys_gettimeofday(struct timeval __user *tv, + struct timezone __user *tz) { if (likely(tv != NULL)) { struct timeval ktv; @@ -118,7 +121,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __us * hard to make the program warp the clock precisely n hours) or * compile in the timezone information into the kernel. Bad, bad.... * - * - TYT, 1992-01-01 + * - TYT, 1992-01-01 * * The best thing to do is to keep the CMOS clock in universal time (UTC) * as real UNIX machines always do it. 
This avoids all headaches about @@ -240,7 +243,11 @@ unsigned int inline jiffies_to_msecs(const unsigned long j) #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); #else - return (j * MSEC_PER_SEC) / HZ; +# if BITS_PER_LONG == 32 + return ((u64)HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32; +# else + return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN; +# endif #endif } EXPORT_SYMBOL(jiffies_to_msecs); @@ -252,7 +259,11 @@ unsigned int inline jiffies_to_usecs(const unsigned long j) #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC); #else - return (j * USEC_PER_SEC) / HZ; +# if BITS_PER_LONG == 32 + return ((u64)HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32; +# else + return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN; +# endif #endif } EXPORT_SYMBOL(jiffies_to_usecs); @@ -267,7 +278,7 @@ EXPORT_SYMBOL(jiffies_to_usecs); * * This function should be only used for timestamps returned by * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because - * it doesn't handle the better resolution of the later. + * it doesn't handle the better resolution of the latter. */ struct timespec timespec_trunc(struct timespec t, unsigned gran) { @@ -315,7 +326,7 @@ EXPORT_SYMBOL_GPL(getnstimeofday); * This algorithm was first published by Gauss (I think). * * WARNING: this function will overflow on 2106-02-07 06:28:16 on - * machines were long is 32-bit! (However, as time_t is signed, we + * machines where long is 32-bit! (However, as time_t is signed, we * will already get problems at other places on 2038-01-19 03:14:08) */ unsigned long @@ -352,7 +363,7 @@ EXPORT_SYMBOL(mktime); * normalize to the timespec storage format * * Note: The tv_nsec part is always in the range of - * 0 <= tv_nsec < NSEC_PER_SEC + * 0 <= tv_nsec < NSEC_PER_SEC * For negative values only the tv_sec field is negative ! */ void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) @@ -453,12 +464,13 @@ unsigned long msecs_to_jiffies(const unsigned int m) /* * Generic case - multiply, round and divide. But first * check that if we are doing a net multiplication, that - * we wouldnt overflow: + * we wouldn't overflow: */ if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) return MAX_JIFFY_OFFSET; - return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC; + return ((u64)MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) + >> MSEC_TO_HZ_SHR32; #endif } EXPORT_SYMBOL(msecs_to_jiffies); @@ -472,7 +484,8 @@ unsigned long usecs_to_jiffies(const unsigned int u) #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) return u * (HZ / USEC_PER_SEC); #else - return (u * HZ + USEC_PER_SEC - 1) / USEC_PER_SEC; + return ((u64)USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) + >> USEC_TO_HZ_SHR32; #endif } EXPORT_SYMBOL(usecs_to_jiffies); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 3e59fce6dd4..3d1e3e1a197 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -133,7 +133,7 @@ static void clockevents_do_notify(unsigned long reason, void *dev) } /* - * Called after a notify add to make devices availble which were + * Called after a notify add to make devices available which were * released from the notifier call. 
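The new #else branches above convert via one 64-bit multiply and a shift instead of a multiply and divide, using constants generated into timeconst.h by the script added below. The identity is easy to check outside the kernel: for HZ == 250, the canned table further down gives HZ_TO_MSEC_MUL32 = 0x80000000 with SHR32 = 29 (exactly x4) and MSEC_TO_HZ_MUL32 = 0x80000000, ADJ32 = 0x180000000, SHR32 = 33 (divide by 4, rounding up). A standalone userspace check of those two rows (constants copied from the table):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define HZ               250
#define HZ_TO_MSEC_MUL32 UINT64_C(0x80000000)
#define HZ_TO_MSEC_SHR32 29
#define MSEC_TO_HZ_MUL32 UINT64_C(0x80000000)
#define MSEC_TO_HZ_ADJ32 UINT64_C(0x180000000)
#define MSEC_TO_HZ_SHR32 33

int main(void)
{
	uint32_t v;

	for (v = 0; v < 1000000; v++) {
		/* jiffies -> msec: exact, j * (1000 / HZ) */
		assert(((HZ_TO_MSEC_MUL32 * v) >> HZ_TO_MSEC_SHR32) ==
		       v * (1000 / HZ));
		/* msec -> jiffies: rounded up, (m * HZ + 999) / 1000 */
		assert(((MSEC_TO_HZ_MUL32 * v + MSEC_TO_HZ_ADJ32) >>
			MSEC_TO_HZ_SHR32) ==
		       (v * (uint64_t)HZ + 999) / 1000);
	}
	puts("HZ=250 multiply-shift conversions match exact math");
	return 0;
}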
*/ static void clockevents_notify_released(void) @@ -218,6 +218,8 @@ void clockevents_exchange_device(struct clock_event_device *old, */ void clockevents_notify(unsigned long reason, void *arg) { + struct list_head *node, *tmp; + spin_lock(&clockevents_lock); clockevents_do_notify(reason, arg); @@ -227,13 +229,8 @@ void clockevents_notify(unsigned long reason, void *arg) * Unregister the clock event devices which were * released from the users in the notify chain. */ - while (!list_empty(&clockevents_released)) { - struct clock_event_device *dev; - - dev = list_entry(clockevents_released.next, - struct clock_event_device, list); - list_del(&dev->list); - } + list_for_each_safe(node, tmp, &clockevents_released) + list_del(node); break; default: break; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 81afb3927ec..548c436a776 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -91,7 +91,6 @@ static void clocksource_ratewd(struct clocksource *cs, int64_t delta) cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); clocksource_change_rating(cs, 0); - cs->flags &= ~CLOCK_SOURCE_WATCHDOG; list_del(&cs->wd_list); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 88267f0a847..fa9bb73dbdb 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -681,7 +681,7 @@ int tick_check_oneshot_change(int allow_nohz) if (ts->nohz_mode != NOHZ_MODE_INACTIVE) return 0; - if (!timekeeping_is_continuous() || !tick_is_oneshot_available()) + if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) return 0; if (!allow_nohz) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cd5dbc4579c..1af9fb050fe 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -201,9 +201,9 @@ static inline s64 __get_nsec_offset(void) { return 0; } #endif /** - * timekeeping_is_continuous - check to see if timekeeping is free running + * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres */ -int timekeeping_is_continuous(void) +int timekeeping_valid_for_hres(void) { unsigned long seq; int ret; @@ -364,7 +364,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * with losing too many ticks, otherwise we would overadjust and * produce an even larger error. The smaller the adjustment the * faster we try to adjust for it, as lost ticks can do less harm - * here. This is tuned so that an error of about 1 msec is adusted + * here. This is tuned so that an error of about 1 msec is adjusted * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). */ error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ); diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl new file mode 100644 index 00000000000..62b1287932e --- /dev/null +++ b/kernel/timeconst.pl @@ -0,0 +1,402 @@ +#!/usr/bin/perl +# ----------------------------------------------------------------------- +# +# Copyright 2007 rPath, Inc. - All Rights Reserved +# +# This file is part of the Linux kernel, and is made available under +# the terms of the GNU General Public License version 2 or (at your +# option) any later version; incorporated herein by reference. 
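The clockevents change above replaces the open-coded while/list_empty loop with list_for_each_safe(), the standard idiom for walking a list while unlinking entries: the second cursor caches the next pointer so list_del() cannot invalidate the iteration. A sketch of the idiom (hypothetical list):

#include <linux/list.h>

static void drain_list(struct list_head *head)
{
	struct list_head *node, *tmp;

	/* tmp holds node->next across the body, so deleting
	 * node is safe mid-walk. */
	list_for_each_safe(node, tmp, head)
		list_del(node);
}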
+# +# ----------------------------------------------------------------------- +# + +# +# Usage: timeconst.pl HZ > timeconst.h +# + +# Precomputed values for systems without Math::BigInt +# Generated by: +# timeconst.pl --can 24 32 48 64 100 122 128 200 250 256 300 512 1000 1024 1200 +%canned_values = ( + 24 => [ + '0xa6aaaaab','0x2aaaaaa',26, + '0xa6aaaaaaaaaaaaab','0x2aaaaaaaaaaaaaa',58, + 125,3, + '0xc49ba5e4','0x1fbe76c8b4',37, + '0xc49ba5e353f7ceda','0x1fbe76c8b439581062',69, + 3,125, + '0xa2c2aaab','0xaaaa',16, + '0xa2c2aaaaaaaaaaab','0xaaaaaaaaaaaa',48, + 125000,3, + '0xc9539b89','0x7fffbce4217d',47, + '0xc9539b8887229e91','0x7fffbce4217d2849cb25',79, + 3,125000, + ], 32 => [ + '0xfa000000','0x6000000',27, + '0xfa00000000000000','0x600000000000000',59, + 125,4, + '0x83126e98','0xfdf3b645a',36, + '0x83126e978d4fdf3c','0xfdf3b645a1cac0831',68, + 4,125, + '0xf4240000','0x0',17, + '0xf424000000000000','0x0',49, + 31250,1, + '0x8637bd06','0x3fff79c842fa',46, + '0x8637bd05af6c69b6','0x3fff79c842fa5093964a',78, + 1,31250, + ], 48 => [ + '0xa6aaaaab','0x6aaaaaa',27, + '0xa6aaaaaaaaaaaaab','0x6aaaaaaaaaaaaaa',59, + 125,6, + '0xc49ba5e4','0xfdf3b645a',36, + '0xc49ba5e353f7ceda','0xfdf3b645a1cac0831',68, + 6,125, + '0xa2c2aaab','0x15555',17, + '0xa2c2aaaaaaaaaaab','0x1555555555555',49, + 62500,3, + '0xc9539b89','0x3fffbce4217d',46, + '0xc9539b8887229e91','0x3fffbce4217d2849cb25',78, + 3,62500, + ], 64 => [ + '0xfa000000','0xe000000',28, + '0xfa00000000000000','0xe00000000000000',60, + 125,8, + '0x83126e98','0x7ef9db22d',35, + '0x83126e978d4fdf3c','0x7ef9db22d0e560418',67, + 8,125, + '0xf4240000','0x0',18, + '0xf424000000000000','0x0',50, + 15625,1, + '0x8637bd06','0x1fff79c842fa',45, + '0x8637bd05af6c69b6','0x1fff79c842fa5093964a',77, + 1,15625, + ], 100 => [ + '0xa0000000','0x0',28, + '0xa000000000000000','0x0',60, + 10,1, + '0xcccccccd','0x733333333',35, + '0xcccccccccccccccd','0x73333333333333333',67, + 1,10, + '0x9c400000','0x0',18, + '0x9c40000000000000','0x0',50, + 10000,1, + '0xd1b71759','0x1fff2e48e8a7',45, + '0xd1b71758e219652c','0x1fff2e48e8a71de69ad4',77, + 1,10000, + ], 122 => [ + '0x8325c53f','0xfbcda3a',28, + '0x8325c53ef368eb05','0xfbcda3ac10c9714',60, + 500,61, + '0xf9db22d1','0x7fbe76c8b',35, + '0xf9db22d0e560418a','0x7fbe76c8b43958106',67, + 61,500, + '0x8012e2a0','0x3ef36',18, + '0x8012e29f79b47583','0x3ef368eb04325',50, + 500000,61, + '0xffda4053','0x1ffffbce4217',45, + '0xffda4052d666a983','0x1ffffbce4217d2849cb2',77, + 61,500000, + ], 128 => [ + '0xfa000000','0x1e000000',29, + '0xfa00000000000000','0x1e00000000000000',61, + 125,16, + '0x83126e98','0x3f7ced916',34, + '0x83126e978d4fdf3c','0x3f7ced916872b020c',66, + 16,125, + '0xf4240000','0x40000',19, + '0xf424000000000000','0x4000000000000',51, + 15625,2, + '0x8637bd06','0xfffbce4217d',44, + '0x8637bd05af6c69b6','0xfffbce4217d2849cb25',76, + 2,15625, + ], 200 => [ + '0xa0000000','0x0',29, + '0xa000000000000000','0x0',61, + 5,1, + '0xcccccccd','0x333333333',34, + '0xcccccccccccccccd','0x33333333333333333',66, + 1,5, + '0x9c400000','0x0',19, + '0x9c40000000000000','0x0',51, + 5000,1, + '0xd1b71759','0xfff2e48e8a7',44, + '0xd1b71758e219652c','0xfff2e48e8a71de69ad4',76, + 1,5000, + ], 250 => [ + '0x80000000','0x0',29, + '0x8000000000000000','0x0',61, + 4,1, + '0x80000000','0x180000000',33, + '0x8000000000000000','0x18000000000000000',65, + 1,4, + '0xfa000000','0x0',20, + '0xfa00000000000000','0x0',52, + 4000,1, + '0x83126e98','0x7ff7ced9168',43, + '0x83126e978d4fdf3c','0x7ff7ced916872b020c4',75, + 1,4000, + ], 256 => [ + 
'0xfa000000','0x3e000000',30, + '0xfa00000000000000','0x3e00000000000000',62, + 125,32, + '0x83126e98','0x1fbe76c8b',33, + '0x83126e978d4fdf3c','0x1fbe76c8b43958106',65, + 32,125, + '0xf4240000','0xc0000',20, + '0xf424000000000000','0xc000000000000',52, + 15625,4, + '0x8637bd06','0x7ffde7210be',43, + '0x8637bd05af6c69b6','0x7ffde7210be9424e592',75, + 4,15625, + ], 300 => [ + '0xd5555556','0x2aaaaaaa',30, + '0xd555555555555556','0x2aaaaaaaaaaaaaaa',62, + 10,3, + '0x9999999a','0x1cccccccc',33, + '0x999999999999999a','0x1cccccccccccccccc',65, + 3,10, + '0xd0555556','0xaaaaa',20, + '0xd055555555555556','0xaaaaaaaaaaaaa',52, + 10000,3, + '0x9d495183','0x7ffcb923a29',43, + '0x9d495182a9930be1','0x7ffcb923a29c779a6b5',75, + 3,10000, + ], 512 => [ + '0xfa000000','0x7e000000',31, + '0xfa00000000000000','0x7e00000000000000',63, + 125,64, + '0x83126e98','0xfdf3b645',32, + '0x83126e978d4fdf3c','0xfdf3b645a1cac083',64, + 64,125, + '0xf4240000','0x1c0000',21, + '0xf424000000000000','0x1c000000000000',53, + 15625,8, + '0x8637bd06','0x3ffef39085f',42, + '0x8637bd05af6c69b6','0x3ffef39085f4a1272c9',74, + 8,15625, + ], 1000 => [ + '0x80000000','0x0',31, + '0x8000000000000000','0x0',63, + 1,1, + '0x80000000','0x0',31, + '0x8000000000000000','0x0',63, + 1,1, + '0xfa000000','0x0',22, + '0xfa00000000000000','0x0',54, + 1000,1, + '0x83126e98','0x1ff7ced9168',41, + '0x83126e978d4fdf3c','0x1ff7ced916872b020c4',73, + 1,1000, + ], 1024 => [ + '0xfa000000','0xfe000000',32, + '0xfa00000000000000','0xfe00000000000000',64, + 125,128, + '0x83126e98','0x7ef9db22',31, + '0x83126e978d4fdf3c','0x7ef9db22d0e56041',63, + 128,125, + '0xf4240000','0x3c0000',22, + '0xf424000000000000','0x3c000000000000',54, + 15625,16, + '0x8637bd06','0x1fff79c842f',41, + '0x8637bd05af6c69b6','0x1fff79c842fa5093964',73, + 16,15625, + ], 1200 => [ + '0xd5555556','0xd5555555',32, + '0xd555555555555556','0xd555555555555555',64, + 5,6, + '0x9999999a','0x66666666',31, + '0x999999999999999a','0x6666666666666666',63, + 6,5, + '0xd0555556','0x2aaaaa',22, + '0xd055555555555556','0x2aaaaaaaaaaaaa',54, + 2500,3, + '0x9d495183','0x1ffcb923a29',41, + '0x9d495182a9930be1','0x1ffcb923a29c779a6b5',73, + 3,2500, + ] +); + +$has_bigint = eval 'use Math::BigInt qw(bgcd); 1;'; + +sub bint($) +{ + my($x) = @_; + return Math::BigInt->new($x); +} + +# +# Constants for division by reciprocal multiplication. 
+# (bits, numerator, denominator) +# +sub fmul($$$) +{ + my ($b,$n,$d) = @_; + + $n = bint($n); + $d = bint($d); + + return scalar (($n << $b)+$d-bint(1))/$d; +} + +sub fadj($$$) +{ + my($b,$n,$d) = @_; + + $n = bint($n); + $d = bint($d); + + $d = $d/bgcd($n, $d); + return scalar (($d-bint(1)) << $b)/$d; +} + +sub fmuls($$$) { + my($b,$n,$d) = @_; + my($s,$m); + my($thres) = bint(1) << ($b-1); + + $n = bint($n); + $d = bint($d); + + for ($s = 0; 1; $s++) { + $m = fmul($s,$n,$d); + return $s if ($m >= $thres); + } + return 0; +} + +# Provides mul, adj, and shr factors for a specific +# (bit, time, hz) combination +sub muladj($$$) { + my($b, $t, $hz) = @_; + my $s = fmuls($b, $t, $hz); + my $m = fmul($s, $t, $hz); + my $a = fadj($s, $t, $hz); + return ($m->as_hex(), $a->as_hex(), $s); +} + +# Provides numerator, denominator values +sub numden($$) { + my($n, $d) = @_; + my $g = bgcd($n, $d); + return ($n/$g, $d/$g); +} + +# All values for a specific (time, hz) combo +sub conversions($$) { + my ($t, $hz) = @_; + my @val = (); + + # HZ_TO_xx + push(@val, muladj(32, $t, $hz)); + push(@val, muladj(64, $t, $hz)); + push(@val, numden($t, $hz)); + + # xx_TO_HZ + push(@val, muladj(32, $hz, $t)); + push(@val, muladj(64, $hz, $t)); + push(@val, numden($hz, $t)); + + return @val; +} + +sub compute_values($) { + my($hz) = @_; + my @val = (); + my $s, $m, $a, $g; + + if (!$has_bigint) { + die "$0: HZ == $hz not canned and ". + "Math::BigInt not available\n"; + } + + # MSEC conversions + push(@val, conversions(1000, $hz)); + + # USEC conversions + push(@val, conversions(1000000, $hz)); + + return @val; +} + +sub output($@) +{ + my($hz, @val) = @_; + my $pfx, $bit, $suf, $s, $m, $a; + + print "/* Automatically generated by kernel/timeconst.pl */\n"; + print "/* Conversion constants for HZ == $hz */\n"; + print "\n"; + print "#ifndef KERNEL_TIMECONST_H\n"; + print "#define KERNEL_TIMECONST_H\n"; + print "\n"; + + print "#include <linux/param.h>\n"; + + print "\n"; + print "#if HZ != $hz\n"; + print "#error \"kernel/timeconst.h has the wrong HZ value!\"\n"; + print "#endif\n"; + print "\n"; + + foreach $pfx ('HZ_TO_MSEC','MSEC_TO_HZ', + 'HZ_TO_USEC','USEC_TO_HZ') { + foreach $bit (32, 64) { + foreach $suf ('MUL', 'ADJ', 'SHR') { + printf "#define %-23s %s\n", + "${pfx}_$suf$bit", shift(@val); + } + } + foreach $suf ('NUM', 'DEN') { + printf "#define %-23s %s\n", + "${pfx}_$suf", shift(@val); + } + } + + print "\n"; + print "#endif /* KERNEL_TIMECONST_H */\n"; +} + +($hz) = @ARGV; + +# Use this to generate the %canned_values structure +if ($hz eq '--can') { + shift(@ARGV); + @hzlist = sort {$a <=> $b} (@ARGV); + + print "# Precomputed values for systems without Math::BigInt\n"; + print "# Generated by:\n"; + print "# timeconst.pl --can ", join(' ', @hzlist), "\n"; + print "\%canned_values = (\n"; + my $pf = "\t"; + foreach $hz (@hzlist) { + my @values = compute_values($hz); + print "$pf$hz => [\n"; + while (scalar(@values)) { + my $bit; + foreach $bit (32, 64) { + my $m = shift(@values); + my $a = shift(@values); + my $s = shift(@values); + print "\t\t\'",$m,"\',\'",$a,"\',",$s,",\n"; + } + my $n = shift(@values); + my $d = shift(@values); + print "\t\t",$n,',',$d,",\n"; + } + print "\t]"; + $pf = ', '; + } + print "\n);\n"; +} else { + $hz += 0; # Force to number + if ($hz < 1) { + die "Usage: $0 HZ\n"; + } + + @val = @{$canned_values{$hz}}; + if (!defined(@val)) { + @val = compute_values($hz); + } + output($hz, @val); +} +exit 0; diff --git a/kernel/timer.c b/kernel/timer.c index 70b29b59343..99b00a25f88 
100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -327,7 +327,7 @@ static void timer_stats_account_timer(struct timer_list *timer) {} * init_timer() must be done to a timer prior calling *any* of the * other timer functions. */ -void fastcall init_timer(struct timer_list *timer) +void init_timer(struct timer_list *timer) { timer->entry.next = NULL; timer->base = __raw_get_cpu_var(tvec_bases); @@ -339,7 +339,7 @@ void fastcall init_timer(struct timer_list *timer) } EXPORT_SYMBOL(init_timer); -void fastcall init_timer_deferrable(struct timer_list *timer) +void init_timer_deferrable(struct timer_list *timer) { init_timer(timer); timer_set_deferrable(timer); @@ -979,7 +979,7 @@ asmlinkage long sys_getppid(void) int pid; rcu_read_lock(); - pid = task_tgid_nr_ns(current->real_parent, current->nsproxy->pid_ns); + pid = task_tgid_vnr(current->real_parent); rcu_read_unlock(); return pid; @@ -1042,7 +1042,7 @@ static void process_timeout(unsigned long __data) * * In all cases the return value is guaranteed to be non-negative. */ -fastcall signed long __sched schedule_timeout(signed long timeout) +signed long __sched schedule_timeout(signed long timeout) { struct timer_list timer; unsigned long expire; diff --git a/kernel/user.c b/kernel/user.c index bc1c48d35cb..7d7900c5a1f 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -17,6 +17,14 @@ #include <linux/module.h> #include <linux/user_namespace.h> +struct user_namespace init_user_ns = { + .kref = { + .refcount = ATOMIC_INIT(2), + }, + .root_user = &root_user, +}; +EXPORT_SYMBOL_GPL(init_user_ns); + /* * UID task count cache, to get fast user lookup in "alloc_uid" * when changing user ID's (ie setuid() and friends). @@ -427,6 +435,7 @@ void switch_uid(struct user_struct *new_user) suid_keys(current); } +#ifdef CONFIG_USER_NS void release_uids(struct user_namespace *ns) { int i; @@ -451,6 +460,7 @@ void release_uids(struct user_namespace *ns) free_uid(ns->root_user); } +#endif static int __init uid_cache_init(void) { diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 7af90fc4f0f..4c9006275df 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -10,17 +10,6 @@ #include <linux/nsproxy.h> #include <linux/user_namespace.h> -struct user_namespace init_user_ns = { - .kref = { - .refcount = ATOMIC_INIT(2), - }, - .root_user = &root_user, -}; - -EXPORT_SYMBOL_GPL(init_user_ns); - -#ifdef CONFIG_USER_NS - /* * Clone a new ns copying an original user ns, setting refcount to 1 * @old_ns: namespace to clone @@ -84,5 +73,3 @@ void free_user_ns(struct kref *kref) release_uids(ns); kfree(ns); } - -#endif /* CONFIG_USER_NS */ diff --git a/kernel/wait.c b/kernel/wait.c index f9876888a56..c275c56cf2d 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -18,7 +18,7 @@ void init_waitqueue_head(wait_queue_head_t *q) EXPORT_SYMBOL(init_waitqueue_head); -void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) +void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) { unsigned long flags; @@ -29,7 +29,7 @@ void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) } EXPORT_SYMBOL(add_wait_queue); -void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) +void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) { unsigned long flags; @@ -40,7 +40,7 @@ void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) } EXPORT_SYMBOL(add_wait_queue_exclusive); -void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) +void 
remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) { unsigned long flags; @@ -63,7 +63,7 @@ EXPORT_SYMBOL(remove_wait_queue); * stops them from bleeding out - it would still allow subsequent * loads to move into the critical region). */ -void fastcall +void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) { unsigned long flags; @@ -82,7 +82,7 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) } EXPORT_SYMBOL(prepare_to_wait); -void fastcall +void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) { unsigned long flags; @@ -101,7 +101,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) } EXPORT_SYMBOL(prepare_to_wait_exclusive); -void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait) +void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) { unsigned long flags; @@ -157,7 +157,7 @@ EXPORT_SYMBOL(wake_bit_function); * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are * permitted return codes. Nonzero return codes halt waiting and return. */ -int __sched fastcall +int __sched __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, int (*action)(void *), unsigned mode) { @@ -173,7 +173,7 @@ __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, } EXPORT_SYMBOL(__wait_on_bit); -int __sched fastcall out_of_line_wait_on_bit(void *word, int bit, +int __sched out_of_line_wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode) { wait_queue_head_t *wq = bit_waitqueue(word, bit); @@ -183,7 +183,7 @@ int __sched fastcall out_of_line_wait_on_bit(void *word, int bit, } EXPORT_SYMBOL(out_of_line_wait_on_bit); -int __sched fastcall +int __sched __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, int (*action)(void *), unsigned mode) { @@ -201,7 +201,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, } EXPORT_SYMBOL(__wait_on_bit_lock); -int __sched fastcall out_of_line_wait_on_bit_lock(void *word, int bit, +int __sched out_of_line_wait_on_bit_lock(void *word, int bit, int (*action)(void *), unsigned mode) { wait_queue_head_t *wq = bit_waitqueue(word, bit); @@ -211,7 +211,7 @@ int __sched fastcall out_of_line_wait_on_bit_lock(void *word, int bit, } EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); -void fastcall __wake_up_bit(wait_queue_head_t *wq, void *word, int bit) +void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit) { struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); if (waitqueue_active(wq)) @@ -236,13 +236,13 @@ EXPORT_SYMBOL(__wake_up_bit); * may need to use a less regular barrier, such fs/inode.c's smp_mb(), * because spin_unlock() does not guarantee a memory barrier. */ -void fastcall wake_up_bit(void *word, int bit) +void wake_up_bit(void *word, int bit) { __wake_up_bit(bit_waitqueue(word, bit), word, bit); } EXPORT_SYMBOL(wake_up_bit); -fastcall wait_queue_head_t *bit_waitqueue(void *word, int bit) +wait_queue_head_t *bit_waitqueue(void *word, int bit) { const int shift = BITS_PER_LONG == 32 ? 5 : 6; const struct zone *zone = page_zone(virt_to_page(word)); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 52db48e7f6e..ff06611655a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -161,7 +161,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, * We queue the work to the CPU it was submitted, but there is no * guarantee that it will be processed by that CPU. 
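The prepare_to_wait()/finish_wait() pair being stripped of fastcall above is normally used in an open-coded wait loop; the ordering (queue the waiter and set the task state before testing the condition) is what makes a concurrent wake-up safe, as the memory-barrier comment above explains. A sketch of the idiom (hypothetical caller, not code from this patch):

static void wait_for_flag(wait_queue_head_t *wq, int *flag)
{
	DEFINE_WAIT(wait);

	for (;;) {
		/* Enqueue and mark ourselves sleeping before testing,
		 * so a wake-up between the test and schedule() is not
		 * lost. */
		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		if (*flag)
			break;
		schedule();
	}
	finish_wait(wq, &wait);	/* dequeue, back to TASK_RUNNING */
}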
*/ -int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) +int queue_work(struct workqueue_struct *wq, struct work_struct *work) { int ret = 0; @@ -175,7 +175,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) } EXPORT_SYMBOL_GPL(queue_work); -void delayed_work_timer_fn(unsigned long __data) +static void delayed_work_timer_fn(unsigned long __data) { struct delayed_work *dwork = (struct delayed_work *)__data; struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work); @@ -192,7 +192,7 @@ void delayed_work_timer_fn(unsigned long __data) * * Returns 0 if @work was already on a queue, non-zero otherwise. */ -int fastcall queue_delayed_work(struct workqueue_struct *wq, +int queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { timer_stats_timer_set_start_info(&dwork->timer); @@ -388,7 +388,7 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) * This function used to run the workqueues itself. Now we just wait for the * helper threads to do it. */ -void fastcall flush_workqueue(struct workqueue_struct *wq) +void flush_workqueue(struct workqueue_struct *wq) { const cpumask_t *cpu_map = wq_cpu_map(wq); int cpu; @@ -546,7 +546,7 @@ static struct workqueue_struct *keventd_wq __read_mostly; * * This puts a job in the kernel-global workqueue. */ -int fastcall schedule_work(struct work_struct *work) +int schedule_work(struct work_struct *work) { return queue_work(keventd_wq, work); } @@ -560,7 +560,7 @@ EXPORT_SYMBOL(schedule_work); * * After waiting for a given time this puts a job in the kernel-global * workqueue. */ -int fastcall schedule_delayed_work(struct delayed_work *dwork, +int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { timer_stats_timer_set_start_info(&dwork->timer);
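With delayed_work_timer_fn() now static, the timer behind a struct delayed_work is an implementation detail of workqueue.c; users reach it only through the queueing calls above. A minimal usage sketch (hypothetical handler name):

#include <linux/workqueue.h>

static void my_handler(struct work_struct *work)
{
	/* runs later in the keventd workqueue thread */
}

static DECLARE_DELAYED_WORK(my_work, my_handler);

static void kick(void)
{
	/* queue_delayed_work() arms the embedded timer whose expiry
	 * handler is delayed_work_timer_fn(); here we go through the
	 * kernel-global queue, roughly one second out. */
	schedule_delayed_work(&my_work, HZ);
}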