aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2008-07-29 00:07:55 +0200
committer Ingo Molnar <mingo@elte.hu> 2008-07-29 00:07:55 +0200
commit cb28a1bbdb4790378e7366d6c9ee1d2340b84f92 (patch)
tree 316436f77dac75335fd2c3ef5f109e71606c50d3 /kernel
parent b6d4f7e3ef25beb8c658c97867d98883e69dc544 (diff)
parent f934fb19ef34730263e6afc01e8ec27a8a71470f (diff)
Merge branch 'linus' into core/generic-dma-coherent
Conflicts:
	arch/x86/Kconfig

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Kconfig.hz 2
-rw-r--r-- kernel/Makefile 5
-rw-r--r-- kernel/acct.c 222
-rw-r--r-- kernel/auditsc.c 3
-rw-r--r-- kernel/capability.c 338
-rw-r--r-- kernel/cgroup.c 312
-rw-r--r-- kernel/cpu.c 63
-rw-r--r-- kernel/cpuset.c 363
-rw-r--r-- kernel/delayacct.c 16
-rw-r--r-- kernel/exec_domain.c 3
-rw-r--r-- kernel/exit.c 92
-rw-r--r-- kernel/fork.c 130
-rw-r--r-- kernel/irq/chip.c 12
-rw-r--r-- kernel/irq/manage.c 110
-rw-r--r-- kernel/kallsyms.c 2
-rw-r--r-- kernel/kexec.c 104
-rw-r--r-- kernel/kmod.c 15
-rw-r--r-- kernel/kprobes.c 132
-rw-r--r-- kernel/kthread.c 6
-rw-r--r-- kernel/marker.c 25
-rw-r--r-- kernel/module.c 343
-rw-r--r-- kernel/ns_cgroup.c 8
-rw-r--r-- kernel/nsproxy.c 8
-rw-r--r-- kernel/panic.c 22
-rw-r--r-- kernel/pid.c 10
-rw-r--r-- kernel/pid_namespace.c 10
-rw-r--r-- kernel/posix-timers.c 21
-rw-r--r-- kernel/power/Kconfig 13
-rw-r--r-- kernel/power/main.c 201
-rw-r--r-- kernel/power/power.h 2
-rw-r--r-- kernel/power/poweroff.c 4
-rw-r--r-- kernel/power/process.c 2
-rw-r--r-- kernel/power/snapshot.c 88
-rw-r--r-- kernel/printk.c 19
-rw-r--r-- kernel/profile.c 4
-rw-r--r-- kernel/ptrace.c 2
-rw-r--r-- kernel/rcuclassic.c 6
-rw-r--r-- kernel/rcupreempt.c 10
-rw-r--r-- kernel/relay.c 170
-rw-r--r-- kernel/res_counter.c 48
-rw-r--r-- kernel/rtmutex-tester.c 7
-rw-r--r-- kernel/sched.c 393
-rw-r--r-- kernel/sched_fair.c 10
-rw-r--r-- kernel/sched_rt.c 83
-rw-r--r-- kernel/signal.c 179
-rw-r--r-- kernel/smp.c 4
-rw-r--r-- kernel/softirq.c 5
-rw-r--r-- kernel/softlockup.c 70
-rw-r--r-- kernel/stop_machine.c 287
-rw-r--r-- kernel/sys.c 35
-rw-r--r-- kernel/sys_ni.c 7
-rw-r--r-- kernel/sysctl.c 206
-rw-r--r-- kernel/sysctl_check.c 2
-rw-r--r-- kernel/taskstats.c 6
-rw-r--r-- kernel/time/clocksource.c 12
-rw-r--r-- kernel/time/tick-broadcast.c 3
-rw-r--r-- kernel/time/tick-common.c 14
-rw-r--r-- kernel/time/tick-sched.c 16
-rw-r--r-- kernel/trace/ftrace.c 6
-rw-r--r-- kernel/trace/trace.c 4
-rw-r--r-- kernel/trace/trace_irqsoff.c 8
-rw-r--r-- kernel/trace/trace_sched_wakeup.c 27
-rw-r--r-- kernel/trace/trace_sysprof.c 6
-rw-r--r-- kernel/tsacct.c 33
-rw-r--r-- kernel/workqueue.c 149
65 files changed, 2720 insertions, 1798 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 526128a2e62..382dd5a8b2d 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
default 1000 if HZ_1000
config SCHED_HRTICK
- def_bool HIGH_RES_TIMERS && X86
+ def_bool HIGH_RES_TIMERS && USE_GENERIC_SMP_HELPERS
diff --git a/kernel/Makefile b/kernel/Makefile
index e25ee775dc9..4e1d7df7c3e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
# Makefile for the linux kernel.
#
-obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
cpu.o exit.o itimer.o time.o softirq.o resource.o \
sysctl.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
@@ -11,6 +11,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o
+CFLAGS_REMOVE_sched.o = -mno-spe
+
ifdef CONFIG_FTRACE
# Do not trace debug files and internal ftrace files
CFLAGS_REMOVE_lockdep.o = -pg
@@ -22,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
CFLAGS_REMOVE_sched.o = -mno-spe -pg
endif
+obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
diff --git a/kernel/acct.c b/kernel/acct.c
index 91e1cfd734d..dd68b905941 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -75,37 +75,39 @@ int acct_parm[3] = {4, 2, 30};
/*
* External references and all of the globals.
*/
-static void do_acct_process(struct pid_namespace *ns, struct file *);
+static void do_acct_process(struct bsd_acct_struct *acct,
+ struct pid_namespace *ns, struct file *);
/*
* This structure is used so that all the data protected by lock
* can be placed in the same cache line as the lock. This primes
* the cache line to have the data after getting the lock.
*/
-struct acct_glbs {
- spinlock_t lock;
+struct bsd_acct_struct {
volatile int active;
volatile int needcheck;
struct file *file;
struct pid_namespace *ns;
struct timer_list timer;
+ struct list_head list;
};
-static struct acct_glbs acct_globals __cacheline_aligned =
- {__SPIN_LOCK_UNLOCKED(acct_globals.lock)};
+static DEFINE_SPINLOCK(acct_lock);
+static LIST_HEAD(acct_list);
/*
* Called whenever the timer says to check the free space.
*/
-static void acct_timeout(unsigned long unused)
+static void acct_timeout(unsigned long x)
{
- acct_globals.needcheck = 1;
+ struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x;
+ acct->needcheck = 1;
}
/*
* Check the amount of free space and suspend/resume accordingly.
*/
-static int check_free_space(struct file *file)
+static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
{
struct kstatfs sbuf;
int res;
@@ -113,11 +115,11 @@ static int check_free_space(struct file *file)
sector_t resume;
sector_t suspend;
- spin_lock(&acct_globals.lock);
- res = acct_globals.active;
- if (!file || !acct_globals.needcheck)
+ spin_lock(&acct_lock);
+ res = acct->active;
+ if (!file || !acct->needcheck)
goto out;
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
/* May block */
if (vfs_statfs(file->f_path.dentry, &sbuf))
@@ -136,35 +138,35 @@ static int check_free_space(struct file *file)
act = 0;
/*
- * If some joker switched acct_globals.file under us we'ld better be
+ * If some joker switched acct->file under us we'ld better be
* silent and _not_ touch anything.
*/
- spin_lock(&acct_globals.lock);
- if (file != acct_globals.file) {
+ spin_lock(&acct_lock);
+ if (file != acct->file) {
if (act)
res = act>0;
goto out;
}
- if (acct_globals.active) {
+ if (acct->active) {
if (act < 0) {
- acct_globals.active = 0;
+ acct->active = 0;
printk(KERN_INFO "Process accounting paused\n");
}
} else {
if (act > 0) {
- acct_globals.active = 1;
+ acct->active = 1;
printk(KERN_INFO "Process accounting resumed\n");
}
}
- del_timer(&acct_globals.timer);
- acct_globals.needcheck = 0;
- acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
- add_timer(&acct_globals.timer);
- res = acct_globals.active;
+ del_timer(&acct->timer);
+ acct->needcheck = 0;
+ acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+ add_timer(&acct->timer);
+ res = acct->active;
out:
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
return res;
}
@@ -172,39 +174,41 @@ out:
* Close the old accounting file (if currently open) and then replace
* it with file (if non-NULL).
*
- * NOTE: acct_globals.lock MUST be held on entry and exit.
+ * NOTE: acct_lock MUST be held on entry and exit.
*/
-static void acct_file_reopen(struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
+ struct pid_namespace *ns)
{
struct file *old_acct = NULL;
struct pid_namespace *old_ns = NULL;
- if (acct_globals.file) {
- old_acct = acct_globals.file;
- old_ns = acct_globals.ns;
- del_timer(&acct_globals.timer);
- acct_globals.active = 0;
- acct_globals.needcheck = 0;
- acct_globals.file = NULL;
+ if (acct->file) {
+ old_acct = acct->file;
+ old_ns = acct->ns;
+ del_timer(&acct->timer);
+ acct->active = 0;
+ acct->needcheck = 0;
+ acct->file = NULL;
+ acct->ns = NULL;
+ list_del(&acct->list);
}
if (file) {
- acct_globals.file = file;
- acct_globals.ns = get_pid_ns(task_active_pid_ns(current));
- acct_globals.needcheck = 0;
- acct_globals.active = 1;
+ acct->file = file;
+ acct->ns = ns;
+ acct->needcheck = 0;
+ acct->active = 1;
+ list_add(&acct->list, &acct_list);
/* It's been deleted if it was used before so this is safe */
- init_timer(&acct_globals.timer);
- acct_globals.timer.function = acct_timeout;
- acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
- add_timer(&acct_globals.timer);
+ setup_timer(&acct->timer, acct_timeout, (unsigned long)acct);
+ acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+ add_timer(&acct->timer);
}
if (old_acct) {
mnt_unpin(old_acct->f_path.mnt);
- spin_unlock(&acct_globals.lock);
- do_acct_process(old_ns, old_acct);
+ spin_unlock(&acct_lock);
+ do_acct_process(acct, old_ns, old_acct);
filp_close(old_acct, NULL);
- put_pid_ns(old_ns);
- spin_lock(&acct_globals.lock);
+ spin_lock(&acct_lock);
}
}
@@ -212,6 +216,8 @@ static int acct_on(char *name)
{
struct file *file;
int error;
+ struct pid_namespace *ns;
+ struct bsd_acct_struct *acct = NULL;
/* Difference from BSD - they don't do O_APPEND */
file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
@@ -228,18 +234,34 @@ static int acct_on(char *name)
return -EIO;
}
+ ns = task_active_pid_ns(current);
+ if (ns->bacct == NULL) {
+ acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+ if (acct == NULL) {
+ filp_close(file, NULL);
+ return -ENOMEM;
+ }
+ }
+
error = security_acct(file);
if (error) {
+ kfree(acct);
filp_close(file, NULL);
return error;
}
- spin_lock(&acct_globals.lock);
+ spin_lock(&acct_lock);
+ if (ns->bacct == NULL) {
+ ns->bacct = acct;
+ acct = NULL;
+ }
+
mnt_pin(file->f_path.mnt);
- acct_file_reopen(file);
- spin_unlock(&acct_globals.lock);
+ acct_file_reopen(ns->bacct, file, ns);
+ spin_unlock(&acct_lock);
mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+ kfree(acct);
return 0;
}
@@ -269,11 +291,17 @@ asmlinkage long sys_acct(const char __user *name)
error = acct_on(tmp);
putname(tmp);
} else {
+ struct bsd_acct_struct *acct;
+
+ acct = task_active_pid_ns(current)->bacct;
+ if (acct == NULL)
+ return 0;
+
error = security_acct(NULL);
if (!error) {
- spin_lock(&acct_globals.lock);
- acct_file_reopen(NULL);
- spin_unlock(&acct_globals.lock);
+ spin_lock(&acct_lock);
+ acct_file_reopen(acct, NULL, NULL);
+ spin_unlock(&acct_lock);
}
}
return error;
@@ -288,10 +316,16 @@ asmlinkage long sys_acct(const char __user *name)
*/
void acct_auto_close_mnt(struct vfsmount *m)
{
- spin_lock(&acct_globals.lock);
- if (acct_globals.file && acct_globals.file->f_path.mnt == m)
- acct_file_reopen(NULL);
- spin_unlock(&acct_globals.lock);
+ struct bsd_acct_struct *acct;
+
+ spin_lock(&acct_lock);
+restart:
+ list_for_each_entry(acct, &acct_list, list)
+ if (acct->file && acct->file->f_path.mnt == m) {
+ acct_file_reopen(acct, NULL, NULL);
+ goto restart;
+ }
+ spin_unlock(&acct_lock);
}
/**
@@ -303,12 +337,31 @@ void acct_auto_close_mnt(struct vfsmount *m)
*/
void acct_auto_close(struct super_block *sb)
{
- spin_lock(&acct_globals.lock);
- if (acct_globals.file &&
- acct_globals.file->f_path.mnt->mnt_sb == sb) {
- acct_file_reopen(NULL);
+ struct bsd_acct_struct *acct;
+
+ spin_lock(&acct_lock);
+restart:
+ list_for_each_entry(acct, &acct_list, list)
+ if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) {
+ acct_file_reopen(acct, NULL, NULL);
+ goto restart;
+ }
+ spin_unlock(&acct_lock);
+}
+
+void acct_exit_ns(struct pid_namespace *ns)
+{
+ struct bsd_acct_struct *acct;
+
+ spin_lock(&acct_lock);
+ acct = ns->bacct;
+ if (acct != NULL) {
+ if (acct->file != NULL)
+ acct_file_reopen(acct, NULL, NULL);
+
+ kfree(acct);
}
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
}
/*
@@ -425,7 +478,8 @@ static u32 encode_float(u64 value)
/*
* do_acct_process does all actual work. Caller holds the reference to file.
*/
-static void do_acct_process(struct pid_namespace *ns, struct file *file)
+static void do_acct_process(struct bsd_acct_struct *acct,
+ struct pid_namespace *ns, struct file *file)
{
struct pacct_struct *pacct = &current->signal->pacct;
acct_t ac;
@@ -440,7 +494,7 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
* First check to see if there is enough free_space to continue
* the process accounting system.
*/
- if (!check_free_space(file))
+ if (!check_free_space(acct, file))
return;
/*
@@ -577,34 +631,46 @@ void acct_collect(long exitcode, int group_dead)
spin_unlock_irq(&current->sighand->siglock);
}
-/**
- * acct_process - now just a wrapper around do_acct_process
- * @exitcode: task exit code
- *
- * handles process accounting for an exiting task
- */
-void acct_process(void)
+static void acct_process_in_ns(struct pid_namespace *ns)
{
struct file *file = NULL;
- struct pid_namespace *ns;
+ struct bsd_acct_struct *acct;
+ acct = ns->bacct;
/*
* accelerate the common fastpath:
*/
- if (!acct_globals.file)
+ if (!acct || !acct->file)
return;
- spin_lock(&acct_globals.lock);
- file = acct_globals.file;
+ spin_lock(&acct_lock);
+ file = acct->file;
if (unlikely(!file)) {
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
return;
}
get_file(file);
- ns = get_pid_ns(acct_globals.ns);
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
- do_acct_process(ns, file);
+ do_acct_process(acct, ns, file);
fput(file);
- put_pid_ns(ns);
+}
+
+/**
+ * acct_process - now just a wrapper around acct_process_in_ns,
+ * which in turn is a wrapper around do_acct_process.
+ *
+ * handles process accounting for an exiting task
+ */
+void acct_process(void)
+{
+ struct pid_namespace *ns;
+
+ /*
+ * This loop is safe lockless, since current is still
+ * alive and holds its namespace, which in turn holds
+ * its parent.
+ */
+ for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
+ acct_process_in_ns(ns);
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c10e7aae04d..4699950e65b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1476,7 +1476,8 @@ void audit_syscall_entry(int arch, int major,
struct audit_context *context = tsk->audit_context;
enum audit_state state;
- BUG_ON(!context);
+ if (unlikely(!context))
+ return;
/*
* This happens only on certain architectures that make system
diff --git a/kernel/capability.c b/kernel/capability.c
index 901e0fdc3ff..0101e847603 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -115,11 +115,208 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
return 0;
}
+#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
+
+/*
+ * Without filesystem capability support, we nominally support one process
+ * setting the capabilities of another
+ */
+static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
+ kernel_cap_t *pIp, kernel_cap_t *pPp)
+{
+ struct task_struct *target;
+ int ret;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ if (pid && pid != task_pid_vnr(current)) {
+ target = find_task_by_vpid(pid);
+ if (!target) {
+ ret = -ESRCH;
+ goto out;
+ }
+ } else
+ target = current;
+
+ ret = security_capget(target, pEp, pIp, pPp);
+
+out:
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ return ret;
+}
+
+/*
+ * cap_set_pg - set capabilities for all processes in a given process
+ * group. We call this holding task_capability_lock and tasklist_lock.
+ */
+static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ struct task_struct *g, *target;
+ int ret = -EPERM;
+ int found = 0;
+ struct pid *pgrp;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ pgrp = find_vpid(pgrp_nr);
+ do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
+ target = g;
+ while_each_thread(g, target) {
+ if (!security_capset_check(target, effective,
+ inheritable, permitted)) {
+ security_capset_set(target, effective,
+ inheritable, permitted);
+ ret = 0;
+ }
+ found = 1;
+ }
+ } while_each_pid_task(pgrp, PIDTYPE_PGID, g);
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ if (!found)
+ ret = 0;
+ return ret;
+}
+
/*
- * For sys_getproccap() and sys_setproccap(), any of the three
- * capability set pointers may be NULL -- indicating that that set is
- * uninteresting and/or not to be changed.
+ * cap_set_all - set capabilities for all processes other than init
+ * and self. We call this holding task_capability_lock and tasklist_lock.
*/
+static inline int cap_set_all(kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ struct task_struct *g, *target;
+ int ret = -EPERM;
+ int found = 0;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ do_each_thread(g, target) {
+ if (target == current
+ || is_container_init(target->group_leader))
+ continue;
+ found = 1;
+ if (security_capset_check(target, effective, inheritable,
+ permitted))
+ continue;
+ ret = 0;
+ security_capset_set(target, effective, inheritable, permitted);
+ } while_each_thread(g, target);
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ if (!found)
+ ret = 0;
+
+ return ret;
+}
+
+/*
+ * Given the target pid does not refer to the current process we
+ * need more elaborate support... (This support is not present when
+ * filesystem capabilities are configured.)
+ */
+static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ struct task_struct *target;
+ int ret;
+
+ if (!capable(CAP_SETPCAP))
+ return -EPERM;
+
+ if (pid == -1) /* all procs other than current and init */
+ return cap_set_all(effective, inheritable, permitted);
+
+ else if (pid < 0) /* all procs in process group */
+ return cap_set_pg(-pid, effective, inheritable, permitted);
+
+ /* target != current */
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ target = find_task_by_vpid(pid);
+ if (!target)
+ ret = -ESRCH;
+ else {
+ ret = security_capset_check(target, effective, inheritable,
+ permitted);
+
+ /* having verified that the proposed changes are legal,
+ we now put them into effect. */
+ if (!ret)
+ security_capset_set(target, effective, inheritable,
+ permitted);
+ }
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ return ret;
+}
+
+#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */
+
+/*
+ * If we have configured with filesystem capability support, then the
+ * only thing that can change the capabilities of the current process
+ * is the current process. As such, we can't be in this code at the
+ * same time as we are in the process of setting capabilities in this
+ * process. The net result is that we can limit our use of locks to
+ * when we are reading the caps of another process.
+ */
+static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
+ kernel_cap_t *pIp, kernel_cap_t *pPp)
+{
+ int ret;
+
+ if (pid && (pid != task_pid_vnr(current))) {
+ struct task_struct *target;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ target = find_task_by_vpid(pid);
+ if (!target)
+ ret = -ESRCH;
+ else
+ ret = security_capget(target, pEp, pIp, pPp);
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+ } else
+ ret = security_capget(current, pEp, pIp, pPp);
+
+ return ret;
+}
+
+/*
+ * With filesystem capability support configured, the kernel does not
+ * permit the ch