diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-07-29 00:07:55 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-29 00:07:55 +0200 |
commit | cb28a1bbdb4790378e7366d6c9ee1d2340b84f92 (patch) | |
tree | 316436f77dac75335fd2c3ef5f109e71606c50d3 /kernel | |
parent | b6d4f7e3ef25beb8c658c97867d98883e69dc544 (diff) | |
parent | f934fb19ef34730263e6afc01e8ec27a8a71470f (diff) |
Merge branch 'linus' into core/generic-dma-coherent
Conflicts:
arch/x86/Kconfig
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
65 files changed, 2720 insertions, 1798 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index 526128a2e62..382dd5a8b2d 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -55,4 +55,4 @@ config HZ default 1000 if HZ_1000 config SCHED_HRTICK - def_bool HIGH_RES_TIMERS && X86 + def_bool HIGH_RES_TIMERS && USE_GENERIC_SMP_HELPERS diff --git a/kernel/Makefile b/kernel/Makefile index e25ee775dc9..4e1d7df7c3e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ +obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ @@ -11,6 +11,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o +CFLAGS_REMOVE_sched.o = -mno-spe + ifdef CONFIG_FTRACE # Do not trace debug files and internal ftrace files CFLAGS_REMOVE_lockdep.o = -pg @@ -22,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_sched.o = -mno-spe -pg endif +obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ diff --git a/kernel/acct.c b/kernel/acct.c index 91e1cfd734d..dd68b905941 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -75,37 +75,39 @@ int acct_parm[3] = {4, 2, 30}; /* * External references and all of the globals. */ -static void do_acct_process(struct pid_namespace *ns, struct file *); +static void do_acct_process(struct bsd_acct_struct *acct, + struct pid_namespace *ns, struct file *); /* * This structure is used so that all the data protected by lock * can be placed in the same cache line as the lock. This primes * the cache line to have the data after getting the lock. 
*/ -struct acct_glbs { - spinlock_t lock; +struct bsd_acct_struct { volatile int active; volatile int needcheck; struct file *file; struct pid_namespace *ns; struct timer_list timer; + struct list_head list; }; -static struct acct_glbs acct_globals __cacheline_aligned = - {__SPIN_LOCK_UNLOCKED(acct_globals.lock)}; +static DEFINE_SPINLOCK(acct_lock); +static LIST_HEAD(acct_list); /* * Called whenever the timer says to check the free space. */ -static void acct_timeout(unsigned long unused) +static void acct_timeout(unsigned long x) { - acct_globals.needcheck = 1; + struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x; + acct->needcheck = 1; } /* * Check the amount of free space and suspend/resume accordingly. */ -static int check_free_space(struct file *file) +static int check_free_space(struct bsd_acct_struct *acct, struct file *file) { struct kstatfs sbuf; int res; @@ -113,11 +115,11 @@ static int check_free_space(struct file *file) sector_t resume; sector_t suspend; - spin_lock(&acct_globals.lock); - res = acct_globals.active; - if (!file || !acct_globals.needcheck) + spin_lock(&acct_lock); + res = acct->active; + if (!file || !acct->needcheck) goto out; - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); /* May block */ if (vfs_statfs(file->f_path.dentry, &sbuf)) @@ -136,35 +138,35 @@ static int check_free_space(struct file *file) act = 0; /* - * If some joker switched acct_globals.file under us we'ld better be + * If some joker switched acct->file under us we'ld better be * silent and _not_ touch anything. 
*/ - spin_lock(&acct_globals.lock); - if (file != acct_globals.file) { + spin_lock(&acct_lock); + if (file != acct->file) { if (act) res = act>0; goto out; } - if (acct_globals.active) { + if (acct->active) { if (act < 0) { - acct_globals.active = 0; + acct->active = 0; printk(KERN_INFO "Process accounting paused\n"); } } else { if (act > 0) { - acct_globals.active = 1; + acct->active = 1; printk(KERN_INFO "Process accounting resumed\n"); } } - del_timer(&acct_globals.timer); - acct_globals.needcheck = 0; - acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ; - add_timer(&acct_globals.timer); - res = acct_globals.active; + del_timer(&acct->timer); + acct->needcheck = 0; + acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ; + add_timer(&acct->timer); + res = acct->active; out: - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); return res; } @@ -172,39 +174,41 @@ out: * Close the old accounting file (if currently open) and then replace * it with file (if non-NULL). * - * NOTE: acct_globals.lock MUST be held on entry and exit. + * NOTE: acct_lock MUST be held on entry and exit. 
*/ -static void acct_file_reopen(struct file *file) +static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, + struct pid_namespace *ns) { struct file *old_acct = NULL; struct pid_namespace *old_ns = NULL; - if (acct_globals.file) { - old_acct = acct_globals.file; - old_ns = acct_globals.ns; - del_timer(&acct_globals.timer); - acct_globals.active = 0; - acct_globals.needcheck = 0; - acct_globals.file = NULL; + if (acct->file) { + old_acct = acct->file; + old_ns = acct->ns; + del_timer(&acct->timer); + acct->active = 0; + acct->needcheck = 0; + acct->file = NULL; + acct->ns = NULL; + list_del(&acct->list); } if (file) { - acct_globals.file = file; - acct_globals.ns = get_pid_ns(task_active_pid_ns(current)); - acct_globals.needcheck = 0; - acct_globals.active = 1; + acct->file = file; + acct->ns = ns; + acct->needcheck = 0; + acct->active = 1; + list_add(&acct->list, &acct_list); /* It's been deleted if it was used before so this is safe */ - init_timer(&acct_globals.timer); - acct_globals.timer.function = acct_timeout; - acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ; - add_timer(&acct_globals.timer); + setup_timer(&acct->timer, acct_timeout, (unsigned long)acct); + acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ; + add_timer(&acct->timer); } if (old_acct) { mnt_unpin(old_acct->f_path.mnt); - spin_unlock(&acct_globals.lock); - do_acct_process(old_ns, old_acct); + spin_unlock(&acct_lock); + do_acct_process(acct, old_ns, old_acct); filp_close(old_acct, NULL); - put_pid_ns(old_ns); - spin_lock(&acct_globals.lock); + spin_lock(&acct_lock); } } @@ -212,6 +216,8 @@ static int acct_on(char *name) { struct file *file; int error; + struct pid_namespace *ns; + struct bsd_acct_struct *acct = NULL; /* Difference from BSD - they don't do O_APPEND */ file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); @@ -228,18 +234,34 @@ static int acct_on(char *name) return -EIO; } + ns = task_active_pid_ns(current); + if (ns->bacct == NULL) { + acct = 
kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); + if (acct == NULL) { + filp_close(file, NULL); + return -ENOMEM; + } + } + error = security_acct(file); if (error) { + kfree(acct); filp_close(file, NULL); return error; } - spin_lock(&acct_globals.lock); + spin_lock(&acct_lock); + if (ns->bacct == NULL) { + ns->bacct = acct; + acct = NULL; + } + mnt_pin(file->f_path.mnt); - acct_file_reopen(file); - spin_unlock(&acct_globals.lock); + acct_file_reopen(ns->bacct, file, ns); + spin_unlock(&acct_lock); mntput(file->f_path.mnt); /* it's pinned, now give up active reference */ + kfree(acct); return 0; } @@ -269,11 +291,17 @@ asmlinkage long sys_acct(const char __user *name) error = acct_on(tmp); putname(tmp); } else { + struct bsd_acct_struct *acct; + + acct = task_active_pid_ns(current)->bacct; + if (acct == NULL) + return 0; + error = security_acct(NULL); if (!error) { - spin_lock(&acct_globals.lock); - acct_file_reopen(NULL); - spin_unlock(&acct_globals.lock); + spin_lock(&acct_lock); + acct_file_reopen(acct, NULL, NULL); + spin_unlock(&acct_lock); } } return error; @@ -288,10 +316,16 @@ asmlinkage long sys_acct(const char __user *name) */ void acct_auto_close_mnt(struct vfsmount *m) { - spin_lock(&acct_globals.lock); - if (acct_globals.file && acct_globals.file->f_path.mnt == m) - acct_file_reopen(NULL); - spin_unlock(&acct_globals.lock); + struct bsd_acct_struct *acct; + + spin_lock(&acct_lock); +restart: + list_for_each_entry(acct, &acct_list, list) + if (acct->file && acct->file->f_path.mnt == m) { + acct_file_reopen(acct, NULL, NULL); + goto restart; + } + spin_unlock(&acct_lock); } /** @@ -303,12 +337,31 @@ void acct_auto_close_mnt(struct vfsmount *m) */ void acct_auto_close(struct super_block *sb) { - spin_lock(&acct_globals.lock); - if (acct_globals.file && - acct_globals.file->f_path.mnt->mnt_sb == sb) { - acct_file_reopen(NULL); + struct bsd_acct_struct *acct; + + spin_lock(&acct_lock); +restart: + list_for_each_entry(acct, &acct_list, list) + if 
(acct->file && acct->file->f_path.mnt->mnt_sb == sb) { + acct_file_reopen(acct, NULL, NULL); + goto restart; + } + spin_unlock(&acct_lock); +} + +void acct_exit_ns(struct pid_namespace *ns) +{ + struct bsd_acct_struct *acct; + + spin_lock(&acct_lock); + acct = ns->bacct; + if (acct != NULL) { + if (acct->file != NULL) + acct_file_reopen(acct, NULL, NULL); + + kfree(acct); } - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); } /* @@ -425,7 +478,8 @@ static u32 encode_float(u64 value) /* * do_acct_process does all actual work. Caller holds the reference to file. */ -static void do_acct_process(struct pid_namespace *ns, struct file *file) +static void do_acct_process(struct bsd_acct_struct *acct, + struct pid_namespace *ns, struct file *file) { struct pacct_struct *pacct = &current->signal->pacct; acct_t ac; @@ -440,7 +494,7 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file) * First check to see if there is enough free_space to continue * the process accounting system. 
*/ - if (!check_free_space(file)) + if (!check_free_space(acct, file)) return; /* @@ -577,34 +631,46 @@ void acct_collect(long exitcode, int group_dead) spin_unlock_irq(&current->sighand->siglock); } -/** - * acct_process - now just a wrapper around do_acct_process - * @exitcode: task exit code - * - * handles process accounting for an exiting task - */ -void acct_process(void) +static void acct_process_in_ns(struct pid_namespace *ns) { struct file *file = NULL; - struct pid_namespace *ns; + struct bsd_acct_struct *acct; + acct = ns->bacct; /* * accelerate the common fastpath: */ - if (!acct_globals.file) + if (!acct || !acct->file) return; - spin_lock(&acct_globals.lock); - file = acct_globals.file; + spin_lock(&acct_lock); + file = acct->file; if (unlikely(!file)) { - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); return; } get_file(file); - ns = get_pid_ns(acct_globals.ns); - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); - do_acct_process(ns, file); + do_acct_process(acct, ns, file); fput(file); - put_pid_ns(ns); +} + +/** + * acct_process - now just a wrapper around acct_process_in_ns, + * which in turn is a wrapper around do_acct_process. + * + * handles process accounting for an exiting task + */ +void acct_process(void) +{ + struct pid_namespace *ns; + + /* + * This loop is safe lockless, since current is still + * alive and holds its namespace, which in turn holds + * its parent. 
+ */ + for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) + acct_process_in_ns(ns); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index c10e7aae04d..4699950e65b 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1476,7 +1476,8 @@ void audit_syscall_entry(int arch, int major, struct audit_context *context = tsk->audit_context; enum audit_state state; - BUG_ON(!context); + if (unlikely(!context)) + return; /* * This happens only on certain architectures that make system diff --git a/kernel/capability.c b/kernel/capability.c index 901e0fdc3ff..0101e847603 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -115,11 +115,208 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) return 0; } +#ifndef CONFIG_SECURITY_FILE_CAPABILITIES + +/* + * Without filesystem capability support, we nominally support one process + * setting the capabilities of another + */ +static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, + kernel_cap_t *pIp, kernel_cap_t *pPp) +{ + struct task_struct *target; + int ret; + + spin_lock(&task_capability_lock); + read_lock(&tasklist_lock); + + if (pid && pid != task_pid_vnr(current)) { + target = find_task_by_vpid(pid); + if (!target) { + ret = -ESRCH; + goto out; + } + } else + target = current; + + ret = security_capget(target, pEp, pIp, pPp); + +out: + read_unlock(&tasklist_lock); + spin_unlock(&task_capability_lock); + + return ret; +} + +/* + * cap_set_pg - set capabilities for all processes in a given process + * group. We call this holding task_capability_lock and tasklist_lock. 
+ */ +static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) +{ + struct task_struct *g, *target; + int ret = -EPERM; + int found = 0; + struct pid *pgrp; + + spin_lock(&task_capability_lock); + read_lock(&tasklist_lock); + + pgrp = find_vpid(pgrp_nr); + do_each_pid_task(pgrp, PIDTYPE_PGID, g) { + target = g; + while_each_thread(g, target) { + if (!security_capset_check(target, effective, + inheritable, permitted)) { + security_capset_set(target, effective, + inheritable, permitted); + ret = 0; + } + found = 1; + } + } while_each_pid_task(pgrp, PIDTYPE_PGID, g); + + read_unlock(&tasklist_lock); + spin_unlock(&task_capability_lock); + + if (!found) + ret = 0; + return ret; +} + /* - * For sys_getproccap() and sys_setproccap(), any of the three - * capability set pointers may be NULL -- indicating that that set is - * uninteresting and/or not to be changed. + * cap_set_all - set capabilities for all processes other than init + * and self. We call this holding task_capability_lock and tasklist_lock. */ +static inline int cap_set_all(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) +{ + struct task_struct *g, *target; + int ret = -EPERM; + int found = 0; + + spin_lock(&task_capability_lock); + read_lock(&tasklist_lock); + + do_each_thread(g, target) { + if (target == current + || is_container_init(target->group_leader)) + continue; + found = 1; + if (security_capset_check(target, effective, inheritable, + permitted)) + continue; + ret = 0; + security_capset_set(target, effective, inheritable, permitted); + } while_each_thread(g, target); + + read_unlock(&tasklist_lock); + spin_unlock(&task_capability_lock); + + if (!found) + ret = 0; + + return ret; +} + +/* + * Given the target pid does not refer to the current process we + * need more elaborate support... (This support is not present when + * filesystem capabilities are configured.) 
+ */ +static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) +{ + struct task_struct *target; + int ret; + + if (!capable(CAP_SETPCAP)) + return -EPERM; + + if (pid == -1) /* all procs other than current and init */ + return cap_set_all(effective, inheritable, permitted); + + else if (pid < 0) /* all procs in process group */ + return cap_set_pg(-pid, effective, inheritable, permitted); + + /* target != current */ + spin_lock(&task_capability_lock); + read_lock(&tasklist_lock); + + target = find_task_by_vpid(pid); + if (!target) + ret = -ESRCH; + else { + ret = security_capset_check(target, effective, inheritable, + permitted); + + /* having verified that the proposed changes are legal, + we now put them into effect. */ + if (!ret) + security_capset_set(target, effective, inheritable, + permitted); + } + + read_unlock(&tasklist_lock); + spin_unlock(&task_capability_lock); + + return ret; +} + +#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */ + +/* + * If we have configured with filesystem capability support, then the + * only thing that can change the capabilities of the current process + * is the current process. As such, we can't be in this code at the + * same time as we are in the process of setting capabilities in this + * process. The net result is that we can limit our use of locks to + * when we are reading the caps of another process. 
+ */ +static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, + kernel_cap_t *pIp, kernel_cap_t *pPp) +{ + int ret; + + if (pid && (pid != task_pid_vnr(current))) { + struct task_struct *target; + + spin_lock(&task_capability_lock); + read_lock(&tasklist_lock); + + target = find_task_by_vpid(pid); + if (!target) + ret = -ESRCH; + else + ret = security_capget(target, pEp, pIp, pPp); + + read_unlock(&tasklist_lock); + spin_unlock(&task_capability_lock); + } else + ret = security_capget(current, pEp, pIp, pPp); + + return ret; +} + +/* + * With filesystem capability support configured, the kernel does not + * permit the ch |