diff options
Diffstat (limited to 'kernel')
89 files changed, 5161 insertions, 3846 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index e898c5b9d02..f70396e5a24 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -2,16 +2,15 @@ # Makefile for the linux kernel. # -obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ +obj-y = fork.o exec_domain.o panic.o printk.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o sched_clock.o cred.o \ - async.o range.o -obj-y += groups.o + notifier.o ksysfs.o cred.o \ + async.o range.o groups.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files @@ -20,10 +19,11 @@ CFLAGS_REMOVE_lockdep_proc.o = -pg CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg -CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_irq_work.o = -pg endif +obj-y += sched/ + obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o @@ -99,7 +99,6 @@ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_TRACEPOINTS) += trace/ -obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o @@ -110,15 +109,6 @@ obj-$(CONFIG_PADATA) += padata.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o -ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) -# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is -# needed for x86 only. Why this used to be enabled for all architectures is beyond -# me. I suspect most platforms don't need this, but until we know that for sure -# I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k -# to get a correct value for the wait-channel (WCHAN in ps). --davidm -CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer -endif - $(obj)/configs.o: $(obj)/config_data.h # config_data.h contains the same information as ikconfig.h but gzipped. diff --git a/kernel/acct.c b/kernel/acct.c index fa7eb3de2dd..02e6167a53b 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -84,11 +84,10 @@ static void do_acct_process(struct bsd_acct_struct *acct, * the cache line to have the data after getting the lock. */ struct bsd_acct_struct { - volatile int active; - volatile int needcheck; + int active; + unsigned long needcheck; struct file *file; struct pid_namespace *ns; - struct timer_list timer; struct list_head list; }; @@ -96,15 +95,6 @@ static DEFINE_SPINLOCK(acct_lock); static LIST_HEAD(acct_list); /* - * Called whenever the timer says to check the free space. - */ -static void acct_timeout(unsigned long x) -{ - struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x; - acct->needcheck = 1; -} - -/* * Check the amount of free space and suspend/resume accordingly. */ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) @@ -112,12 +102,12 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) struct kstatfs sbuf; int res; int act; - sector_t resume; - sector_t suspend; + u64 resume; + u64 suspend; spin_lock(&acct_lock); res = acct->active; - if (!file || !acct->needcheck) + if (!file || time_is_before_jiffies(acct->needcheck)) goto out; spin_unlock(&acct_lock); @@ -127,8 +117,8 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) suspend = sbuf.f_blocks * SUSPEND; resume = sbuf.f_blocks * RESUME; - sector_div(suspend, 100); - sector_div(resume, 100); + do_div(suspend, 100); + do_div(resume, 100); if (sbuf.f_bavail <= suspend) act = -1; @@ -160,10 +150,7 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) } } - del_timer(&acct->timer); - acct->needcheck = 0; - acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ; - add_timer(&acct->timer); + acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; res = acct->active; out: spin_unlock(&acct_lock); @@ -185,9 +172,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, if (acct->file) { old_acct = acct->file; old_ns = acct->ns; - del_timer(&acct->timer); acct->active = 0; - acct->needcheck = 0; acct->file = NULL; acct->ns = NULL; list_del(&acct->list); @@ -195,13 +180,9 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, if (file) { acct->file = file; acct->ns = ns; - acct->needcheck = 0; + acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; acct->active = 1; list_add(&acct->list, &acct_list); - /* It's been deleted if it was used before so this is safe */ - setup_timer(&acct->timer, acct_timeout, (unsigned long)acct); - acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ; - add_timer(&acct->timer); } if (old_acct) { mnt_unpin(old_acct->f_path.mnt); @@ -334,7 +315,7 @@ void acct_auto_close(struct super_block *sb) spin_lock(&acct_lock); restart: list_for_each_entry(acct, &acct_list, list) - if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) { + if (acct->file && acct->file->f_path.dentry->d_sb == sb) { acct_file_reopen(acct, NULL, NULL); goto restart; } @@ -348,7 +329,6 @@ void acct_exit_ns(struct pid_namespace *ns) if (acct == NULL) return; - del_timer_sync(&acct->timer); spin_lock(&acct_lock); if (acct->file != NULL) acct_file_reopen(acct, NULL, NULL); @@ -498,7 +478,7 @@ static void do_acct_process(struct bsd_acct_struct *acct, * Fill the accounting struct with the needed info as recorded * by the different kernel functions. */ - memset((caddr_t)&ac, 0, sizeof(acct_t)); + memset(&ac, 0, sizeof(acct_t)); ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER; strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm)); @@ -613,8 +593,8 @@ void acct_collect(long exitcode, int group_dead) pacct->ac_flag |= ACORE; if (current->flags & PF_SIGNALED) pacct->ac_flag |= AXSIG; - pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime); - pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime); + pacct->ac_utime += current->utime; + pacct->ac_stime += current->stime; pacct->ac_minflt += current->min_flt; pacct->ac_majflt += current->maj_flt; spin_unlock_irq(¤t->sighand->siglock); diff --git a/kernel/audit.c b/kernel/audit.c index 09fae2677a4..2c1d6ab7106 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1260,12 +1260,13 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt, avail = audit_expand(ab, max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail)); if (!avail) - goto out; + goto out_va_end; len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2); } - va_end(args2); if (len > 0) skb_put(skb, len); +out_va_end: + va_end(args2); out: return; } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 47b7fc1ea89..e7fe2b0d29b 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -210,12 +210,12 @@ struct audit_context { struct { uid_t uid; gid_t gid; - mode_t mode; + umode_t mode; u32 osid; int has_perm; uid_t perm_uid; gid_t perm_gid; - mode_t perm_mode; + umode_t perm_mode; unsigned long qbytes; } ipc; struct { @@ -234,7 +234,7 @@ struct audit_context { } mq_sendrecv; struct { int oflag; - mode_t mode; + umode_t mode; struct mq_attr attr; } mq_open; struct { @@ -308,7 +308,7 @@ static int audit_match_perm(struct audit_context *ctx, int mask) static int audit_match_filetype(struct audit_context *ctx, int which) { unsigned index = which & ~S_IFMT; - mode_t mode = which & S_IFMT; + umode_t mode = which & S_IFMT; if (unlikely(!ctx)) return 0; @@ -1249,7 +1249,7 @@ static void show_special(struct audit_context *context, int *call_panic) case AUDIT_IPC: { u32 osid = context->ipc.osid; - audit_log_format(ab, "ouid=%u ogid=%u mode=%#o", + audit_log_format(ab, "ouid=%u ogid=%u mode=%#ho", context->ipc.uid, context->ipc.gid, context->ipc.mode); if (osid) { char *ctx = NULL; @@ -1267,7 +1267,7 @@ static void show_special(struct audit_context *context, int *call_panic) ab = audit_log_start(context, GFP_KERNEL, AUDIT_IPC_SET_PERM); audit_log_format(ab, - "qbytes=%lx ouid=%u ogid=%u mode=%#o", + "qbytes=%lx ouid=%u ogid=%u mode=%#ho", context->ipc.qbytes, context->ipc.perm_uid, context->ipc.perm_gid, @@ -1278,7 +1278,7 @@ static void show_special(struct audit_context *context, int *call_panic) break; } case AUDIT_MQ_OPEN: { audit_log_format(ab, - "oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld " + "oflag=0x%x mode=%#ho mq_flags=0x%lx mq_maxmsg=%ld " "mq_msgsize=%ld mq_curmsgs=%ld", context->mq_open.oflag, context->mq_open.mode, context->mq_open.attr.mq_flags, @@ -1502,7 +1502,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (n->ino != (unsigned long)-1) { audit_log_format(ab, " inode=%lu" - " dev=%02x:%02x mode=%#o" + " dev=%02x:%02x mode=%#ho" " ouid=%u ogid=%u rdev=%02x:%02x", n->ino, MAJOR(n->dev), @@ -2160,7 +2160,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) * @attr: queue attributes * */ -void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr) +void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) { struct audit_context *context = current->audit_context; @@ -2260,7 +2260,7 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp) * * Called only after audit_ipc_obj(). */ -void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) +void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { struct audit_context *context = current->audit_context; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d9d5648f3cd..a5d3b5325f7 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -63,7 +63,24 @@ #include <linux/atomic.h> +/* + * cgroup_mutex is the master lock. Any modification to cgroup or its + * hierarchy must be performed while holding it. + * + * cgroup_root_mutex nests inside cgroup_mutex and should be held to modify + * cgroupfs_root of any cgroup hierarchy - subsys list, flags, + * release_agent_path and so on. Modifying requires both cgroup_mutex and + * cgroup_root_mutex. Readers can acquire either of the two. This is to + * break the following locking order cycle. + * + * A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem + * B. namespace_sem -> cgroup_mutex + |