diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-07-15 23:12:58 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-15 23:12:58 +0200 |
commit | 1e09481365ce248dbb4eb06dad70129bb5807037 (patch) | |
tree | c0cff5bef95c8b5e7486f144718ade9a06c284dc /kernel | |
parent | 3e2f69fdd1b00166e7d589bce56b2d36a9e74374 (diff) | |
parent | b9d2252c1e44fa83a4e65fdc9eb93db6297c55af (diff) |
Merge branch 'linus' into core/softlockup
Conflicts:
kernel/softlockup.c
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
58 files changed, 10087 insertions, 797 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 1c9938addb9..f6328e16dfd 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -3,7 +3,7 @@ # obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ - exit.o itimer.o time.o softirq.o resource.o \ + cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ @@ -11,6 +11,18 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o +CFLAGS_REMOVE_sched.o = -mno-spe + +ifdef CONFIG_FTRACE +# Do not trace debug files and internal ftrace files +CFLAGS_REMOVE_lockdep.o = -pg +CFLAGS_REMOVE_lockdep_proc.o = -pg +CFLAGS_REMOVE_mutex-debug.o = -pg +CFLAGS_REMOVE_rtmutex-debug.o = -pg +CFLAGS_REMOVE_cgroup-debug.o = -pg +CFLAGS_REMOVE_sched_clock.o = -pg +endif + obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ @@ -27,7 +39,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o -obj-$(CONFIG_SMP) += cpu.o spinlock.o +obj-$(CONFIG_SMP) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_UID16) += uid16.o @@ -69,6 +81,9 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_LATENCYTOP) += latencytop.o +obj-$(CONFIG_FTRACE) += trace/ +obj-$(CONFIG_TRACING) += trace/ +obj-$(CONFIG_SMP) += sched_cpupri.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is diff --git a/kernel/audit.c b/kernel/audit.c index e8692a5748c..e092f1c0ce3 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -738,7 +738,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (!audit_enabled && msg_type != AUDIT_USER_AVC) return 0; - err = audit_filter_user(&NETLINK_CB(skb), msg_type); + err = audit_filter_user(&NETLINK_CB(skb)); if (err == 1) { err = 0; if (msg_type == AUDIT_USER_TTY) { @@ -779,7 +779,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } /* fallthrough */ case AUDIT_LIST: - err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, + err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; @@ -798,7 +798,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } /* fallthrough */ case AUDIT_LIST_RULES: - err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, + err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 0e0bd27e651..98c50cc671b 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1544,6 +1544,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid, * @data: payload data * @datasz: size of payload data * @loginuid: loginuid of sender + * @sessionid: sessionid for netlink audit message * @sid: SE Linux Security ID of sender */ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, @@ -1720,7 +1721,7 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, return 1; } -int audit_filter_user(struct netlink_skb_parms *cb, int type) +int audit_filter_user(struct netlink_skb_parms *cb) { enum audit_state state = AUDIT_DISABLED; struct audit_entry *e; diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c index d1a7605c5b8..a5e026bc45c 100644 --- a/kernel/backtracetest.c +++ b/kernel/backtracetest.c @@ -10,30 +10,73 @@ * of the License. */ +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/interrupt.h> #include <linux/module.h> #include <linux/sched.h> -#include <linux/delay.h> +#include <linux/stacktrace.h> + +static void backtrace_test_normal(void) +{ + printk("Testing a backtrace from process context.\n"); + printk("The following trace is a kernel self test and not a bug!\n"); -static struct timer_list backtrace_timer; + dump_stack(); +} -static void backtrace_test_timer(unsigned long data) +static DECLARE_COMPLETION(backtrace_work); + +static void backtrace_test_irq_callback(unsigned long data) +{ + dump_stack(); + complete(&backtrace_work); +} + +static DECLARE_TASKLET(backtrace_tasklet, &backtrace_test_irq_callback, 0); + +static void backtrace_test_irq(void) { printk("Testing a backtrace from irq context.\n"); printk("The following trace is a kernel self test and not a bug!\n"); - dump_stack(); + + init_completion(&backtrace_work); + tasklet_schedule(&backtrace_tasklet); + wait_for_completion(&backtrace_work); +} + +#ifdef CONFIG_STACKTRACE +static void backtrace_test_saved(void) +{ + struct stack_trace trace; + unsigned long entries[8]; + + printk("Testing a saved backtrace.\n"); + printk("The following trace is a kernel self test and not a bug!\n"); + + trace.nr_entries = 0; + trace.max_entries = ARRAY_SIZE(entries); + trace.entries = entries; + trace.skip = 0; + + save_stack_trace(&trace); + print_stack_trace(&trace, 0); +} +#else +static void backtrace_test_saved(void) +{ + printk("Saved backtrace test skipped.\n"); } +#endif + static int backtrace_regression_test(void) { printk("====[ backtrace testing ]===========\n"); - printk("Testing a backtrace from process context.\n"); - printk("The following trace is a kernel self test and not a bug!\n"); - dump_stack(); - init_timer(&backtrace_timer); - backtrace_timer.function = backtrace_test_timer; - mod_timer(&backtrace_timer, jiffies + 10); + backtrace_test_normal(); + backtrace_test_irq(); + backtrace_test_saved(); - msleep(10); printk("====[ end of backtrace testing ]====\n"); return 0; } diff --git a/kernel/capability.c b/kernel/capability.c index cfbe4429948..901e0fdc3ff 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -121,6 +121,27 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) * uninteresting and/or not to be changed. */ +/* + * Atomically modify the effective capabilities returning the original + * value. No permission check is performed here - it is assumed that the + * caller is permitted to set the desired effective capabilities. + */ +kernel_cap_t cap_set_effective(const kernel_cap_t pE_new) +{ + kernel_cap_t pE_old; + + spin_lock(&task_capability_lock); + + pE_old = current->cap_effective; + current->cap_effective = pE_new; + + spin_unlock(&task_capability_lock); + + return pE_old; +} + +EXPORT_SYMBOL(cap_set_effective); + /** * sys_capget - get the capabilities of a given process. * @header: pointer to struct that contains capability version and diff --git a/kernel/cpu.c b/kernel/cpu.c index c77bc3a1c72..b11f06dc149 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -15,6 +15,28 @@ #include <linux/stop_machine.h> #include <linux/mutex.h> +/* + * Represents all cpu's present in the system + * In systems capable of hotplug, this map could dynamically grow + * as new cpu's are detected in the system via any platform specific + * method, such as ACPI for e.g. + */ +cpumask_t cpu_present_map __read_mostly; +EXPORT_SYMBOL(cpu_present_map); + +#ifndef CONFIG_SMP + +/* + * Represents all cpu's that are currently online. + */ +cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; +EXPORT_SYMBOL(cpu_online_map); + +cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; +EXPORT_SYMBOL(cpu_possible_map); + +#else /* CONFIG_SMP */ + /* Serializes the updates to cpu_online_map, cpu_present_map */ static DEFINE_MUTEX(cpu_add_remove_lock); @@ -403,3 +425,5 @@ out: cpu_maps_update_done(); } #endif /* CONFIG_PM_SLEEP_SMP */ + +#endif /* CONFIG_SMP */ diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9fceb97e989..459d601947a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1194,6 +1194,15 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) return -ENOSPC; + if (tsk->flags & PF_THREAD_BOUND) { + cpumask_t mask; + + mutex_lock(&callback_mutex); + mask = cs->cpus_allowed; + mutex_unlock(&callback_mutex); + if (!cpus_equal(tsk->cpus_allowed, mask)) + return -EINVAL; + } return security_task_setscheduler(tsk, 0, NULL); } @@ -1207,11 +1216,14 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct mm_struct *mm; struct cpuset *cs = cgroup_cs(cont); struct cpuset *oldcs = cgroup_cs(oldcont); + int err; mutex_lock(&callback_mutex); guarantee_online_cpus(cs, &cpus); - set_cpus_allowed_ptr(tsk, &cpus); + err = set_cpus_allowed_ptr(tsk, &cpus); mutex_unlock(&callback_mutex); + if (err) + return; from = oldcs->mems_allowed; to = cs->mems_allowed; @@ -1882,7 +1894,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root) * in order to minimize text size. */ -static void common_cpu_mem_hotplug_unplug(void) +static void common_cpu_mem_hotplug_unplug(int rebuild_sd) { cgroup_lock(); @@ -1894,7 +1906,8 @@ static void common_cpu_mem_hotplug_unplug(void) * Scheduler destroys domains on hotplug events. * Rebuild them based on the current settings. */ - rebuild_sched_domains(); + if (rebuild_sd) + rebuild_sched_domains(); cgroup_unlock(); } @@ -1912,11 +1925,22 @@ static void common_cpu_mem_hotplug_unplug(void) static int cpuset_handle_cpuhp(struct notifier_block *unused_nb, unsigned long phase, void *unused_cpu) { - if (phase == CPU_DYING || phase == CPU_DYING_FROZEN) + switch (phase) { + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + case CPU_DEAD: + case CPU_DEAD_FROZEN: + common_cpu_mem_hotplug_unplug(1); + break; + default: return NOTIFY_DONE; + } - common_cpu_mem_hotplug_unplug(); - return 0; + return NOTIFY_OK; } #ifdef CONFIG_MEMORY_HOTPLUG @@ -1929,7 +1953,7 @@ static int cpuset_handle_cpuhp(struct notifier_block *unused_nb, void cpuset_track_online_nodes(void) { - common_cpu_mem_hotplug_unplug(); + common_cpu_mem_hotplug_unplug(0); } #endif diff --git a/kernel/exit.c b/kernel/exit.c index 8f6185e69b6..ceb25878283 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -13,6 +13,7 @@ #include <linux/personality.h> #include <linux/tty.h> #include <linux/mnt_namespace.h> +#include <linux/iocontext.h> #include <linux/key.h> #include <linux/security.h> #include <linux/cpu.h> diff --git a/kernel/fork.c b/kernel/fork.c index 19908b26cf8..4bd2f516401 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -23,6 +23,7 @@ #include <linux/sem.h> #include <linux/file.h> #include <linux/fdtable.h> +#include <linux/iocontext.h> #include <linux/key.h> #include <linux/binfmts.h> #include <linux/mman.h> @@ -909,7 +910,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, rt_mutex_init_task(p); -#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 421be5fe5cc..2913a8bff61 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -300,11 +300,10 @@ EXPORT_SYMBOL_GPL(ktime_sub_ns); */ u64 ktime_divns(const ktime_t kt, s64 div) { - u64 dclc, inc, dns; + u64 dclc; int sft = 0; - dclc = dns = ktime_to_ns(kt); - inc = div; + dclc = ktime_to_ns(kt); /* Make sure the divisor is less than 2^32: */ while (div >> 32) { sft++; @@ -632,8 +631,6 @@ void clock_was_set(void) */ void hres_timers_resume(void) { - WARN_ON_ONCE(num_online_cpus() > 1); - /* Retrigger the CPU local events: */ retrigger_next_event(NULL); } @@ -1003,10 +1000,18 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) */ raise = timer->state == HRTIMER_STATE_PENDING; + /* + * We use preempt_disable to prevent this task from migrating after + * setting up the softirq and raising it. Otherwise, if me migrate + * we will raise the softirq on the wrong CPU. + */ + preempt_disable(); + unlock_hrtimer_base(timer, &flags); if (raise) hrtimer_raise_softirq(); + preempt_enable(); return ret; } @@ -1078,7 +1083,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } EXPORT_SYMBOL_GPL(hrtimer_get_remaining); -#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ) +#ifdef CONFIG_NO_HZ /** * hrtimer_get_next_event - get the time until next expiry event * @@ -1669,7 +1674,7 @@ void __init hrtimers_init(void) (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); #ifdef CONFIG_HIGH_RES_TIMERS - open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); #endif } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 46d6611a33b..77a51be3601 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -17,6 +17,8 @@ #ifdef CONFIG_SMP +cpumask_t irq_default_affinity = CPU_MASK_ALL; + /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) * @irq: interrupt number to wait for @@ -95,6 +97,27 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask) return 0; } +#ifndef CONFIG_AUTO_IRQ_AFFINITY +/* + * Generic version of the affinity autoselector. + */ +int irq_select_affinity(unsigned int irq) +{ + cpumask_t mask; + + if (!irq_can_set_affinity(irq)) + return 0; + + cpus_and(mask, cpu_online_map, irq_default_affinity); + + irq_desc[irq].affinity = mask; + irq_desc[irq].chip->set_affinity(irq, mask); + + set_balance_irq_affinity(irq, mask); + return 0; +} +#endif + #endif /** @@ -354,7 +377,7 @@ int setup_irq(unsigned int irq, struct irqaction *new) /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { - if (desc->chip && desc->chip->set_type) + if (desc->chip->set_type) desc->chip->set_type(irq, new->flags & IRQF_TRIGGER_MASK); else @@ -364,8 +387,7 @@ int setup_irq(unsigned int irq, struct irqaction *new) */ printk(KERN_WARNING "No IRQF_TRIGGER set_type " "function for IRQ %d (%s)\n", irq, - desc->chip ? desc->chip->name : - "unknown"); + desc->chip->name); } else compat_irq_chip_set_default_handler(desc); @@ -382,6 +404,9 @@ int setup_irq(unsigned int irq, struct irqaction *new) } else /* Undo nested disables: */ desc->depth = 1; + + /* Set default affinity mask once everything is setup */ + irq_select_affinity(irq); } /* Reset broken irq detection when installing new handler */ desc->irq_count = 0; @@ -571,8 +596,6 @@ int request_irq(unsigned int irq, irq_handler_t handler, action->next = NULL; action->dev_id = dev_id; - select_smp_affinity(irq); - #ifdef CONFIG_DEBUG_SHIRQ if (irqflags & IRQF_SHARED) { /* diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index c2f2ccb0549..6c6d35d68ee 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -44,7 +44,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, unsigned long count, void *data) { unsigned int irq = (int)(long)data, full_count = count, err; - cpumask_t new_value, tmp; + cpumask_t new_value; if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || irq_balancing_disabled(irq)) @@ -62,17 +62,51 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, * way to make the system unusable accidentally :-) At least * one online CPU still has to be targeted. */ - cpus_and(tmp, new_value, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpus_intersects(new_value, cpu_online_map)) /* Special case for empty set - allow the architecture code to set default SMP affinity. */ - return select_smp_affinity(irq) ? -EINVAL : full_count; + return irq_select_affinity(irq) ? -EINVAL : full_count; irq_set_affinity(irq, new_value); return full_count; } +static int default_affinity_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = cpumask_scnprintf(page, count, irq_default_affinity); + if (count - len < 2) + return -EINVAL; + len += sprintf(page + len, "\n"); + return len; +} + +static int default_affinity_write(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + unsigned int full_count = count, err; + cpumask_t new_value; + + err = cpumask_parse_user(buffer, count, new_value); + if (err) + return err; + + if (!is_affinity_mask_valid(new_value)) + return -EINVAL; + + /* + * Do not allow disabling IRQs completely - it's a too easy + * way to make the system unusable accidentally :-) At least + * one online CPU still has to be targeted. + */ + if (!cpus_intersects(new_value, cpu_online_map)) + return -EINVAL; + + irq_default_affinity = new_value; + + return full_count; +} #endif static int irq_spurious_read(char *page, char **start, off_t off, @@ -171,6 +205,21 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action) remove_proc_entry(action->dir->name, irq_desc[irq].dir); } +void register_default_affinity_proc(void) +{ +#ifdef CONFIG_SMP + struct proc_dir_entry *entry; + + /* create /proc/irq/default_smp_affinity */ + entry = create_proc_entry("default_smp_affinity", 0600, root_irq_dir); + if (entry) { + entry->data = NULL; + entry->read_proc = default_affinity_read; + entry->write_proc = default_affinity_write; + } +#endif +} + void init_irq_proc(void) { int i; @@ -180,6 +229,8 @@ void init_irq_proc(void) if (!root_irq_dir) return; + register_default_affinity_proc(); + /* * Create entries for all existing IRQs. */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d4998f81e22..1485ca8d0e0 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -79,7 +79,7 @@ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; * * For such cases, we now have a blacklist */ -struct kprobe_blackpoint kprobe_blacklist[] = { +static struct kprobe_blackpoint kprobe_blacklist[] = { {"preempt_schedule",}, {NULL} /* Terminator */ }; |