Diffstat (limited to 'kernel')
46 files changed, 2421 insertions, 1105 deletions
diff --git a/kernel/async.c b/kernel/async.c
index f565891f2c9..968ef9457d4 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -49,6 +49,7 @@ asynchronous and synchronous parts of the kernel.
  */
 
 #include <linux/async.h>
+#include <linux/bug.h>
 #include <linux/module.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
@@ -387,20 +388,11 @@ static int async_manager_thread(void *unused)
 
 static int __init async_init(void)
 {
-	if (async_enabled)
-		if (IS_ERR(kthread_run(async_manager_thread, NULL,
-				       "async/mgr")))
-			async_enabled = 0;
-	return 0;
-}
+	async_enabled =
+		!IS_ERR(kthread_run(async_manager_thread, NULL, "async/mgr"));
 
-static int __init setup_async(char *str)
-{
-	async_enabled = 1;
-	return 1;
+	WARN_ON(!async_enabled);
+	return 0;
 }
 
-__setup("fastboot", setup_async);
-
-
 core_initcall(async_init);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9edb5c4b79b..c500ca7239b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1071,7 +1071,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 		mutex_unlock(&cgroup_mutex);
 	}
 
-	return simple_set_mnt(mnt, sb);
+	simple_set_mnt(mnt, sb);
+	return 0;
 
  free_cg_links:
 	free_cg_links(&tmp_cg_links);
@@ -1627,7 +1628,7 @@ static struct inode_operations cgroup_dir_inode_operations = {
 static int cgroup_create_file(struct dentry *dentry, int mode,
 				struct super_block *sb)
 {
-	static struct dentry_operations cgroup_dops = {
+	static const struct dentry_operations cgroup_dops = {
 		.d_iput = cgroup_diput,
 	};
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 79e40f00dcb..395b6974dc8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -281,7 +281,7 @@ int __ref cpu_down(unsigned int cpu)
 		goto out;
 	}
 
-	cpu_clear(cpu, cpu_active_map);
+	set_cpu_active(cpu, false);
 
 	/*
 	 * Make sure the all cpus did the reschedule and are not
@@ -296,7 +296,7 @@ int __ref cpu_down(unsigned int cpu)
 	err = _cpu_down(cpu, 0);
 
 	if (cpu_online(cpu))
-		cpu_set(cpu, cpu_active_map);
+		set_cpu_active(cpu, true);
 
 out:
 	cpu_maps_update_done();
@@ -333,7 +333,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 		goto out_notify;
 	BUG_ON(!cpu_online(cpu));
 
-	cpu_set(cpu, cpu_active_map);
+	set_cpu_active(cpu, true);
 
 	/* Now call notifier in preparation. */
 	raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
diff --git a/kernel/fork.c b/kernel/fork.c
index 6715ebc3761..47c15840a38 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -284,7 +284,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	mm->free_area_cache = oldmm->mmap_base;
 	mm->cached_hole_size = ~0UL;
 	mm->map_count = 0;
-	cpus_clear(mm->cpu_vm_mask);
+	cpumask_clear(mm_cpumask(mm));
 	mm->mm_rb = RB_ROOT;
 	rb_link = &mm->mm_rb.rb_node;
 	rb_parent = NULL;
diff --git a/kernel/futex.c b/kernel/futex.c
index 438701adce2..6b50a024bca 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -114,7 +114,9 @@ struct futex_q {
 };
 
 /*
- * Split the global futex_lock into every hash list lock.
+ * Hash buckets are shared by all the futex_keys that hash to the same
+ * location.  Each key may have multiple futex_q structures, one for each task
+ * waiting on a futex.
  */
 struct futex_hash_bucket {
 	spinlock_t lock;
@@ -189,8 +191,7 @@ static void drop_futex_key_refs(union futex_key *key)
 /**
  * get_futex_key - Get parameters which are the keys for a futex.
  * @uaddr: virtual address of the futex
- * @shared: NULL for a PROCESS_PRIVATE futex,
- *	&current->mm->mmap_sem for a PROCESS_SHARED futex
+ * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
 * @key: address where result is stored.
 *
 * Returns a negative error code or 0
@@ -200,9 +201,7 @@ static void drop_futex_key_refs(union futex_key *key)
 * offset_within_page).  For private mappings, it's (uaddr, current->mm).
 * We can usually work out the index without swapping in the page.
 *
- * fshared is NULL for PROCESS_PRIVATE futexes
- * For other futexes, it points to &current->mm->mmap_sem and
- * caller must have taken the reader lock. but NOT any spinlocks.
+ * lock_page() might sleep, the caller should not hold a spinlock.
 */
 static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
 {
@@ -299,41 +298,6 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from)
 	return ret ? -EFAULT : 0;
 }
 
-/*
- * Fault handling.
- */
-static int futex_handle_fault(unsigned long address, int attempt)
-{
-	struct vm_area_struct * vma;
-	struct mm_struct *mm = current->mm;
-	int ret = -EFAULT;
-
-	if (attempt > 2)
-		return ret;
-
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, address);
-	if (vma && address >= vma->vm_start &&
-	    (vma->vm_flags & VM_WRITE)) {
-		int fault;
-		fault = handle_mm_fault(mm, vma, address, 1);
-		if (unlikely((fault & VM_FAULT_ERROR))) {
-#if 0
-			/* XXX: let's do this when we verify it is OK */
-			if (ret & VM_FAULT_OOM)
-				ret = -ENOMEM;
-#endif
-		} else {
-			ret = 0;
-			if (fault & VM_FAULT_MAJOR)
-				current->maj_flt++;
-			else
-				current->min_flt++;
-		}
-	}
-	up_read(&mm->mmap_sem);
-	return ret;
-}
 
 /*
  * PI code:
@@ -589,10 +553,9 @@ static void wake_futex(struct futex_q *q)
 	 * The waiting task can free the futex_q as soon as this is written,
 	 * without taking any locks.  This must come last.
 	 *
-	 * A memory barrier is required here to prevent the following store
-	 * to lock_ptr from getting ahead of the wakeup. Clearing the lock
-	 * at the end of wake_up_all() does not prevent this store from
-	 * moving.
+	 * A memory barrier is required here to prevent the following store to
+	 * lock_ptr from getting ahead of the wakeup. Clearing the lock at the
+	 * end of wake_up() does not prevent this store from moving.
 	 */
 	smp_wmb();
 	q->lock_ptr = NULL;
@@ -692,9 +655,16 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
 	}
 }
 
+static inline void
+double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
+{
+	spin_unlock(&hb1->lock);
+	if (hb1 != hb2)
+		spin_unlock(&hb2->lock);
+}
+
 /*
- * Wake up all waiters hashed on the physical page that is mapped
- * to this virtual address:
+ * Wake up waiters matching bitset queued on this futex (uaddr).
  */
 static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
 {
@@ -750,9 +720,9 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
 	struct futex_hash_bucket *hb1, *hb2;
 	struct plist_head *head;
 	struct futex_q *this, *next;
-	int ret, op_ret, attempt = 0;
+	int ret, op_ret;
 
-retryfull:
+retry:
 	ret = get_futex_key(uaddr1, fshared, &key1);
 	if (unlikely(ret != 0))
 		goto out;
@@ -763,16 +733,13 @@ retryfull:
 	hb1 = hash_futex(&key1);
 	hb2 = hash_futex(&key2);
 
-retry:
 	double_lock_hb(hb1, hb2);
-
+retry_private:
 	op_ret = futex_atomic_op_inuser(op, uaddr2);
 	if (unlikely(op_ret < 0)) {
 		u32 dummy;
 
-		spin_unlock(&hb1->lock);
-		if (hb1 != hb2)
-			spin_unlock(&hb2->lock);
+		double_unlock_hb(hb1, hb2);
 
 #ifndef CONFIG_MMU
 		/*
@@ -788,26 +755,16 @@ retry:
 			goto out_put_keys;
 		}
 
-		/*
-		 * futex_atomic_op_inuser needs to both read and write
-		 * *(int __user *)uaddr2, but we can't modify it
-		 * non-atomically.  Therefore, if get_user below is not
-		 * enough, we need to handle the fault ourselves, while
-		 * still holding the mmap_sem.
-		 */
-		if (attempt++) {
-			ret = futex_handle_fault((unsigned long)uaddr2,
-						 attempt);
-			if (ret)
-				goto out_put_keys;
-			goto retry;
-		}
-
 		ret = get_user(dummy, uaddr2);
 		if (ret)
-			return ret;
+			goto out_put_keys;
+
+		if (!fshared)
+			goto retry_private;
 
-		goto retryfull;
+		put_futex_key(fshared, &key2);
+		put_futex_key(fshared, &key1);
+		goto retry;
 	}
 
 	head = &hb1->chain;
@@ -834,9 +791,7 @@ retry:
 		ret += op_ret;
 	}
 
-	spin_unlock(&hb1->lock);
-	if (hb1 != hb2)
-		spin_unlock(&hb2->lock);
+	double_unlock_hb(hb1, hb2);
 
 out_put_keys:
 	put_futex_key(fshared, &key2);
 out_put_key1:
@@ -869,6 +824,7 @@ retry:
 	hb1 = hash_futex(&key1);
 	hb2 = hash_futex(&key2);
 
+retry_private:
 	double_lock_hb(hb1, hb2);
 
 	if (likely(cmpval != NULL)) {
@@ -877,16 +833,18 @@ retry:
 		ret = get_futex_value_locked(&curval, uaddr1);
 
 		if (unlikely(ret)) {
-			spin_unlock(&hb1->lock);
-			if (hb1 != hb2)
-				spin_unlock(&hb2->lock);
+			double_unlock_hb(hb1, hb2);
 
 			ret = get_user(curval, uaddr1);
+			if (ret)
+				goto out_put_keys;
 
-			if (!ret)
-				goto retry;
+			if (!fshared)
+				goto retry_private;
 
-			goto out_put_keys;
+			put_futex_key(fshared, &key2);
+			put_futex_key(fshared, &key1);
+			goto retry;
 		}
 		if (curval != *cmpval) {
 			ret = -EAGAIN;
@@ -923,9 +881,7 @@ retry:
 	}
 
 out_unlock:
-	spin_unlock(&hb1->lock);
-	if (hb1 != hb2)
-		spin_unlock(&hb2->lock);
+	double_unlock_hb(hb1, hb2);
 
 	/* drop_futex_key_refs() must be called outside the spinlocks. */
 	while (--drop_count >= 0)
@@ -1063,7 +1019,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 	struct futex_pi_state *pi_state = q->pi_state;
 	struct task_struct *oldowner = pi_state->owner;
 	u32 uval, curval, newval;
-	int ret, attempt = 0;
+	int ret;
 
 	/* Owner died? */
 	if (!pi_state->owner)
@@ -1076,11 +1032,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 	 * in the user space variable. This must be atomic as we have
 	 * to preserve the owner died bit here.
 	 *
-	 * Note: We write the user space value _before_ changing the
-	 * pi_state because we can fault here. Imagine swapped out
-	 * pages or a fork, which was running right before we acquired
-	 * mmap_sem, that marked all the anonymous memory readonly for
-	 * cow.
+	 * Note: We write the user space value _before_ changing the pi_state
+	 * because we can fault here. Imagine swapped out pages or a fork
+	 * that marked all the anonymous memory readonly for cow.
 	 *
 	 * Modifying pi_state _before_ the user space value would
 	 * leave the pi_state in an inconsistent state when we fault
@@ -1136,7 +1090,7 @@ retry:
 handle_fault:
 	spin_unlock(q->lock_ptr);
 
-	ret = futex_handle_fault((unsigned long)uaddr, attempt++);
+	ret = get_user(uval, uaddr);
 
 	spin_lock(q->lock_ptr);
 
@@ -1185,10 +1139,11 @@ retry:
 	if (unlikely(ret != 0))
 		goto out;
 
+retry_private:
 	hb = queue_lock(&q);
 
 	/*
-	 * Access the page AFTER the futex is queued.
+	 * Access the page AFTER the hash-bucket is locked.
 	 * Order is important:
 	 *
 	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
@@ -1204,20 +1159,23 @@ retry:
 	 * a wakeup when *uaddr != val on entry to the syscall. This is
 	 * rare, but normal.
 	 *
-	 * for shared futexes, we hold the mmap semaphore, so the mapping
+	 * For shared futexes, we hold the mmap semaphore, so the mapping
 	 * cannot have changed since we looked it up in get_futex_key.
 	 */
 	ret = get_futex_value_locked(&uval, uaddr);
 
 	if (unlikely(ret)) {
 		queue_unlock(&q, hb);
-		put_futex_key(fshared, &q.key);
 
 		ret = get_user(uval, uaddr);
+		if (ret)
+			goto out_put_key;
 
-		if (!ret)
-			goto retry;
-		goto out;
+		if (!fshared)
+			goto retry_private;
+
+		put_futex_key(fshared, &q.key);
+		goto retry;
 	}
 	ret = -EWOULDBLOCK;
 	if (unlikely(uval != val)) {
@@ -1248,16 +1206,13 @@ retry:
 	if (!abs_time)
 		schedule();
 	else {
-		unsigned long slack;
-		slack = current->timer_slack_ns;
-		if (rt_task(current))
-			slack = 0;
 		hrtimer_init_on_stack(&t.timer,
 				      clockrt ? CLOCK_REALTIME :
 				      CLOCK_MONOTONIC,
 				      HRTIMER_MODE_ABS);
 		hrtimer_init_sleeper(&t, current);
-		hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
+		hrtimer_set_expires_range_ns(&t.timer, *abs_time,
+					     current->timer_slack_ns);
 
 		hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
 		if (!hrtimer_active(&t.timer))
@@ -1354,7 +1309,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
 	struct futex_hash_bucket *hb;
 	u32 uval, newval, curval;
 	struct futex_q q;
-	int ret, lock_taken, ownerdied = 0, attempt = 0;
+	int ret, lock_taken, ownerdied = 0;
 
 	if (refill_pi_state_cache())
 		return -ENOMEM;
@@ -1374,7 +1329,7 @@ retry:
 	if (unlikely(ret != 0))
 		goto out;
 
-retry_unlocked:
+retry_private:
 	hb = queue_lock(&q);
 
 retry_locked:
@@ -1458,6 +1413,7 @@ retry_locked:
 			 * exit to complete.
 			 */
 			queue_unlock(&q, hb);
+			put_futex_key(fshared, &q.key);
 			cond_resched();
 			goto retry;
 
@@ -1564,6 +1520,13 @@ retry_locked:
 		}
 	}
 
+	/*
+	 * If fixup_pi_state_owner() faulted and was unable to handle the
+	 * fault, unlock it and return the fault to userspace.
+	 */
+	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
+		rt_mutex_unlock(&q.pi_state->pi_mutex);
+
 	/* Unqueue and drop the lock */
 	unqueue_me_pi(&q);
 
@@ -1591,22 +1554,18 @@ uaddr_faulted:
 	 */
 	queue_unlock(&q, hb);
 
-	if (attempt++) {
-		ret = futex_handle_fault((unsigned long)uaddr, attempt);
-		if (ret)
-			goto out_put_key;
-		goto retry_unlocked;
-	}
-
 	ret = get_user(uval, uaddr);
-	if (!ret)
-		goto retry;
+	if (ret)
+		goto out_put_key;
 
-	if (to)
-		destroy_hrtimer_on_stack(&to->timer);
-	return ret;
+	if (!fshared)
+		goto retry_private;
+
+	put_futex_key(fshared, &q.key);
+	goto retry;
 }
 
+
 /*
  * Userspace attempted a TID -> 0 atomic transition, and failed.
  * This is the in-kernel slowpath: we look up the PI state (if any),
@@ -1619,7 +1578,7 @@ static int futex_unlock_pi(u32 __user *uaddr, int fshared)
 	u32 uval;
 	struct plist_head *head;
 	union futex_key key = FUTEX_KEY_INIT;
-	int ret, attempt = 0;
+	int ret;
 
 retry:
 	if (get_user(uval, uaddr))
@@ -1635,7 +1594,6 @@ retry:
 		goto out;
 
 	hb = hash_futex(&key);
-retry_unlocked:
 	spin_lock(&hb->lock);
 
 	/*
@@ -1700,14 +1658,7 @@ pi_faulted:
 	 * we have to drop the mmap_sem in order to call get_user().
 	 */
 	spin_unlock(&hb->lock);
-
-	if (attempt++) {
-		ret = futex_handle_fault((unsigned long)uaddr, attempt);
-		if (ret)
-			goto out;
-		uval = 0;
-		goto retry_unlocked;
-	}
+	put_futex_key(fshared, &key);
 
 	ret = get_user(uval, uaddr);
 	if (!ret)
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 4dd5b1edac9..3394f8f5296 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
 obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
+obj-$(CONFIG_PM_SLEEP) += pm.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 122fef4b0bd..c687ba4363f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -81,6 +81,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 	desc->handle_irq = handle_bad_irq;
 	desc->chip = &no_irq_chip;
 	desc->name = NULL;
+	clear_kstat_irqs(desc);
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
 
@@ -293,7 +294,8 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq)
 		desc->chip->mask_ack(irq);
 	else {
 		desc->chip->mask(irq);
-		desc->chip->ack(irq);
+		if (desc->chip->ack)
+			desc->chip->ack(irq);
 	}
 }
 
@@ -479,7 +481,8 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/* Start handling the irq */
-	desc->chip->ack(irq);
+	if (desc->chip->ack)
+		desc->chip->ack(irq);
 	desc = irq_remap_to_desc(irq, desc);
 
 	/* Mark the IRQ currently in progress.*/
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 412370ab9a3..343acecae62 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -83,19 +83,21 @@ static struct irq_desc irq_desc_init = {
 
 void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
 {
-	unsigned long bytes;
-	char *ptr;
 	int node;
-
-	/* Compute how many bytes we need per irq and allocate them */
-	bytes = nr * sizeof(unsigned int);
+	void *ptr;
 
 	node = cpu_to_node(cpu);
-	ptr = kzalloc_node(bytes, GFP_ATOMIC, node);
-	printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", cpu, node);
+	ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node);
 
-	if (ptr)
-		desc->kstat_irqs = (unsigned int *)ptr;
+	/*
+	 * don't overwite if can not get new one
+	 * init_copy_kstat_irqs() could still use old one
+	 */
+	if (ptr) {
+		printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n",
+			 cpu, node);
+		desc->kstat_irqs = ptr;
+	}
 }
 
 static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
@@ -238,6 +240,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	}
 };
 
+static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
 int __init early_irq_init(void)
 {
 	struct irq_desc *desc;
@@ -254,6 +257,7 @@ int __init early_irq_init(void)
 	for (i = 0; i < count; i++) {
 		desc[i].irq = i;
 		init_alloc_desc_masks(&desc[i], 0, true);
+		desc[i].kstat_irqs = kstat_irqs_all[i];
 	}
 	return arch_early_irq_init();
 }
@@ -269,6 +273,11 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
 }
 #endif /* !CONFIG_SPARSE_IRQ */
 
+void clear_kstat_irqs(struct irq_desc *desc)
+{
+	memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
+}
+
 /*
  * What should we do if we get a hw irq event on an illegal vector?
  * Each architecture has to answer this themself.
@@ -345,6 +354,8 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 	irqreturn_t ret, retval = IRQ_NONE;
 	unsigned int status = 0;
 
+	WARN_ONCE(!in_irq(), "BUG: IRQ handler called from non-hardirq context!");
+
 	if (!(action->flags & IRQF_DISABLED))
 		local_irq_enable_in_hardirq();
 
@@ -366,6 +377,11 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 }
 
 #ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
+
+#ifdef CONFIG_ENABLE_WARN_DEPRECATED
+# warning __do_IRQ is deprecated. Please convert to proper flow handlers
+#endif
+
 /**
  * __do_IRQ - original all in one highlevel IRQ handler
  * @irq:	the interrupt number
@@ -486,12 +502,10 @@ void early_init_irq_lock_class(void)
 	}
 }
 
-#ifdef CONFIG_SPARSE_IRQ
 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	return desc ? desc->kstat_irqs[cpu] : 0;
 }
-#endif
 EXPORT_SYMBOL(kstat_irqs_cpu);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 40416a81a0f..01ce20eab38 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -12,9 +12,12 @@ extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
 extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags);
+extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
+extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
 
 extern struct lock_class_key irq_desc_lock_class;
 extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
+extern void clear_kstat_irqs(struct irq_desc *desc);
 extern spinlock_t sparse_irq_lock;
 
 #ifdef CONFIG_SPARSE_IRQ
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a3a5dc9ef34..1516ab77355 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -109,7 +109,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 /*
  * Generic version of the affinity autoselector.
  */
-int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
+static int setup_affinity(unsigned int irq, struct irq_desc *desc)
 {
 	if (!irq_can_set_affinity(irq))
 		return 0;
@@ -133,7 +133,7 @@ set_affinity:
 	return 0;
 }
 #else
-static inline int do_irq_select_affinity(unsigned int irq, struct irq_desc *d)
+static inline int setup_affinity(unsigned int irq, struct irq_desc *d)
 {
 	return irq_select_affinity(irq);
 }
@@ -149,19 +149,33 @@ int irq_select_affinity_usr(unsigned int irq)
 	int ret;
 
 	spin_lock_irqsave(&desc->lock, flags);
-	ret = do_irq_select_affinity(irq, desc);
+	ret = setup_affinity(irq, desc);
 	spin_unlock_irqrestore(&desc->lock, flags);
 
 	return ret;
 }
 
 #else
-static inline int do_irq_select_affinity(int irq, struct irq_desc *desc)
+static inline int setup_affinity(unsigned int irq, struct irq_desc *desc)
 {
 	return 0;
 }
 #endif
 
+void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
+{
+	if (suspend) {
+		if (!desc->action || (desc->action->flags & IRQF_TIMER))
+			return;
+		desc->status |= IRQ_SUSPENDED;
+	}
+
+	if (!desc->depth++) {
+		desc->status |= IRQ_DISABLED;
+		desc->chip->disable(irq);
+	}
+}
+
 /**
  *	disable_irq_nosync - disable an irq without waiting
  *	@irq: Interrupt to disable
@@ -182,10 +196,7 @@ void disable_irq_nosync(unsigned int irq)
 		return;
 
 	spin_lock_irqsave(&desc->lock, flags);
-	if (!desc->depth++) {
-		desc->status |= IRQ_DISABLED;
-		desc->chip->disable(irq);
-	}
+	__disable_irq(desc, irq, false);
 	spin_unlock_irqrestore(&