diff options
author | David S. Miller <davem@davemloft.net> | 2011-01-24 13:17:06 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-01-24 13:17:06 -0800 |
commit | e92427b289d252cfbd4cb5282d92f4ce1a5bb1fb (patch) | |
tree | 6d30e5e7b7f8e9aaa51d43b7128ac56860fa03bb /kernel | |
parent | c506653d35249bb4738bb139c24362e1ae724bc1 (diff) | |
parent | ec30f343d61391ab23705e50a525da1d55395780 (diff) |
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'kernel')
52 files changed, 1135 insertions, 674 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 0b5ff083fa2..353d3fe8ba3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -43,7 +43,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o -obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o +obj-$(CONFIG_SMP) += smp.o ifneq ($(CONFIG_SMP),y) obj-y += up.o endif @@ -100,6 +100,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ +obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o @@ -121,7 +122,7 @@ $(obj)/configs.o: $(obj)/config_data.h # config_data.h contains the same information as ikconfig.h but gzipped. # Info from config_data can be extracted from /proc/config* targets += config_data.gz -$(obj)/config_data.gz: .config FORCE +$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE $(call if_changed,gzip) quiet_cmd_ikconfiggz = IKCFG $@ diff --git a/kernel/audit.c b/kernel/audit.c index 77770a034d5..e4956244ae5 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -400,7 +400,7 @@ static void kauditd_send_skb(struct sk_buff *skb) if (err < 0) { BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); - audit_log_lost("auditd dissapeared\n"); + audit_log_lost("auditd disappeared\n"); audit_pid = 0; /* we might get lucky and get this in the next auditd */ audit_hold_skb(skb); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 51cddc11cd8..b24d7027b83 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -763,9 +763,8 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); * -> cgroup_mkdir. */ -static struct dentry *cgroup_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd); static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); +static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); static int cgroup_populate_dir(struct cgroup *cgrp); static const struct inode_operations cgroup_dir_inode_operations; @@ -862,6 +861,11 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) iput(inode); } +static int cgroup_delete(const struct dentry *d) +{ + return 1; +} + static void remove_dir(struct dentry *d) { struct dentry *parent = dget(d->d_parent); @@ -912,7 +916,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry) parent = dentry->d_parent; spin_lock(&parent->d_lock); - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(&dentry->d_u.d_child); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); @@ -1451,6 +1455,11 @@ static int cgroup_set_super(struct super_block *sb, void *data) static int cgroup_get_rootdir(struct super_block *sb) { + static const struct dentry_operations cgroup_dops = { + .d_iput = cgroup_diput, + .d_delete = cgroup_delete, + }; + struct inode *inode = cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb); struct dentry *dentry; @@ -1468,6 +1477,8 @@ static int cgroup_get_rootdir(struct super_block *sb) return -ENOMEM; } sb->s_root = dentry; + /* for everything else we want ->d_op set */ + sb->s_d_op = &cgroup_dops; return 0; } @@ -2197,6 +2208,14 @@ static const struct inode_operations cgroup_dir_inode_operations = { .rename = cgroup_rename, }; +static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + d_add(dentry, NULL); + return NULL; +} + /* * Check if a file is a control file */ @@ -2207,26 +2226,6 @@ static inline struct cftype *__file_cft(struct file *file) return __d_cft(file->f_dentry); } -static int cgroup_delete_dentry(const struct dentry *dentry) -{ - return 1; -} - -static struct dentry *cgroup_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) -{ - static const struct dentry_operations cgroup_dentry_operations = { - .d_delete = cgroup_delete_dentry, - .d_iput = cgroup_diput, - }; - - if (dentry->d_name.len > NAME_MAX) - return ERR_PTR(-ENAMETOOLONG); - d_set_d_op(dentry, &cgroup_dentry_operations); - d_add(dentry, NULL); - return NULL; -} - static int cgroup_create_file(struct dentry *dentry, mode_t mode, struct super_block *sb) { diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index a6e72976682..bd3e8e29caa 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2914,7 +2914,7 @@ static void __init kdb_cmd_init(void) } } -/* Intialize kdb_printf, breakpoint tables and kdb state */ +/* Initialize kdb_printf, breakpoint tables and kdb state */ void __init kdb_init(int lvl) { static int kdb_init_lvl = KDB_NOT_INITIALIZED; diff --git a/kernel/exit.c b/kernel/exit.c index 89c74861a3d..f9a45ebcc7b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -994,6 +994,15 @@ NORET_TYPE void do_exit(long code) exit_fs(tsk); check_stack_usage(); exit_thread(); + + /* + * Flush inherited counters to the parent - before the parent + * gets woken up by child-exit notifications. + * + * because of cgroup mode, must be called before cgroup_exit() + */ + perf_event_exit_task(tsk); + cgroup_exit(tsk, 1); if (group_dead) @@ -1007,11 +1016,6 @@ NORET_TYPE void do_exit(long code) * FIXME: do that only when needed, using sched_exit tracepoint */ flush_ptrace_hw_breakpoint(tsk); - /* - * Flush inherited counters to the parent - before the parent - * gets woken up by child-exit notifications. - */ - perf_event_exit_task(tsk); exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA diff --git a/kernel/fork.c b/kernel/fork.c index d9b44f20b6b..25e429152dd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -66,6 +66,7 @@ #include <linux/posix-timers.h> #include <linux/user-return-notifier.h> #include <linux/oom.h> +#include <linux/khugepaged.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -330,6 +331,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) retval = ksm_fork(mm, oldmm); if (retval) goto out; + retval = khugepaged_fork(mm, oldmm); + if (retval) + goto out; prev = NULL; for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { @@ -529,6 +533,9 @@ void __mmdrop(struct mm_struct *mm) mm_free_pgd(mm); destroy_context(mm); mmu_notifier_mm_destroy(mm); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + VM_BUG_ON(mm->pmd_huge_pte); +#endif free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -543,6 +550,7 @@ void mmput(struct mm_struct *mm) if (atomic_dec_and_test(&mm->mm_users)) { exit_aio(mm); ksm_exit(mm); + khugepaged_exit(mm); /* must run before exit_mmap */ exit_mmap(mm); set_mm_exe_file(mm, NULL); if (!list_empty(&mm->mmlist)) { @@ -669,6 +677,10 @@ struct mm_struct *dup_mm(struct task_struct *tsk) mm->token_priority = 0; mm->last_interval = 0; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + mm->pmd_huge_pte = NULL; +#endif + if (!mm_init(mm, tsk)) goto fail_nomem; @@ -910,6 +922,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; + sig->oom_score_adj_min = current->signal->oom_score_adj_min; mutex_init(&sig->cred_guard_mutex); @@ -1410,23 +1423,6 @@ long do_fork(unsigned long clone_flags, } /* - * We hope to recycle these flags after 2.6.26 - */ - if (unlikely(clone_flags & CLONE_STOPPED)) { - static int __read_mostly count = 100; - - if (count > 0 && printk_ratelimit()) { - char comm[TASK_COMM_LEN]; - - count--; - printk(KERN_INFO "fork(): process `%s' used deprecated " - "clone flags 0x%lx\n", - get_task_comm(comm, current), - clone_flags & CLONE_STOPPED); - } - } - - /* * When called from kernel_thread, don't do user tracing stuff. */ if (likely(user_mode(regs))) @@ -1464,16 +1460,7 @@ long do_fork(unsigned long clone_flags, */ p->flags &= ~PF_STARTING; - if (unlikely(clone_flags & CLONE_STOPPED)) { - /* - * We'll start up with an immediate SIGSTOP. - */ - sigaddset(&p->pending.signal, SIGSTOP); - set_tsk_thread_flag(p, TIF_SIGPENDING); - __set_task_state(p, TASK_STOPPED); - } else { - wake_up_new_task(p, clone_flags); - } + wake_up_new_task(p, clone_flags); tracehook_report_clone_complete(trace, regs, clone_flags, nr, p); diff --git a/kernel/freezer.c b/kernel/freezer.c index bd1d42b17cb..66ecd2ead21 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -104,8 +104,13 @@ bool freeze_task(struct task_struct *p, bool sig_only) } if (should_send_signal(p)) { - if (!signal_pending(p)) - fake_signal_wake_up(p); + fake_signal_wake_up(p); + /* + * fake_signal_wake_up() goes through p's scheduler + * lock and guarantees that TASK_STOPPED/TRACED -> + * TASK_RUNNING transition can't race with task state + * testing in try_to_freeze_tasks(). + */ } else if (sig_only) { return false; } else { diff --git a/kernel/futex.c b/kernel/futex.c index 3019b92e691..b766d28accd 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -233,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; - struct page *page; + struct page *page, *page_head; int err; /* @@ -265,11 +265,46 @@ again: if (err < 0) return err; - page = compound_head(page); - lock_page(page); - if (!page->mapping) { - unlock_page(page); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + page_head = page; + if (unlikely(PageTail(page))) { put_page(page); + /* serialize against __split_huge_page_splitting() */ + local_irq_disable(); + if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) { + page_head = compound_head(page); + /* + * page_head is valid pointer but we must pin + * it before taking the PG_lock and/or + * PG_compound_lock. The moment we re-enable + * irqs __split_huge_page_splitting() can + * return and the head page can be freed from + * under us. We can't take the PG_lock and/or + * PG_compound_lock on a page that could be + * freed from under us. + */ + if (page != page_head) { + get_page(page_head); + put_page(page); + } + local_irq_enable(); + } else { + local_irq_enable(); + goto again; + } + } +#else + page_head = compound_head(page); + if (page != page_head) { + get_page(page_head); + put_page(page); + } +#endif + + lock_page(page_head); + if (!page_head->mapping) { + unlock_page(page_head); + put_page(page_head); goto again; } @@ -280,20 +315,20 @@ again: * it's a read-only handle, it's expected that futexes attach to * the object not the particular process. */ - if (PageAnon(page)) { + if (PageAnon(page_head)) { key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; } else { key->both.offset |= FUT_OFF_INODE; /* inode-based key */ - key->shared.inode = page->mapping->host; - key->shared.pgoff = page->index; + key->shared.inode = page_head->mapping->host; + key->shared.pgoff = page_head->index; } get_futex_key_refs(key); - unlock_page(page); - put_page(page); + unlock_page(page_head); + put_page(page_head); return 0; } @@ -791,10 +826,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); /* - * This happens when we have stolen the lock and the original - * pending owner did not enqueue itself back on the rt_mutex. - * Thats not a tragedy. We know that way, that a lock waiter - * is on the fly. We make the futex_q waiter the pending owner. + * It is possible that the next waiter (the one that brought + * this owner to the kernel) timed out and is no longer + * waiting on the lock. */ if (!new_owner) new_owner = this->task; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 45da2b6920a..0c8d7c04861 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1745,7 +1745,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, } /* - * A NULL parameter means "inifinte" + * A NULL parameter means "infinite" */ if (!expires) { schedule(); diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 31d766bf5d2..8e42fec7686 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -9,9 +9,6 @@ menu "IRQ subsystem" config GENERIC_HARDIRQS def_bool y -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - # Select this to disable the deprecated stuff config GENERIC_HARDIRQS_NO_DEPRECATED def_bool n diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index e2347eb6330..3540a719012 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -118,114 +118,3 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) return retval; } - -#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ - -#ifdef CONFIG_ENABLE_WARN_DEPRECATED -# warning __do_IRQ is deprecated. Please convert to proper flow handlers -#endif - -/** - * __do_IRQ - original all in one highlevel IRQ handler - * @irq: the interrupt number - * - * __do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - * - * This is the original x86 implementation which is used for every - * interrupt type. - */ -unsigned int __do_IRQ(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - struct irqaction *action; - unsigned int status; - - kstat_incr_irqs_this_cpu(irq, desc); - - if (CHECK_IRQ_PER_CPU(desc->status)) { - irqreturn_t action_ret; - - /* - * No locking required for CPU-local interrupts: - */ - if (desc->irq_data.chip->ack) - desc->irq_data.chip->ack(irq); - if (likely(!(desc->status & IRQ_DISABLED))) { - action_ret = handle_IRQ_event(irq, desc->action); - if (!noirqdebug) - note_interrupt(irq, desc, action_ret); - } - desc->irq_data.chip->end(irq); - return 1; - } - - raw_spin_lock(&desc->lock); - if (desc->irq_data.chip->ack) - desc->irq_data.chip->ack(irq); - /* - * REPLAY is when Linux resends an IRQ that was dropped earlier - * WAITING is used by probe to mark irqs that are being tested - */ - status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); - status |= IRQ_PENDING; /* we _want_ to handle it */ - - /* - * If the IRQ is disabled for whatever reason, we cannot - * use the action we have. - */ - action = NULL; - if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { - action = desc->action; - status &= ~IRQ_PENDING; /* we commit to handling */ - status |= IRQ_INPROGRESS; /* we are handling it */ - } - desc->status = status; - - /* - * If there is no IRQ handler or it was disabled, exit early. - * Since we set PENDING, if another processor is handling - * a different instance of this same irq, the other processor - * will take care of it. - */ - if (unlikely(!action)) - goto out; - - /* - * Edge triggered interrupts need to remember - * pending events. - * This applies to any hw interrupts that allow a second - * instance of the same irq to arrive while we are in do_IRQ - * or in the handler. But the code here only handles the _second_ - * instance of the irq, not the third or fourth. So it is mostly - * useful for irq hardware that does not mask cleanly in an - * SMP environment. - */ - for (;;) { - irqreturn_t action_ret; - - raw_spin_unlock(&desc->lock); - - action_ret = handle_IRQ_event(irq, action); - if (!noirqdebug) - note_interrupt(irq, desc, action_ret); - - raw_spin_lock(&desc->lock); - if (likely(!(desc->status & IRQ_PENDING))) - break; - desc->status &= ~IRQ_PENDING; - } - desc->status &= ~IRQ_INPROGRESS; - -out: - /* - * The ->end() handler has to deal with interrupts which got - * disabled while the handler was running. - */ - desc->irq_data.chip->end(irq); - raw_spin_unlock(&desc->lock); - - return 1; -} -#endif diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9988d03797f..282f20230e6 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -72,6 +72,8 @@ static inline int desc_node(struct irq_desc *desc) { return 0; } static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) { + int cpu; + desc->irq_data.irq = irq; desc->irq_data.chip = &no_irq_chip; desc->irq_data.chip_data = NULL; @@ -83,7 +85,8 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) desc->irq_count = 0; desc->irqs_unhandled = 0; desc->name = NULL; - memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); + for_each_possible_cpu(cpu) + *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; desc_smp_init(desc, node); } @@ -133,8 +136,7 @@ static struct irq_desc *alloc_desc(int irq, int node) if (!desc) return NULL; /* allocate based on nr_cpu_ids */ - desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs), - gfp, node); + desc->kstat_irqs = alloc_percpu(unsigned int); if (!desc->kstat_irqs) goto err_desc; @@ -149,7 +151,7 @@ static struct irq_desc *alloc_desc(int irq, int node) return desc; err_kstat: - kfree(desc->kstat_irqs); + free_percpu(desc->kstat_irqs); err_desc: kfree(desc); return NULL; @@ -166,7 +168,7 @@ static void free_desc(unsigned int irq) mutex_unlock(&sparse_irq_lock); free_masks(desc); - kfree(desc->kstat_irqs); + free_percpu(desc->kstat_irqs); kfree(desc); } @@ -234,7 +236,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { } }; -static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS]; int __init early_irq_init(void) { int count, i, node = first_online_node; @@ -250,7 +251,8 @@ int __init early_irq_init(void) for (i = 0; i < count; i++) { desc[i].irq_data.irq = i; desc[i].irq_data.chip = &no_irq_chip; - desc[i].kstat_irqs = kstat_irqs_all[i]; + /* TODO : do this allocation on-demand ... */ + desc[i].kstat_irqs = alloc_percpu(unsigned int); alloc_masks(desc + i, GFP_KERNEL, node); desc_smp_init(desc + i, node); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); @@ -275,6 +277,22 @@ static void free_desc(unsigned int irq) static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) { +#if defined(CONFIG_KSTAT_IRQS_ONDEMAND) + struct irq_desc *desc; + unsigned int i; + + for (i = 0; i < cnt; i++) { + desc = irq_to_desc(start + i); + if (desc && !desc->kstat_irqs) { + unsigned int __percpu *stats = alloc_percpu(unsigned int); + + if (!stats) + return -1; + if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL) + free_percpu(stats); + } + } +#endif return start; } #endif /* !CONFIG_SPARSE_IRQ */ @@ -391,7 +409,9 @@ void dynamic_irq_cleanup(unsigned int irq) unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { struct irq_desc *desc = irq_to_desc(irq); - return desc ? desc->kstat_irqs[cpu] : 0; + + return desc && desc->kstat_irqs ? + *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; } #ifdef CONFIG_GENERIC_HARDIRQS @@ -401,10 +421,10 @@ unsigned int kstat_irqs(unsigned int irq) int cpu; int sum = 0; - if (!desc) + if (!desc || !desc->kstat_irqs) return 0; for_each_possible_cpu(cpu) - sum += desc->kstat_irqs[cpu]; + sum += *per_cpu_ptr(desc->kstat_irqs, cpu); return sum; } #endif /* CONFIG_GENERIC_HARDIRQS */ diff --git a/kernel/kexec.c b/kernel/kexec.c index b55045bc756..ec19b92c7eb 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -163,7 +163,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, * just verifies it is an address we can use. * * Since the kernel does everything in page size chunks ensure - * the destination addreses are page aligned. Too many + * the destination addresses are page aligned. Too many * special cases crop of when we don't do this. The most * insidious is getting overlapping destination addresses * simply because addresses are changed to page size diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 17110a4a4fc..ee74b |