aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLen Brown <len.brown@intel.com>2005-09-08 01:45:47 -0400
committerLen Brown <len.brown@intel.com>2005-09-08 01:45:47 -0400
commit64e47488c913ac704d465a6af86a26786d1412a5 (patch)
treed3b0148592963dcde26e4bb35ddfec8b1eaf8e23 /kernel
parent4a35a46bf1cda4737c428380d1db5d15e2590d18 (diff)
parentcaf39e87cc1182f7dae84eefc43ca14d54c78ef9 (diff)
Merge linux-2.6 with linux-acpi-2.6
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/acct.c2
-rw-r--r--kernel/cpuset.c125
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/futex.c137
-rw-r--r--kernel/intermodule.c3
-rw-r--r--kernel/irq/handle.c2
-rw-r--r--kernel/irq/manage.c4
-rw-r--r--kernel/irq/proc.c14
-rw-r--r--kernel/kprobes.c94
-rw-r--r--kernel/module.c33
-rw-r--r--kernel/params.c4
-rw-r--r--kernel/posix-timers.c28
-rw-r--r--kernel/power/Kconfig14
-rw-r--r--kernel/power/disk.c55
-rw-r--r--kernel/power/main.c5
-rw-r--r--kernel/power/pm.c3
-rw-r--r--kernel/power/process.c29
-rw-r--r--kernel/power/swsusp.c202
-rw-r--r--kernel/printk.c13
-rw-r--r--kernel/ptrace.c41
-rw-r--r--kernel/resource.c3
-rw-r--r--kernel/sched.c340
-rw-r--r--kernel/signal.c83
-rw-r--r--kernel/softlockup.c151
-rw-r--r--kernel/sys.c6
-rw-r--r--kernel/timer.c18
-rw-r--r--kernel/workqueue.c5
28 files changed, 1128 insertions, 290 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index cb05cd05d23..8d57a2f1226 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_SYSFS) += ksysfs.o
+obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_SECCOMP) += seccomp.o
diff --git a/kernel/acct.c b/kernel/acct.c
index 4168f631868..f70e6027cca 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -220,7 +220,7 @@ asmlinkage long sys_acct(const char __user *name)
return (PTR_ERR(tmp));
}
/* Difference from BSD - they don't do O_APPEND */
- file = filp_open(tmp, O_WRONLY|O_APPEND, 0);
+ file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
putname(tmp);
if (IS_ERR(file)) {
return (PTR_ERR(file));
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8ab1b4e518b..1f06e769010 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -628,13 +628,6 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
* lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
*/
-/*
- * Hack to avoid 2.6.13 partial node dynamic sched domain bug.
- * Disable letting 'cpu_exclusive' cpusets define dynamic sched
- * domains, until the sched domain can handle partial nodes.
- * Remove this #if hackery when sched domains fixed.
- */
-#if 0
static void update_cpu_domains(struct cpuset *cur)
{
struct cpuset *c, *par = cur->parent;
@@ -675,11 +668,6 @@ static void update_cpu_domains(struct cpuset *cur)
partition_sched_domains(&pspan, &cspan);
unlock_cpu_hotplug();
}
-#else
-static void update_cpu_domains(struct cpuset *cur)
-{
-}
-#endif
static int update_cpumask(struct cpuset *cs, char *buf)
{
@@ -1611,17 +1599,114 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
return 0;
}
+/*
+ * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
+ * ancestor to the specified cpuset. Call while holding cpuset_sem.
+ * If no ancestor is mem_exclusive (an unusual configuration), then
+ * returns the root cpuset.
+ */
+static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
+{
+ while (!is_mem_exclusive(cs) && cs->parent)
+ cs = cs->parent;
+ return cs;
+}
+
/**
- * cpuset_zone_allowed - is zone z allowed in current->mems_allowed
- * @z: zone in question
+ * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
*
- * Is zone z allowed in current->mems_allowed, or is
- * the CPU in interrupt context? (zone is always allowed in this case)
- */
-int cpuset_zone_allowed(struct zone *z)
+ * If we're in interrupt, yes, we can always allocate. If zone
+ * z's node is in our tasks mems_allowed, yes. If it's not a
+ * __GFP_HARDWALL request and this zone's nodes is in the nearest
+ * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * Otherwise, no.
+ *
+ * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
+ * and do not allow allocations outside the current tasks cpuset.
+ * GFP_KERNEL allocations are not so marked, so can escape to the
+ * nearest mem_exclusive ancestor cpuset.
+ *
+ * Scanning up parent cpusets requires cpuset_sem. The __alloc_pages()
+ * routine only calls here with __GFP_HARDWALL bit _not_ set if
+ * it's a GFP_KERNEL allocation, and all nodes in the current tasks
+ * mems_allowed came up empty on the first pass over the zonelist.
+ * So only GFP_KERNEL allocations, if all nodes in the cpuset are
+ * short of memory, might require taking the cpuset_sem semaphore.
+ *
+ * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
+ * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
+ * hardwall cpusets - no allocation on a node outside the cpuset is
+ * allowed (unless in interrupt, of course).
+ *
+ * The second loop doesn't even call here for GFP_ATOMIC requests
+ * (if the __alloc_pages() local variable 'wait' is set). That check
+ * and the checks below have the combined affect in the second loop of
+ * the __alloc_pages() routine that:
+ * in_interrupt - any node ok (current task context irrelevant)
+ * GFP_ATOMIC - any node ok
+ * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok
+ * GFP_USER - only nodes in current tasks mems allowed ok.
+ **/
+
+int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
{
- return in_interrupt() ||
- node_isset(z->zone_pgdat->node_id, current->mems_allowed);
+ int node; /* node that zone z is on */
+ const struct cpuset *cs; /* current cpuset ancestors */
+ int allowed = 1; /* is allocation in zone z allowed? */
+
+ if (in_interrupt())
+ return 1;
+ node = z->zone_pgdat->node_id;
+ if (node_isset(node, current->mems_allowed))
+ return 1;
+ if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
+ return 0;
+
+ /* Not hardwall and node outside mems_allowed: scan up cpusets */
+ down(&cpuset_sem);
+ cs = current->cpuset;
+ if (!cs)
+ goto done; /* current task exiting */
+ cs = nearest_exclusive_ancestor(cs);
+ allowed = node_isset(node, cs->mems_allowed);
+done:
+ up(&cpuset_sem);
+ return allowed;
+}
+
+/**
+ * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
+ * @p: pointer to task_struct of some other task.
+ *
+ * Description: Return true if the nearest mem_exclusive ancestor
+ * cpusets of tasks @p and current overlap. Used by oom killer to
+ * determine if task @p's memory usage might impact the memory
+ * available to the current task.
+ *
+ * Acquires cpuset_sem - not suitable for calling from a fast path.
+ **/
+
+int cpuset_excl_nodes_overlap(const struct task_struct *p)
+{
+ const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */
+ int overlap = 0; /* do cpusets overlap? */
+
+ down(&cpuset_sem);
+ cs1 = current->cpuset;
+ if (!cs1)
+ goto done; /* current task exiting */
+ cs2 = p->cpuset;
+ if (!cs2)
+ goto done; /* task p is exiting */
+ cs1 = nearest_exclusive_ancestor(cs1);
+ cs2 = nearest_exclusive_ancestor(cs2);
+ overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
+done:
+ up(&cpuset_sem);
+
+ return overlap;
}
/*
diff --git a/kernel/fork.c b/kernel/fork.c
index b65187f0c74..7e1ead9a6ba 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -994,6 +994,9 @@ static task_t *copy_process(unsigned long clone_flags,
* of CLONE_PTRACE.
*/
clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
+#ifdef TIF_SYSCALL_EMU
+ clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
+#endif
/* Our parent execution domain becomes current domain
These must match for thread signalling to apply */
diff --git a/kernel/futex.c b/kernel/futex.c
index c7130f86106..ca05fe6a70b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -40,6 +40,7 @@
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
+#include <asm/futex.h>
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
@@ -327,6 +328,118 @@ out:
}
/*
+ * Wake up all waiters hashed on the physical page that is mapped
+ * to this virtual address:
+ */
+static int futex_wake_op(unsigned long uaddr1, unsigned long uaddr2, int nr_wake, int nr_wake2, int op)
+{
+ union futex_key key1, key2;
+ struct futex_hash_bucket *bh1, *bh2;
+ struct list_head *head;
+ struct futex_q *this, *next;
+ int ret, op_ret, attempt = 0;
+
+retryfull:
+ down_read(&current->mm->mmap_sem);
+
+ ret = get_futex_key(uaddr1, &key1);
+ if (unlikely(ret != 0))
+ goto out;
+ ret = get_futex_key(uaddr2, &key2);
+ if (unlikely(ret != 0))
+ goto out;
+
+ bh1 = hash_futex(&key1);
+ bh2 = hash_futex(&key2);
+
+retry:
+ if (bh1 < bh2)
+ spin_lock(&bh1->lock);
+ spin_lock(&bh2->lock);
+ if (bh1 > bh2)
+ spin_lock(&bh1->lock);
+
+ op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
+ if (unlikely(op_ret < 0)) {
+ int dummy;
+
+ spin_unlock(&bh1->lock);
+ if (bh1 != bh2)
+ spin_unlock(&bh2->lock);
+
+ /* futex_atomic_op_inuser needs to both read and write
+ * *(int __user *)uaddr2, but we can't modify it
+ * non-atomically. Therefore, if get_user below is not
+ * enough, we need to handle the fault ourselves, while
+ * still holding the mmap_sem. */
+ if (attempt++) {
+ struct vm_area_struct * vma;
+ struct mm_struct *mm = current->mm;
+
+ ret = -EFAULT;
+ if (attempt >= 2 ||
+ !(vma = find_vma(mm, uaddr2)) ||
+ vma->vm_start > uaddr2 ||
+ !(vma->vm_flags & VM_WRITE))
+ goto out;
+
+ switch (handle_mm_fault(mm, vma, uaddr2, 1)) {
+ case VM_FAULT_MINOR:
+ current->min_flt++;
+ break;
+ case VM_FAULT_MAJOR:
+ current->maj_flt++;
+ break;
+ default:
+ goto out;
+ }
+ goto retry;
+ }
+
+ /* If we would have faulted, release mmap_sem,
+ * fault it in and start all over again. */
+ up_read(&current->mm->mmap_sem);
+
+ ret = get_user(dummy, (int __user *)uaddr2);
+ if (ret)
+ return ret;
+
+ goto retryfull;
+ }
+
+ head = &bh1->chain;
+
+ list_for_each_entry_safe(this, next, head, list) {
+ if (match_futex (&this->key, &key1)) {
+ wake_futex(this);
+ if (++ret >= nr_wake)
+ break;
+ }
+ }
+
+ if (op_ret > 0) {
+ head = &bh2->chain;
+
+ op_ret = 0;
+ list_for_each_entry_safe(this, next, head, list) {
+ if (match_futex (&this->key, &key2)) {
+ wake_futex(this);
+ if (++op_ret >= nr_wake2)
+ break;
+ }
+ }
+ ret += op_ret;
+ }
+
+ spin_unlock(&bh1->lock);
+ if (bh1 != bh2)
+ spin_unlock(&bh2->lock);
+out:
+ up_read(&current->mm->mmap_sem);
+ return ret;
+}
+
+/*
* Requeue all waiters hashed on one physical page to another
* physical page.
*/
@@ -673,23 +786,17 @@ static int futex_fd(unsigned long uaddr, int signal)
filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
if (signal) {
- int err;
err = f_setown(filp, current->pid, 1);
if (err < 0) {
- put_unused_fd(ret);
- put_filp(filp);
- ret = err;
- goto out;
+ goto error;
}
filp->f_owner.signum = signal;
}
q = kmalloc(sizeof(*q), GFP_KERNEL);
if (!q) {
- put_unused_fd(ret);
- put_filp(filp);
- ret = -ENOMEM;
- goto out;
+ err = -ENOMEM;
+ goto error;
}
down_read(&current->mm->mmap_sem);
@@ -697,10 +804,8 @@ static int futex_fd(unsigned long uaddr, int signal)
if (unlikely(err != 0)) {
up_read(&current->mm->mmap_sem);
- put_unused_fd(ret);
- put_filp(filp);
kfree(q);
- return err;
+ goto error;
}
/*
@@ -716,6 +821,11 @@ static int futex_fd(unsigned long uaddr, int signal)
fd_install(ret, filp);
out:
return ret;
+error:
+ put_unused_fd(ret);
+ put_filp(filp);
+ ret = err;
+ goto out;
}
long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
@@ -740,6 +850,9 @@ long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
case FUTEX_CMP_REQUEUE:
ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
break;
+ case FUTEX_WAKE_OP:
+ ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
+ break;
default:
ret = -ENOSYS;
}
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
index 388977f3e9b..0cbe633420f 100644
--- a/kernel/intermodule.c
+++ b/kernel/intermodule.c
@@ -39,7 +39,7 @@ void inter_module_register(const char *im_name, struct module *owner, const void
struct list_head *tmp;
struct inter_module_entry *ime, *ime_new;
- if (!(ime_new = kmalloc(sizeof(*ime), GFP_KERNEL))) {
+ if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) {
/* Overloaded kernel, not fatal */
printk(KERN_ERR
"Aiee, inter_module_register: cannot kmalloc entry for '%s'\n",
@@ -47,7 +47,6 @@ void inter_module_register(const char *im_name, struct module *owner, const void
kmalloc_failed = 1;
return;
}
- memset(ime_new, 0, sizeof(*ime_new));
ime_new->im_name = im_name;
ime_new->owner = owner;
ime_new->userdata = userdata;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c29f83c1649..3ff7b925c38 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -111,7 +111,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
unsigned int status;
kstat_this_cpu.irqs[irq]++;
- if (desc->status & IRQ_PER_CPU) {
+ if (CHECK_IRQ_PER_CPU(desc->status)) {
irqreturn_t action_ret;
/*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ac670098570..1cfdb08ddf2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -18,6 +18,10 @@
cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
+#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
+cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
+#endif
+
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
*
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 85d08daa660..f26e534c658 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,12 +19,22 @@ static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
*/
static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
-void __attribute__((weak))
-proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
+{
+ /*
+ * Save these away for later use. Re-progam when the
+ * interrupt is pending
+ */
+ set_pending_irq(irq, mask_val);
+}
+#else
+void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
irq_affinity[irq] = mask_val;
irq_desc[irq].handler->set_affinity(irq, mask_val);
}
+#endif
static int irq_affinity_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b0237122b24..f3ea492ab44 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -37,6 +37,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
+#include <asm-generic/sections.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/kdebug.h>
@@ -72,7 +73,7 @@ static struct hlist_head kprobe_insn_pages;
* get_insn_slot() - Find a slot on an executable page for an instruction.
* We allocate an executable page if there's no room on existing ones.
*/
-kprobe_opcode_t *get_insn_slot(void)
+kprobe_opcode_t __kprobes *get_insn_slot(void)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos;
@@ -117,7 +118,7 @@ kprobe_opcode_t *get_insn_slot(void)
return kip->insns;
}
-void free_insn_slot(kprobe_opcode_t *slot)
+void __kprobes free_insn_slot(kprobe_opcode_t *slot)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos;
@@ -152,20 +153,42 @@ void free_insn_slot(kprobe_opcode_t *slot)
}
/* Locks kprobe: irqs must be disabled */
-void lock_kprobes(void)
+void __kprobes lock_kprobes(void)
{
+ unsigned long flags = 0;
+
+ /* Avoiding local interrupts to happen right after we take the kprobe_lock
+ * and before we get a chance to update kprobe_cpu, this to prevent
+ * deadlock when we have a kprobe on ISR routine and a kprobe on task
+ * routine
+ */
+ local_irq_save(flags);
+
spin_lock(&kprobe_lock);
kprobe_cpu = smp_processor_id();
+
+ local_irq_restore(flags);
}
-void unlock_kprobes(void)
+void __kprobes unlock_kprobes(void)
{
+ unsigned long flags = 0;
+
+ /* Avoiding local interrupts to happen right after we update
+ * kprobe_cpu and before we get a a chance to release kprobe_lock,
+ * this to prevent deadlock when we have a kprobe on ISR routine and
+ * a kprobe on task routine
+ */
+ local_irq_save(flags);
+
kprobe_cpu = NR_CPUS;
spin_unlock(&kprobe_lock);
+
+ local_irq_restore(flags);
}
/* You have to be holding the kprobe_lock */
-struct kprobe *get_kprobe(void *addr)
+struct kprobe __kprobes *get_kprobe(void *addr)
{
struct hlist_head *head;
struct hlist_node *node;
@@ -183,7 +206,7 @@ struct kprobe *get_kprobe(void *addr)
* Aggregate handlers for multiple kprobes support - these handlers
* take care of invoking the individual kprobe handlers on p->list
*/
-static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
+static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe *kp;
@@ -198,8 +221,8 @@ static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
- unsigned long flags)
+static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
+ unsigned long flags)
{
struct kprobe *kp;
@@ -213,8 +236,8 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
return;
}
-static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
- int trapnr)
+static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
+ int trapnr)
{
/*
* if we faulted "during" the execution of a user specified
@@ -227,7 +250,7 @@ static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
return 0;
}
-static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
+static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe *kp = curr_kprobe;
if (curr_kprobe && kp->break_handler) {
@@ -240,7 +263,7 @@ static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
+struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp)
{
struct hlist_node *node;
struct kretprobe_instance *ri;
@@ -249,7 +272,8 @@ struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
return NULL;
}
-static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
+static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe
+ *rp)
{
struct hlist_node *node;
struct kretprobe_instance *ri;
@@ -258,7 +282,7 @@ static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
return NULL;
}
-void add_rp_inst(struct kretprobe_instance *ri)
+void __kprobes add_rp_inst(struct kretprobe_instance *ri)
{
/*
* Remove rp inst off the free list -
@@ -276,7 +300,7 @@ void add_rp_inst(struct kretprobe_instance *ri)
hlist_add_head(&ri->uflist, &ri->rp->used_instances);
}
-void recycle_rp_inst(struct kretprobe_instance *ri)
+void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
{
/* remove rp inst off the rprobe_inst_table */
hlist_del(&ri->hlist);
@@ -291,7 +315,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri)
kfree(ri);
}
-struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
+struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
{
return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
}
@@ -302,7 +326,7 @@ struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
* instances associated with this task. These left over instances represent
* probed functions that have been called but will never return.
*/
-void kprobe_flush_task(struct task_struct *tk)
+void __kprobes kprobe_flush_task(struct task_struct *tk)
{
struct kretprobe_instance *ri;
struct hlist_head *head;
@@ -322,7 +346,8 @@ void kprobe_flush_task(struct task_struct *tk)
* This kprobe pre_handler is registered with every kretprobe. When probe
* hits it will set up the return probe.
*/
-static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
+static int __kprobes pre_handler_kretprobe(struct kprobe *p,
+ struct pt_regs *regs)
{
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
@@ -353,7 +378,7 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
* Add the new probe to old_p->list. Fail if this is the
* second jprobe at the address - two jprobes can't coexist
*/
-static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
+static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
{
struct kprobe *kp;
@@ -395,7 +420,8 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
* the intricacies
* TODO: Move kcalloc outside the spinlock
*/
-static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p)
+static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
+ struct kprobe *p)
{
int ret = 0;
struct kprobe *ap;
@@ -434,15 +460,25 @@ static inline void cleanup_aggr_kprobe(struct kprobe *old_p,
spin_unlock_irqrestore(&kprobe_lock, flags);
}
-int register_kprobe(struct kprobe *p)
+static int __kprobes in_kprobes_functions(unsigned long addr)
+{
+ if (addr >= (unsigned long)__kprobes_text_start
+ && addr < (unsigned long)__kprobes_text_end)
+ return -EINVAL;
+ return 0;
+}
+
+int __kprobes register_kprobe(struct kprobe *p)
{
int ret = 0;
unsigned long flags = 0;
struct kprobe *old_p;
- if ((ret = arch_prepare_kprobe(p)) != 0) {
+ if ((ret = in_kprobes_functions((unsigned long) p->addr)) != 0)
+ return ret;
+ if ((ret = arch_prepare_kprobe(p)) != 0)
goto rm_kprobe;
- }
+
spin_lock_irqsave(&kprobe_lock, flags);
old_p = get_kprobe(p->addr);
p->nmissed = 0;
@@ -466,7 +502,7 @@ rm_kprobe:
return ret;
}
-void unregister_kprobe(struct kprobe *p)
+void __kprobes unregister_kprobe(struct kprobe *p)
{
unsigned long flags;
struct kprobe *old_p;
@@ -487,7 +523,7 @@ static struct notifier_block kprobe_exceptions_nb = {
.priority = 0x7fffffff /* we need to notified first */
};
-int register_jprobe(struct jprobe *jp)
+int __kprobes register_jprobe(struct jprobe *jp)
{
/* Todo: Verify probepoint is a function entry point */
jp->kp.pre_handler = setjmp_pre_handler;
@@ -496,14 +532,14 @@ int register_jprobe(struct jprobe *jp)
return register_kprobe(&jp->kp);
}
-void unregister_jprobe(struct jprobe *jp)
+void __kprobes unregister_jprobe(struct jprobe *jp)
{
unregister_kprobe(&jp->kp);
}
#ifdef ARCH_SUPPORTS_KRETPROBES
-int register_kretprobe(struct kretprobe *rp)
+int __kprobes register_kretprobe(struct kretprobe *rp)
{
int ret = 0;
struct kretprobe_instance *inst;
@@ -540,14 +576,14 @@ int register_kretprobe(struct kretprobe *rp)
#else /* ARCH_SUPPORTS_KRETPROBES */
-int register_kretprobe(struct kretprobe *rp)
+int __kprobes register_kretprobe(struct kretprobe *rp)
{
return -ENOSYS;
}
#endif /* ARCH_SUPPORTS_KRETPROBES */
-void unregister_kretprobe(struct kretprobe *rp)
+void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
unsigned long flags;
struct kretprobe_instance *ri;
diff --git a/kernel/module.c b/kernel/module.c
index c32995fbd8f..4b39d3793c7 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1509,6 +1509,7 @@ static struct module *load_module(void __user *umod,
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
struct exception_table_entry *extable;
+ mm_segment_t old_fs;
DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
umod, len, uargs);
@@ -1779,6 +1780,24 @@ static struct module *load_module(void __user *umod,
if (err < 0)
goto cleanup;
+ /* flush the icache in correct context */
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+
+ /*
+ * Flush the instruction cache, since we've played with text.
+ * Do it before processing of module parameters, so the module
+ * can provide parameter accessor functions of its own.
+ */
+ if (mod->module_init)
+ flush_icache_range((unsigned long)mod->module_init,
+ (unsigned long)mod->module_init
+ + mod->init_size);
+ flush_icache_range((unsigned long)mod->module_core,
+ (unsigned long)mod->module_core + mod->core_size);
+
+ set_fs(old_fs);
+
mod->args = args;
if (obsparmindex) {
err = obsolete_params(mod->name, mod->args,
@@ -1860,7 +1879,6 @@ sys_init_module(void __user *umod,
const char __user *uargs)
{
struct module *mod;
- mm_segment_t old_fs = get_fs();
int ret = 0;
/* Must have permission */
@@ -1878,19 +1896,6 @@ sys_init_module(void __user *umod,
return PTR_ERR(mod);
}
- /* flush the icache in correct context */
- set_fs(KERNEL_DS);
-
- /* Flush the instruction cache, since we've played with text */
- if (mod->module_init)
- flush_icache_range((unsigned long)mod->module_init,
- (unsigned long)mod->module_init
- + mod->init_size);
- flush_icache_range((unsigned long)mod->module_core,
- (unsigned long)mod->module_core + mod->core_size);
-
- set_fs(old_fs);
-
/* Now sew it into the lists. They won't access us, since
strong_try_module_get() will fail. */
stop_machine_run(__link_module, mod, NR_CPUS);
diff --git a/kernel/params.c b/kernel/params.c
index d586c35ef8f..fbf173215fd 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -542,8 +542,8 @@ static void __init kernel_param_sysfs_setup(const char *name,
{
struct module_kobject *mk;
- mk = kmalloc(sizeof(struct module_kobject), GFP_KERNEL);
- memset(mk, 0, sizeof(struct module_kobject));
+ mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
+ BUG_ON(!mk);
mk->mod = THIS_MODULE;
kobj_set_kset_s(mk, module_subsys);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 38798a2ff99..b7b532acd9f 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -427,21 +427,23 @@ int posix_timer_event(struct k_itimer *timr,int si_private)
timr->sigq->info.si_code = SI_TIMER;
timr->sigq->info.si_tid = timr->it_id;
timr->sigq->info.si_value = timr->it_sigev_value;
+
if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
- if (unlikely(timr->it_process->flags & PF_EXITING)) {
- timr->it_sigev_notify = SIGEV_SIGNAL;
- put_task_struct(timr->it_process);
- timr->it_process = timr->it_process->group_leader;
- goto group;
- }
- return send_sigqueue(timr->it_sigev_signo, timr->sigq,
- timr->it_process);
- }
- else {
- group:
- return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
- timr->it_process);
+ struct task_struct *leader;
+ int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
+ timr->it_process);
+
+ if (likely(ret >= 0))
+ return ret;
+
+ timr->it_sigev_notify = SIGEV_SIGNAL;
+ leader = timr->it_process->group_leader;
+ put_task_struct(timr->it_process);
+ timr->it_process = leader;
}
+
+ return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
+ timr->it_process);
}