aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup.c 33
-rw-r--r-- kernel/compat.c 5
-rw-r--r-- kernel/cpuset.c 34
-rw-r--r-- kernel/dma-coherent.c 42
-rw-r--r-- kernel/exit.c 21
-rw-r--r-- kernel/fork.c 17
-rw-r--r-- kernel/kmod.c 4
-rw-r--r-- kernel/kprobes.c 281
-rw-r--r-- kernel/module.c 33
-rw-r--r-- kernel/panic.c 2
-rw-r--r-- kernel/profile.c 1
-rw-r--r-- kernel/signal.c 3
-rw-r--r-- kernel/sys.c 2
-rw-r--r-- kernel/sysctl.c 27
-rw-r--r-- kernel/test_kprobes.c 210
-rw-r--r-- kernel/time.c 4
-rw-r--r-- kernel/tsacct.c 4
17 files changed, 505 insertions, 218 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 87bb0258fd2..f221446aa02 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -116,7 +116,6 @@ static int root_count;
* be called.
*/
static int need_forkexit_callback __read_mostly;
-static int need_mm_owner_callback __read_mostly;
/* convenient tests for these bits */
inline int cgroup_is_removed(const struct cgroup *cgrp)
@@ -2539,7 +2538,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
need_forkexit_callback |= ss->fork || ss->exit;
- need_mm_owner_callback |= !!ss->mm_owner_changed;
/* At system boot, before all subsystems have been
* registered, no tasks have been forked, so we don't
@@ -2789,37 +2787,6 @@ void cgroup_fork_callbacks(struct task_struct *child)
}
}
-#ifdef CONFIG_MM_OWNER
-/**
- * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes
- * @p: the new owner
- *
- * Called on every change to mm->owner. mm_init_owner() does not
- * invoke this routine, since it assigns the mm->owner the first time
- * and does not change it.
- *
- * The callbacks are invoked with mmap_sem held in read mode.
- */
-void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
-{
- struct cgroup *oldcgrp, *newcgrp = NULL;
-
- if (need_mm_owner_callback) {
- int i;
- for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
- struct cgroup_subsys *ss = subsys[i];
- oldcgrp = task_cgroup(old, ss->subsys_id);
- if (new)
- newcgrp = task_cgroup(new, ss->subsys_id);
- if (oldcgrp == newcgrp)
- continue;
- if (ss->mm_owner_changed)
- ss->mm_owner_changed(ss, oldcgrp, newcgrp, new);
- }
- }
-}
-#endif /* CONFIG_MM_OWNER */
-
/**
* cgroup_post_fork - called on a new task after adding it to the task list
* @child: the task in question
diff --git a/kernel/compat.c b/kernel/compat.c
index d52e2ec1deb..42d56544460 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -24,6 +24,7 @@
#include <linux/migrate.h>
#include <linux/posix-timers.h>
#include <linux/times.h>
+#include <linux/ptrace.h>
#include <asm/uaccess.h>
@@ -229,6 +230,7 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
return -EFAULT;
}
+ force_successful_syscall_return();
return compat_jiffies_to_clock_t(jiffies);
}
@@ -894,8 +896,9 @@ asmlinkage long compat_sys_time(compat_time_t __user * tloc)
if (tloc) {
if (put_user(i,tloc))
- i = -EFAULT;
+ return -EFAULT;
}
+ force_successful_syscall_return();
return i;
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 39c1a4c1c5a..345ace5117d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -240,6 +240,17 @@ static struct cpuset top_cpuset = {
static DEFINE_MUTEX(callback_mutex);
/*
+ * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
+ * buffers. They are statically allocated to prevent using excess stack
+ * when calling cpuset_print_task_mems_allowed().
+ */
+#define CPUSET_NAME_LEN (128)
+#define CPUSET_NODELIST_LEN (256)
+static char cpuset_name[CPUSET_NAME_LEN];
+static char cpuset_nodelist[CPUSET_NODELIST_LEN];
+static DEFINE_SPINLOCK(cpuset_buffer_lock);
+
+/*
* This is ugly, but preserves the userspace API for existing cpuset
* users. If someone tries to mount the "cpuset" filesystem, we
* silently switch it to mount "cgroup" instead
@@ -2356,6 +2367,29 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
}
+/**
+ * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
+ * @tsk: pointer to task_struct of some task.
+ *
+ * Description: Prints @tsk's name, cpuset name, and cached copy of its
+ * mems_allowed to the kernel log.  Must hold task_lock(tsk) to allow
+ * dereferencing task_cs(tsk).
+ *
+ * The name and node list are formatted into the static cpuset_name and
+ * cpuset_nodelist buffers under cpuset_buffer_lock.
+ */
+void cpuset_print_task_mems_allowed(struct task_struct *tsk)
+{
+	struct dentry *dentry;
+
+	dentry = task_cs(tsk)->css.cgroup->dentry;
+	spin_lock(&cpuset_buffer_lock);
+	/*
+	 * A dentry name is arbitrary text and may contain '%', so it must
+	 * be passed as an argument and never used as the format string.
+	 */
+	snprintf(cpuset_name, CPUSET_NAME_LEN, "%s",
+		 dentry ? (const char *)dentry->d_name.name : "/");
+	nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
+			   tsk->mems_allowed);
+	printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
+	       tsk->comm, cpuset_name, cpuset_nodelist);
+	spin_unlock(&cpuset_buffer_lock);
+}
+
/*
* Collection of memory_pressure is suppressed unless
* this flag is enabled by writing "1" to the special
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
index f013a0c2e11..038707404b7 100644
--- a/kernel/dma-coherent.c
+++ b/kernel/dma-coherent.c
@@ -109,20 +109,40 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
int dma_alloc_from_coherent(struct device *dev, ssize_t size,
dma_addr_t *dma_handle, void **ret)
{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+ struct dma_coherent_mem *mem;
int order = get_order(size);
+ int pageno;
- if (mem) {
- int page = bitmap_find_free_region(mem->bitmap, mem->size,
- order);
- if (page >= 0) {
- *dma_handle = mem->device_base + (page << PAGE_SHIFT);
- *ret = mem->virt_base + (page << PAGE_SHIFT);
- memset(*ret, 0, size);
- } else if (mem->flags & DMA_MEMORY_EXCLUSIVE)
- *ret = NULL;
+ if (!dev)
+ return 0;
+ mem = dev->dma_mem;
+ if (!mem)
+ return 0;
+ /*
+ * mem->size counts pages (it is the bit count handed to
+ * bitmap_find_free_region() below) while size is in bytes, so
+ * convert to bytes before comparing. Widen first so the shift
+ * cannot overflow a narrower type.
+ */
+ if (unlikely(size > ((ssize_t)mem->size << PAGE_SHIFT)))
+ return 0;
+
+ pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
+ if (pageno >= 0) {
+ /*
+ * Memory was found in the per-device arena.
+ */
+ *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
+ *ret = mem->virt_base + (pageno << PAGE_SHIFT);
+ memset(*ret, 0, size);
+ } else if (mem->flags & DMA_MEMORY_EXCLUSIVE) {
+ /*
+ * The per-device arena is exhausted and we are not
+ * permitted to fall back to generic memory.
+ */
+ *ret = NULL;
+ } else {
+ /*
+ * The per-device arena is exhausted and we are
+ * permitted to fall back to generic memory.
+ */
+ return 0;
}
- return (mem != NULL);
+ return 1;
}
EXPORT_SYMBOL(dma_alloc_from_coherent);
diff --git a/kernel/exit.c b/kernel/exit.c
index c9e5a1c14e0..c7740fa3252 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -642,35 +642,31 @@ retry:
/*
* We found no owner yet mm_users > 1: this implies that we are
* most likely racing with swapoff (try_to_unuse()) or /proc or
- * ptrace or page migration (get_task_mm()). Mark owner as NULL,
- * so that subsystems can understand the callback and take action.
+ * ptrace or page migration (get_task_mm()). Mark owner as NULL.
*/
- down_write(&mm->mmap_sem);
- cgroup_mm_owner_callbacks(mm->owner, NULL);
mm->owner = NULL;
- up_write(&mm->mmap_sem);
return;
assign_new_owner:
BUG_ON(c == p);
get_task_struct(c);
- read_unlock(&tasklist_lock);
- down_write(&mm->mmap_sem);
/*
* The task_lock protects c->mm from changing.
* We always want mm->owner->mm == mm
*/
task_lock(c);
+ /*
+ * Delay read_unlock() till we have the task_lock()
+ * to ensure that c does not slip away underneath us
+ */
+ read_unlock(&tasklist_lock);
if (c->mm != mm) {
task_unlock(c);
- up_write(&mm->mmap_sem);
put_task_struct(c);
goto retry;
}
- cgroup_mm_owner_callbacks(mm->owner, c);
mm->owner = c;
task_unlock(c);
- up_write(&mm->mmap_sem);
put_task_struct(c);
}
#endif /* CONFIG_MM_OWNER */
@@ -1055,10 +1051,7 @@ NORET_TYPE void do_exit(long code)
preempt_count());
acct_update_integrals(tsk);
- if (tsk->mm) {
- update_hiwater_rss(tsk->mm);
- update_hiwater_vm(tsk->mm);
- }
+
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
hrtimer_cancel(&tsk->signal->real_timer);
diff --git a/kernel/fork.c b/kernel/fork.c
index 43cbf30669e..7b8f2a78be3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -400,6 +400,18 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
+static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
+
+static int __init coredump_filter_setup(char *s)
+{
+ default_dump_filter =
+ (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) &
+ MMF_DUMP_FILTER_MASK;
+ return 1;
+}
+
+__setup("coredump_filter=", coredump_filter_setup);
+
#include <linux/init_task.h>
static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
@@ -408,8 +420,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
atomic_set(&mm->mm_count, 1);
init_rwsem(&mm->mmap_sem);
INIT_LIST_HEAD(&mm->mmlist);
- mm->flags = (current->mm) ? current->mm->flags
- : MMF_DUMP_FILTER_DEFAULT;
+ mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
mm->core_state = NULL;
mm->nr_ptes = 0;
set_mm_counter(mm, file_rss, 0);
@@ -758,7 +769,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
{
struct sighand_struct *sig;
- if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
+ if (clone_flags & CLONE_SIGHAND) {
atomic_inc(&current->sighand->count);
return 0;
}
diff --git a/kernel/kmod.c b/kernel/kmod.c
index b46dbb90866..a27a5f64443 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -51,8 +51,8 @@ char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
/**
* request_module - try to load a kernel module
- * @fmt: printf style format string for the name of the module
- * @varargs: arguements as specified in the format string
+ * @fmt: printf style format string for the name of the module
+ * @...: arguments as specified in the format string
*
* Load a module using the user mode module loader. The function returns
* zero on success or a negative errno code on failure. Note that a
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9f8a3f25259..1b9cbdc0127 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -69,7 +69,7 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
/* NOTE: change this value only with kprobe_mutex held */
static bool kprobe_enabled;
-DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
+static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
spinlock_t lock ____cacheline_aligned_in_smp;
@@ -115,6 +115,7 @@ enum kprobe_slot_state {
SLOT_USED = 2,
};
+static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_pages */
static struct hlist_head kprobe_insn_pages;
static int kprobe_garbage_slots;
static int collect_garbage_slots(void);
@@ -144,10 +145,10 @@ loop_end:
}
/**
- * get_insn_slot() - Find a slot on an executable page for an instruction.
+ * __get_insn_slot() - Find a slot on an executable page for an instruction.
* We allocate an executable page if there's no room on existing ones.
*/
-kprobe_opcode_t __kprobes *get_insn_slot(void)
+static kprobe_opcode_t __kprobes *__get_insn_slot(void)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos;
@@ -196,6 +197,15 @@ kprobe_opcode_t __kprobes *get_insn_slot(void)
return kip->insns;
}
+kprobe_opcode_t __kprobes *get_insn_slot(void)
+{
+ kprobe_opcode_t *ret;
+ mutex_lock(&kprobe_insn_mutex);
+ ret = __get_insn_slot();
+ mutex_unlock(&kprobe_insn_mutex);
+ return ret;
+}
+
/* Return 1 if all garbages are collected, otherwise 0. */
static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
{
@@ -226,9 +236,13 @@ static int __kprobes collect_garbage_slots(void)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos, *next;
+ int safety;
/* Ensure no-one is preepmted on the garbages */
- if (check_safety() != 0)
+ mutex_unlock(&kprobe_insn_mutex);
+ safety = check_safety();
+ mutex_lock(&kprobe_insn_mutex);
+ if (safety != 0)
return -EAGAIN;
hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
@@ -251,6 +265,7 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
struct kprobe_insn_page *kip;
struct hlist_node *pos;
+ mutex_lock(&kprobe_insn_mutex);
hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
if (kip->insns <= slot &&
slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
@@ -267,6 +282,8 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE)
collect_garbage_slots();
+
+ mutex_unlock(&kprobe_insn_mutex);
}
#endif
@@ -310,7 +327,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
struct kprobe *kp;
list_for_each_entry_rcu(kp, &p->list, list) {
- if (kp->pre_handler) {
+ if (kp->pre_handler && !kprobe_gone(kp)) {
set_kprobe_instance(kp);
if (kp->pre_handler(kp, regs))
return 1;
@@ -326,7 +343,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
struct kprobe *kp;
list_for_each_entry_rcu(kp, &p->list, list) {
- if (kp->post_handler) {
+ if (kp->post_handler && !kprobe_gone(kp)) {
set_kprobe_instance(kp);
kp->post_handler(kp, regs, flags);
reset_kprobe_instance();
@@ -393,7 +410,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
hlist_add_head(&ri->hlist, head);
}
-void kretprobe_hash_lock(struct task_struct *tsk,
+void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
struct hlist_head **head, unsigned long *flags)
{
unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
@@ -404,13 +421,15 @@ void kretprobe_hash_lock(struct task_struct *tsk,
spin_lock_irqsave(hlist_lock, *flags);
}
-static void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
+static void __kprobes kretprobe_table_lock(unsigned long hash,
+ unsigned long *flags)
{
spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
spin_lock_irqsave(hlist_lock, *flags);
}
-void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
+void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
+ unsigned long *flags)
{
unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
spinlock_t *hlist_lock;
@@ -419,7 +438,7 @@ void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
spin_unlock_irqrestore(hlist_lock, *flags);
}
-void kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
+void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
{
spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
spin_unlock_irqrestore(hlist_lock, *flags);
@@ -526,9 +545,10 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
ap->addr = p->addr;
ap->pre_handler = aggr_pre_handler;
ap->fault_handler = aggr_fault_handler;
- if (p->post_handler)
+ /* We don't care the kprobe which has gone. */
+ if (p->post_handler && !kprobe_gone(p))
ap->post_handler = aggr_post_handler;
- if (p->break_handler)
+ if (p->break_handler && !kprobe_gone(p))
ap->break_handler = aggr_break_handler;
INIT_LIST_HEAD(&ap->list);
@@ -547,17 +567,41 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
int ret = 0;
struct kprobe *ap;
+ if (kprobe_gone(old_p)) {
+ /*
+ * Attempting to insert new probe at the same location that
+ * had a probe in the module vaddr area which already
+ * freed. So, the instruction slot has already been
+ * released. We need a new slot for the new probe.
+ */
+ ret = arch_prepare_kprobe(old_p);
+ if (ret)
+ return ret;
+ }
if (old_p->pre_handler == aggr_pre_handler) {
copy_kprobe(old_p, p);
ret = add_new_kprobe(old_p, p);
+ ap = old_p;
} else {
ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
- if (!ap)
+ if (!ap) {
+ if (kprobe_gone(old_p))
+ arch_remove_kprobe(old_p);
return -ENOMEM;
+ }
add_aggr_kprobe(ap, old_p);
copy_kprobe(ap, p);
ret = add_new_kprobe(ap, p);
}
+ if (kprobe_gone(old_p)) {
+ /*
+ * If the old_p has gone, its breakpoint has been disarmed.
+ * We have to arm it again after preparing real kprobes.
+ */
+ ap->flags &= ~KPROBE_FLAG_GONE;
+ if (kprobe_enabled)
+ arch_arm_kprobe(ap);
+ }
return ret;
}
@@ -600,8 +644,7 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
return (kprobe_opcode_t *)(((char *)addr) + p->offset);
}
-static int __kprobes __register_kprobe(struct kprobe *p,
- unsigned long called_from)
+int __kprobes register_kprobe(struct kprobe *p)
{
int ret = 0;
struct kprobe *old_p;
@@ -620,28 +663,30 @@ static int __kprobes __register_kprobe(struct kprobe *p,
return -EINVAL;
}
- p->mod_refcounted = 0;
-
+ p->flags = 0;
/*
* Check if are we probing a module.
*/
probed_mod = __module_text_address((unsigned long) p->addr);
if (probed_mod) {
- struct module *calling_mod;
- calling_mod = __module_text_address(called_from);
/*
- * We must allow modules to probe themself and in this case
- * avoid incrementing the module refcount, so as to allow
- * unloading of self probing modules.
+ * We must hold a refcount of the probed module while updating
+ * its code to prohibit unexpected unloading.
*/
- if (calling_mod && calling_mod != probed_mod) {
- if (unlikely(!try_module_get(probed_mod))) {
- preempt_enable();
- return -EINVAL;
- }
- p->mod_refcounted = 1;
- } else
- probed_mod = NULL;
+ if (unlikely(!try_module_get(probed_mod))) {
+ preempt_enable();
+ return -EINVAL;
+ }
+ /*
+ * If the module freed .init.text, we couldn't insert
+ * kprobes in there.
+ */
+ if (within_module_init((unsigned long)p->addr, probed_mod) &&
+ probed_mod->state != MODULE_STATE_COMING) {
+ module_put(probed_mod);
+ preempt_enable();
+ return -EINVAL;
+ }
}
preempt_enable();
@@ -668,8 +713,9 @@ static int __kprobes __register_kprobe(struct kprobe *p,
out:
mutex_unlock(&kprobe_mutex);
- if (ret && probed_mod)
+ if (probed_mod)
module_put(probed_mod);
+
return ret;
}
@@ -697,16 +743,16 @@ valid_p:
list_is_singular(&old_p->list))) {
/*
* Only probe on the hash list. Disarm only if kprobes are
- * enabled - otherwise, the breakpoint would already have
- * been removed. We save on flushing icache.
+ * enabled and not gone - otherwise, the breakpoint would
+ * already have been removed. We save on flushing icache.
*/
- if (kprobe_enabled)
+ if (kprobe_enabled && !kprobe_gone(old_p))
arch_disarm_kprobe(p);
hlist_del_rcu(&old_p->hlist);
} else {
- if (p->break_handler)
+ if (p->break_handler && !kprobe_gone(p))
old_p->break_handler = NULL;
- if (p->post_handler) {
+ if (p->post_handler && !kprobe_gone(p)) {
list_for_each_entry_rcu(list_p, &old_p->list, list) {
if ((list_p != p) && (list_p->post_handler))
goto noclean;
@@ -721,39 +767,27 @@ noclean:
static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
{
- struct module *mod;
struct kprobe *old_p;
- if (p->mod_refcounted) {
- /*
- * Since we've already incremented refcount,
- * we don't need to disable preemption.
- */
- mod = module_text_address((unsigned long)p->addr);
- if (mod)
- module_put(mod);
- }
-
- if (list_empty(&p->list) || list_is_singular(&p->list)) {
- if (!list_empty(&p->list)) {
- /* "p" is the last child of an aggr_kprobe */
- old_p = list_entry(p->list.next, struct kprobe, list);
- list_del(&p->list);
- kfree(old_p);
- }
+ if (list_empty(&p->list))
arch_remove_kprobe(p);
+ else if (list_is_singular(&p->list)) {
+ /* "p" is the last child of an aggr_kprobe */
+ old_p = list_entry(p->list.next, struct kprobe, list);
+ list_del(&p->list);
+ arch_remove_kprobe(old_p);
+ kfree(old_p);
}
}
-static int __register_kprobes(struct kprobe **kps, int num,
- unsigned long called_from)
+int __kprobes register_kprobes(struct kprobe **kps, int num)
{
int i, ret = 0;
if (num <= 0)
return -EINVAL;
for (i = 0; i < num; i++) {
- ret = __register_kprobe(kps[i], called_from);
+ ret = register_kprobe(kps[i]);
if (ret < 0) {
if (i > 0)
unregister_kprobes(kps, i);
@@ -763,26 +797,11 @@ static int __register_kprobes(struct kprobe **kps, int num,
return ret;
}
-/*
- * Registration and unregistration functions for kprobe.
- */
-int __kprobes register_kprobe(struct kprobe *p)
-{
- return __register_kprobes(&p, 1,
- (unsigned long)__builtin_return_address(0));
-}
-
void __kprobes unregister_kprobe(struct kprobe *p)
{
unregister_kprobes(&p, 1);
}
-int __kprobes register_kprobes(struct kprobe **kps, int num)
-{
- return __register_kprobes(kps, num,
- (unsigned long)__builtin_return_address(0));
-}
-
void __kprobes unregister_kprobes(struct kprobe **kps, int num)
{
int i;
@@ -811,8 +830,7 @@ unsigned long __weak arch_deref_entry_point(void *entry)
return (unsigned long)entry;
}
-static int __register_jprobes(struct jprobe **jps, int num,
- unsigned long called_from)
+int __kprobes register_jprobes(struct jprobe **jps, int num)
{
struct jprobe *jp;
int ret = 0, i;
@@ -830,7 +848,7 @@ static int __register_jprobes(struct jprobe **jps, int num,
/* Todo: Verify probepoint is a function entry point */
jp->kp.pre_handler = setjmp_pre_handler;
jp->kp.break_handler = longjmp_break_handler;
- ret = __register_kprobe(&jp->kp, called_from);
+ ret = register_kprobe(&jp->kp);
}
if (ret < 0) {
if (i > 0)
@@ -843,8 +861,7 @@ static int __register_jprobes(struct jprobe **jps, int num,
int __kprobes register_jprobe(struct jprobe *jp)
{
- return __register_jprobes(&jp, 1,
- (unsigned long)__builtin_return_address(0));
+ return register_jprobes(&jp, 1);
}
void __kprobes unregister_jprobe(struct jprobe *jp)
@@ -852,12 +869,6 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
unregister_jprobes(&jp, 1);
}
-int __kprobes register_jprobes(struct jprobe **jps, int num)
-{
- return __register_jprobes(jps, num,
- (unsigned long)__builtin_return_address(0));
-}
-
void __kprobes unregister_jprobes(struct jprobe **jps, int num)
{
int i;
@@ -920,8 +931,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
return 0;
}
-static int __kprobes __register_kretprobe(struct kretprobe *rp,
- unsigned long called_from)
+int __kprobes register_kretprobe(struct kretprobe *rp)
{
int ret = 0;
struct kretprobe_instance *inst;
@@ -967,21 +977,20 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
rp->nmissed = 0;
/* Establish function entry probe point */
- ret = __register_kprobe(&rp->kp, called_from);
+ ret = register_kprobe(&rp->kp);
if (ret != 0)
free_rp_inst(rp);
return ret;
}
-static int __register_kretprobes(struct kretprobe **rps, int num,
- unsigned long called_from)
+int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
int ret = 0, i;
if (num <= 0)
return -EINVAL;
for (i = 0; i < num; i++) {
- ret = __register_kretprobe(rps[i], called_from);
+ ret = register_kretprobe(rps[i]);
if (ret < 0) {
if (i > 0)
unregister_kretprobes(rps, i);
@@ -991,23 +1000,11 @@ static int __register_kretprobes(struct kretprobe **rps, int num,
return ret;
}
-int __kprobes register_kretprobe(struct kretprobe *rp)
-{
- return __register_kretprobes(&rp, 1,
- (unsigned long)__builtin_return_address(0));
-}
-
void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
unregister_kretprobes(&rp, 1);
}
-int __kprobes register_kretprobes(struct kretprobe **rps, int num)
-{
- return __register_kretprobes(rps, num,
- (unsigned long)__builtin_return_address(0));
-}
-
void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
{
int i;
@@ -1055,6 +1052,72 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
#endif /* CONFIG_KRETPROBES */
+/* Set the kprobe gone and remove its instruction buffer. */
+static void __kprobes kill_kprobe(struct kprobe *p)
+{
+ struct kprobe *kp;
+ p->flags |= KPROBE_FLAG_GONE;
+ if (p->pre_handler == aggr_pre_handler) {
+ /*
+ * If this is an aggr_kprobe, we have to list all the
+ * chained probes and mark them GONE.
+ */
+ list_for_each_entry_rcu(kp, &p->list, list)
+ kp->flags |= KPROBE_FLAG_GONE;
+ p->post_handler = NULL;
+ p->break_handler = NULL;
+ }
+ /*
+ * Here, we can remove insn_slot safely, because no thread calls
+ * the original probed function (which will be freed soon) any more.
+ */
+ arch_remove_kprobe(p);
+}
+
+/* Module notifier callback: kill kprobes placed in module text being freed */
+static int __kprobes kprobes_module_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct kprobe *p;
+ unsigned int i;
+ int checkcore = (val == MODULE_STATE_GOING);
+
+ if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
+ return NOTIFY_DONE;
+
+ /*
+ * On MODULE_STATE_GOING both the module's .text and .init.text
+ * sections are about to be freed. On MODULE_STATE_LIVE only the
+ * .init.text section is about to be freed. We need to disable
+ * the kprobes which have been inserted in those sections.
+ */
+ mutex_lock(&kprobe_mutex);
+ for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+ head = &kprobe_table[i];
+ hlist_for_each_entry_rcu(p, node, head, hlist)
+ if (within_module_init((unsigned long)p->addr, mod) ||
+ (checkcore &&
+ within_module_core((unsigned long)p->addr, mod))) {
+ /*
+ * The vaddr at which this probe is installed will
+ * soon be vfreed but not synced to disk. Hence,
+ * disarming the breakpoint isn't needed.
+ */
+ kill_kprobe(p);
+ }
+ }
+ mutex_unlock(&kprobe_mutex);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block kprobe_module_nb = {
+ .notifier_call = kprobes_module_callback,
+ .priority = 0
+};
+
static int __init init_kprobes(void)
{
int i, err = 0;
@@ -1111,6 +1174,9 @@ static int __init init_kprobes(void)
err = arch_init_kprobes();
if (!err)
err = register_die_notifier(&kprobe_exceptions_nb);
+ if (!err)
+ err = register_module_notifier(&kprobe_module_nb);
+
kprobes_initialized = (err == 0);
if (!err)
@@ -1131,10 +1197,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
else
kprobe_type = "k";
if (sym)
- seq_printf(pi, "%p %s %s+0x%x %s\n", p->addr, kprobe_type,
- sym, offset, (modname ? modname : " "));
+ seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type,
+ sym, offset, (modname ? modname : " "),
+ (kprobe_gone(p) ? "[GONE]" : ""));
else
- seq_printf(pi, "%p %s %p\n", p->addr, kprobe_type, p->addr);
+ seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr,
+ (kprobe_gone(p) ? "[GONE]" : ""));
}
static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1215,7 +1283,8 @@ static void __kprobes enable_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist)
- arch_arm_kprobe(p);
+ if (!kprobe_gone(p))
+ arch_arm_kprobe(p);
}
kprobe_enabled = true;
@@ -1244,7 +1313,7 @@ static void __kprobes disable_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist) {
- if (!arch_trampoline_kprobe(p))
+ if (!arch_trampoline_kprobe(p) && !kprobe_gone(p))
arch_disarm_kprobe(p);
}
}
diff --git a/kernel/module.c b/kernel/module.c
index f47cce910f2..496dcb57b60 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -43,7 +43,6 @@
#include <linux/device.h>
#include <linux/string.h>
#include <linux/mutex.h>
-#include <linux/unwind.h>
#include <linux/rculist.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -1449,8 +1448,6 @@ static void free_module(struct module *mod)
remove_sect_attrs(mod);
mod_kobject_remove(mod);
- unwind_remove_table(mod->unwind_info, 0);
-
/* Arch-specific cleanup. */
module_arch_cleanup(mod);
@@ -1867,7 +1864,6 @@ static noinline struct module *load_module(void __user *umod,
unsigned int symindex = 0;
unsigned int strindex = 0;
unsigned int modindex, versindex, infoindex, pcpuindex;
- unsigned int unwindex = 0;
unsigned int num_kp, num_mcount;
struct kernel_param *kp;
struct module *mod;
@@ -1957,9 +1953,6 @@ static noinline struct module *load_module(void __user *umod,
versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
-#ifdef ARCH_UNWIND_SECTION_NAME
- unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
-#endif
/* Don't keep modinfo and version sections. */
sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1969,8 +1962,6 @@ static noinline struct module *load_module(void __user *umod,
sechdrs[symindex].sh_flags |= SHF_ALLOC;
sechdrs[strindex].sh_flags |= SHF_ALLOC;
#endif
- if (unwindex)
- sechdrs[unwindex].sh_flags |= SHF_ALLOC;
/* Check module struct version now, before we try to use module. */
if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -2267,11 +2258,6 @@ static noinline struct module *load_module(void __user *umod,
add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
- /* Size of section 0 is 0, so this works well if no unwind info. */
- mod->unwind_info = unwind_add_table(mod,
- (void *)sechdrs[unwindex].sh_addr,
- sechdrs[unwindex].sh_size);
-
/* Get rid of temporary copy */
vfree(hdr);
@@ -2366,11 +2352,12 @@ sys_init_module