Diffstat (limited to 'kernel/events')
-rw-r--r--  kernel/events/core.c     117
-rw-r--r--  kernel/events/uprobes.c   58
2 files changed, 129 insertions(+), 46 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 689237a0c5e..6b17ac1b0c2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -40,6 +40,7 @@
#include <linux/mm_types.h>
#include <linux/cgroup.h>
#include <linux/module.h>
+#include <linux/mman.h>
#include "internal.h"
@@ -608,7 +609,8 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
if (!f.file)
return -EBADF;
- css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
+ css = css_tryget_online_from_dir(f.file->f_dentry,
+ &perf_event_cgrp_subsys);
if (IS_ERR(css)) {
ret = PTR_ERR(css);
goto out;
@@ -2318,7 +2320,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
next_parent = rcu_dereference(next_ctx->parent_ctx);
/* If either context lacks a parent, they cannot be clones. */
- if (!parent && !next_parent)
+ if (!parent || !next_parent)
goto unlock;
if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
@@ -2973,6 +2975,22 @@ out:
local_irq_restore(flags);
}
+void perf_event_exec(void)
+{
+ struct perf_event_context *ctx;
+ int ctxn;
+
+ rcu_read_lock();
+ for_each_task_context_nr(ctxn) {
+ ctx = current->perf_event_ctxp[ctxn];
+ if (!ctx)
+ continue;
+
+ perf_event_enable_on_exec(ctx);
+ }
+ rcu_read_unlock();
+}
+
/*
* Cross CPU call to read the hardware event
*/
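
The call site for the new helper lives outside kernel/events and is not
visible in this diff; the following is a sketch of the assumed hook in the
exec path (the placement in setup_new_exec() is an assumption, not
confirmed by this patch):

    /* fs/exec.c -- assumed caller, not part of this diff */
    void setup_new_exec(struct linux_binprm *bprm)
    {
        /* ... existing exec setup ... */

        /*
         * Re-enable attr.enable_on_exec events on all of current's
         * contexts; previously this was a side effect of
         * perf_event_comm().
         */
        perf_event_exec();
    }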
@@ -5074,21 +5092,9 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
NULL);
}
-void perf_event_comm(struct task_struct *task)
+void perf_event_comm(struct task_struct *task, bool exec)
{
struct perf_comm_event comm_event;
- struct perf_event_context *ctx;
- int ctxn;
-
- rcu_read_lock();
- for_each_task_context_nr(ctxn) {
- ctx = task->perf_event_ctxp[ctxn];
- if (!ctx)
- continue;
-
- perf_event_enable_on_exec(ctx);
- }
- rcu_read_unlock();
if (!atomic_read(&nr_comm_events))
return;
@@ -5100,7 +5106,7 @@ void perf_event_comm(struct task_struct *task)
.event_id = {
.header = {
.type = PERF_RECORD_COMM,
- .misc = 0,
+ .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
/* .size */
},
/* .pid */
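
With the misc bit set, a ring-buffer consumer can distinguish a comm
change caused by exec() from a plain prctl(PR_SET_NAME) rename. A minimal
user-space sketch (the surrounding ring-buffer walk is assumed, and
PERF_RECORD_MISC_COMM_EXEC comes from the uapi header change elsewhere in
this series):

    /* user space: classify a PERF_RECORD_COMM record */
    #include <linux/perf_event.h>
    #include <stdbool.h>

    static bool comm_is_exec(const struct perf_event_header *hdr)
    {
        return hdr->type == PERF_RECORD_COMM &&
               (hdr->misc & PERF_RECORD_MISC_COMM_EXEC);
    }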
@@ -5123,6 +5129,7 @@ struct perf_mmap_event {
int maj, min;
u64 ino;
u64 ino_generation;
+ u32 prot, flags;
struct {
struct perf_event_header header;
@@ -5164,6 +5171,8 @@ static void perf_event_mmap_output(struct perf_event *event,
mmap_event->event_id.header.size += sizeof(mmap_event->min);
mmap_event->event_id.header.size += sizeof(mmap_event->ino);
mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
+ mmap_event->event_id.header.size += sizeof(mmap_event->prot);
+ mmap_event->event_id.header.size += sizeof(mmap_event->flags);
}
perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
@@ -5182,6 +5191,8 @@ static void perf_event_mmap_output(struct perf_event *event,
perf_output_put(&handle, mmap_event->min);
perf_output_put(&handle, mmap_event->ino);
perf_output_put(&handle, mmap_event->ino_generation);
+ perf_output_put(&handle, mmap_event->prot);
+ perf_output_put(&handle, mmap_event->flags);
}
__output_copy(&handle, mmap_event->file_name,
@@ -5200,6 +5211,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
struct file *file = vma->vm_file;
int maj = 0, min = 0;
u64 ino = 0, gen = 0;
+ u32 prot = 0, flags = 0;
unsigned int size;
char tmp[16];
char *buf = NULL;
@@ -5230,6 +5242,28 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
gen = inode->i_generation;
maj = MAJOR(dev);
min = MINOR(dev);
+
+ if (vma->vm_flags & VM_READ)
+ prot |= PROT_READ;
+ if (vma->vm_flags & VM_WRITE)
+ prot |= PROT_WRITE;
+ if (vma->vm_flags & VM_EXEC)
+ prot |= PROT_EXEC;
+
+ if (vma->vm_flags & VM_MAYSHARE)
+ flags = MAP_SHARED;
+ else
+ flags = MAP_PRIVATE;
+
+ if (vma->vm_flags & VM_DENYWRITE)
+ flags |= MAP_DENYWRITE;
+ if (vma->vm_flags & VM_MAYEXEC)
+ flags |= MAP_EXECUTABLE;
+ if (vma->vm_flags & VM_LOCKED)
+ flags |= MAP_LOCKED;
+ if (vma->vm_flags & VM_HUGETLB)
+ flags |= MAP_HUGETLB;
+
goto got_name;
} else {
name = (char *)arch_vma_name(vma);
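
Taken together with the two new perf_output_put() calls above, the
PERF_RECORD_MMAP2 body now reads roughly as follows; this struct is an
illustrative sketch of the wire layout, not a uapi definition:

    struct perf_record_mmap2_sketch {
        struct perf_event_header header;
        __u32 pid, tid;
        __u64 addr, len, pgoff;
        __u32 maj, min;
        __u64 ino, ino_generation;
        __u32 prot, flags;          /* new: PROT_* and MAP_* values */
        char  filename[];
        /* followed by the sample_id fields, if requested */
    };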
@@ -5270,6 +5304,8 @@ got_name:
mmap_event->min = min;
mmap_event->ino = ino;
mmap_event->ino_generation = gen;
+ mmap_event->prot = prot;
+ mmap_event->flags = flags;
if (!(vma->vm_flags & VM_EXEC))
mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -5310,6 +5346,8 @@ void perf_event_mmap(struct vm_area_struct *vma)
/* .min (attr_mmap2 only) */
/* .ino (attr_mmap2 only) */
/* .ino_generation (attr_mmap2 only) */
+ /* .prot (attr_mmap2 only) */
+ /* .flags (attr_mmap2 only) */
};
perf_event_mmap_event(&mmap_event);
@@ -6892,10 +6930,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
if (ret)
return -EFAULT;
- /* disabled for now */
- if (attr->mmap2)
- return -EINVAL;
-
if (attr->__reserved_1)
return -EINVAL;
@@ -7121,6 +7155,13 @@ SYSCALL_DEFINE5(perf_event_open,
}
}
+ if (is_sampling_event(event)) {
+ if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
+ err = -ENOTSUPP;
+ goto err_alloc;
+ }
+ }
+
account_event(event);
/*
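
The new check refuses sampling events on PMUs that cannot raise an
overflow interrupt. A driver advertises this at registration time; a
hedged sketch (the PMU name is invented):

    /* sketch: a counting-only PMU declares it cannot sample */
    static struct pmu example_pmu = {
        /* .task_ctx_nr, .event_init, .add, .del, .read, ... */
        .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
    };

    /*
     * perf_event_open() with attr.sample_period != 0 against this PMU
     * now fails with ENOTSUPP instead of silently never sampling.
     */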
@@ -7417,7 +7458,19 @@ __perf_event_exit_task(struct perf_event *child_event,
struct perf_event_context *child_ctx,
struct task_struct *child)
{
- perf_remove_from_context(child_event, true);
+ /*
+ * Do not destroy the 'original' grouping; because of the context
+ * switch optimization the original events could've ended up in a
+ * random child task.
+ *
+ * If we were to destroy the original group, all group related
+ * operations would cease to function properly after this random
+ * child dies.
+ *
+ * Do destroy all inherited groups, we don't care about those
+ * and being thorough is better.
+ */
+ perf_remove_from_context(child_event, !!child_event->parent);
/*
* It can happen that the parent exits first, and has events
@@ -7432,8 +7485,8 @@ __perf_event_exit_task(struct perf_event *child_event,
static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
{
- struct perf_event *child_event;
- struct perf_event_context *child_ctx;
+ struct perf_event *child_event, *next;
+ struct perf_event_context *child_ctx, *parent_ctx;
unsigned long flags;
if (likely(!child->perf_event_ctxp[ctxn])) {
@@ -7458,6 +7511,15 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
raw_spin_lock(&child_ctx->lock);
task_ctx_sched_out(child_ctx);
child->perf_event_ctxp[ctxn] = NULL;
+
+ /*
+ * In order to avoid freeing: child_ctx->parent_ctx->task
+ * under perf_event_context::lock, grab another reference.
+ */
+ parent_ctx = child_ctx->parent_ctx;
+ if (parent_ctx)
+ get_ctx(parent_ctx);
+
/*
* If this context is a clone; unclone it so it can't get
* swapped to another process while we're removing all
@@ -7468,6 +7530,13 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
/*
+ * Now that we no longer hold perf_event_context::lock, drop
+ * our extra child_ctx->parent_ctx reference.
+ */
+ if (parent_ctx)
+ put_ctx(parent_ctx);
+
+ /*
* Report the task dead after unscheduling the events so that we
* won't get any samples after PERF_RECORD_EXIT. We can however still
* get a few PERF_RECORD_READ events.
@@ -7486,7 +7555,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
*/
mutex_lock(&child_ctx->mutex);
- list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
+ list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
__perf_event_exit_task(child_event, child_ctx, child);
mutex_unlock(&child_ctx->mutex);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index adcd76a9683..6f3254e8c13 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -36,6 +36,7 @@
#include "../../mm/internal.h" /* munlock_vma_page */
#include <linux/percpu-rwsem.h>
#include <linux/task_work.h>
+#include <linux/shmem_fs.h>
#include <linux/uprobes.h>
@@ -127,7 +128,7 @@ struct xol_area {
*/
static bool valid_vma(struct vm_area_struct *vma, bool is_register)
{
- vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;
+ vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;
if (is_register)
flags |= VM_WRITE;
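
VM_MAYSHARE is set for every MAP_SHARED mapping, while VM_SHARED is
cleared again when the backing file was opened without write permission;
testing VM_MAYSHARE therefore also rejects read-only shared mappings that
the old VM_SHARED test let through. A user-space sketch of that case
(path and length are illustrative):

    /*
     * MAP_SHARED of an O_RDONLY file: the vma carries VM_MAYSHARE but
     * not VM_SHARED, so the old check wrongly accepted it.
     */
    int fd = open("/bin/true", O_RDONLY);
    void *p = mmap(NULL, 4096, PROT_READ | PROT_EXEC,
                   MAP_SHARED, fd, 0);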
@@ -279,18 +280,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
* supported by that architecture then we need to modify is_trap_at_addr and
* uprobe_write_opcode accordingly. This would never be a problem for archs
* that have fixed length instructions.
- */
-
-/*
+ *
* uprobe_write_opcode - write the opcode at a given virtual address.
* @mm: the probed process address space.
* @vaddr: the virtual address to store the opcode.
* @opcode: opcode to be written at @vaddr.
*
- * Called with mm->mmap_sem held (for read and with a reference to
- * mm).
- *
- * For mm @mm, write the opcode at @vaddr.
+ * Called with mm->mmap_sem held for write.
* Return 0 (success) or a negative errno.
*/
int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
@@ -310,21 +306,25 @@ retry:
if (ret <= 0)
goto put_old;
+ ret = anon_vma_prepare(vma);
+ if (ret)
+ goto put_old;
+
ret = -ENOMEM;
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
if (!new_page)
goto put_old;
- __SetPageUptodate(new_page);
+ if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
+ goto put_new;
+ __SetPageUptodate(new_page);
copy_highpage(new_page, old_page);
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
- ret = anon_vma_prepare(vma);
- if (ret)
- goto put_new;
-
ret = __replace_page(vma, vaddr, old_page, new_page);
+ if (ret)
+ mem_cgroup_uncharge_page(new_page);
put_new:
page_cache_release(new_page);
@@ -537,14 +537,15 @@ static int __copy_insn(struct address_space *mapping, struct file *filp,
void *insn, int nbytes, loff_t offset)
{
struct page *page;
-
- if (!mapping->a_ops->readpage)
- return -EIO;
/*
- * Ensure that the page that has the original instruction is
- * populated and in page-cache.
+ * Ensure that the page that has the original instruction is populated
+ * and in page-cache. If ->readpage == NULL it must be shmem_mapping(),
+ * see uprobe_register().
*/
- page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
+ if (mapping->a_ops->readpage)
+ page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
+ else
+ page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT);
if (IS_ERR(page))
return PTR_ERR(page);
@@ -845,7 +846,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
{
int err;
- if (!consumer_del(uprobe, uc)) /* WARN? */
+ if (WARN_ON(!consumer_del(uprobe, uc)))
return;
err = register_for_each_vma(uprobe, NULL);
@@ -880,6 +881,9 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
if (!uc->handler && !uc->ret_handler)
return -EINVAL;
+ /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */
+ if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping))
+ return -EIO;
/* Racy, just to catch the obvious mistakes */
if (offset > i_size_read(inode))
return -EINVAL;
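
This registration-time check is the counterpart of the __copy_insn()
change above: an inode is only probeable if its contents can be read
back, either via ->readpage or, for shmem (which provides no ->readpage),
via shmem_read_mapping_page(). The invariant, written out as a helper for
clarity (the helper name is invented):

    /*
     * sketch: the property uprobe_register() now enforces up front,
     * so __copy_insn() can dispatch without an error path of its own
     */
    static bool uprobe_mapping_readable(struct address_space *mapping)
    {
        return mapping->a_ops->readpage || shmem_mapping(mapping);
    }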
@@ -923,7 +927,7 @@ int uprobe_apply(struct inode *inode, loff_t offset,
int ret = -ENOENT;
uprobe = find_uprobe(inode, offset);
- if (!uprobe)
+ if (WARN_ON(!uprobe))
return ret;
down_write(&uprobe->register_rwsem);
@@ -948,7 +952,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
struct uprobe *uprobe;
uprobe = find_uprobe(inode, offset);
- if (!uprobe)
+ if (WARN_ON(!uprobe))
return;
down_write(&uprobe->register_rwsem);
@@ -1361,6 +1365,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
}
+unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ if (unlikely(utask && utask->active_uprobe))
+ return utask->vaddr;
+
+ return instruction_pointer(regs);
+}
+
/*
* Called with no locks held.
* Called in context of an exiting or an exec-ing thread.
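
While a probed instruction is being single-stepped out of line, the CPU's
instruction pointer sits in the XOL area rather than at the probed
address; uprobe_get_trap_addr() hides that window from its callers. A
sketch of the expected arch-side use when building siginfo for a trap
raised from the XOL slot (the exact call site is outside this diff):

    /*
     * sketch: arch trap handling reports the probed address, not the
     * XOL slot, in the user-visible siginfo
     */
    info->si_signo = SIGTRAP;
    info->si_addr  = (void __user *)uprobe_get_trap_addr(regs);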