/*
* Copyright (C) 1995 Linus Torvalds
* Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
*/
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/bootmem.h> /* for max_low_pfn */
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>
#include <asm/system.h>
#include <asm/desc.h>
#include <asm/segment.h>
#include <asm/pgalloc.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm-generic/sections.h>
/*
* Page fault error code bits
* bit 0 == 0 means no page found, 1 means protection fault
* bit 1 == 0 means read, 1 means write
* bit 2 == 0 means kernel, 1 means user-mode
* bit 3 == 1 means use of reserved bit detected
* bit 4 == 1 means fault was an instruction fetch
*/
#define PF_PROT (1<<0) /* bit 0: set = protection fault, clear = no page found */
#define PF_WRITE (1<<1) /* bit 1: set = write access, clear = read access */
#define PF_USER (1<<2) /* bit 2: set = fault in user mode, clear = kernel mode */
#define PF_RSVD (1<<3) /* bit 3: set = use of a reserved bit was detected */
#define PF_INSTR (1<<4) /* bit 4: set = fault was an instruction fetch */
#ifdef CONFIG_PAGE_FAULT_HANDLERS
/*
 * List of registered page fault handlers. It is walked under
 * rcu_read_lock() by handle_custom_pf() and modified with the _rcu hlist
 * helpers by the register/unregister functions.
 */
static HLIST_HEAD(pf_handlers); /* protected by RCU */
/* Taken (irqsave) around every add/remove on pf_handlers to serialize writers. */
static DEFINE_SPINLOCK(pf_handlers_writer);
/**
 * register_page_fault_handler - add a handler to the page fault handler list
 * @new_pfh: handler descriptor whose hlist node is linked into the list
 *
 * The descriptor is inserted at the head of the handler list using
 * hlist_add_head_rcu(). The insertion is done while holding
 * pf_handlers_writer, taken with spin_lock_irqsave(), so that concurrent
 * list updates are serialized.
 */
void register_page_fault_handler(struct pf_handler *new_pfh)
{
	unsigned long irq_state;

	spin_lock_irqsave(&pf_handlers_writer, irq_state);
	hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers);
	spin_unlock_irqrestore(&pf_handlers_writer, irq_state);
}
EXPORT_SYMBOL_GPL(register_page_fault_handler);
/**
 * unregister_page_fault_handler - remove a handler from the handler list
 * @old_pfh: handler descriptor previously passed to
 *           register_page_fault_handler()
 *
 * The descriptor is unlinked with hlist_del_rcu() while holding
 * pf_handlers_writer.
 *
 * This function does NOT wait for readers: a fault in progress may still be
 * using @old_pfh when it returns. The caller must make sure @old_pfh is no
 * longer in use before freeing it. Because the list is protected by RCU,
 * this can be done by e.g. calling synchronize_rcu().
 */
void unregister_page_fault_handler(struct pf_handler *old_pfh)
{
	unsigned long irq_state;

	spin_lock_irqsave(&pf_handlers_writer, irq_state);
	hlist_del_rcu(&old_pfh->hlist);
	spin_unlock_irqrestore(&pf_handlers_writer, irq_state);
}
EXPORT_SYMBOL_GPL(unregister_page_fault_handler);
#endif
/*
 * Offer the fault to each registered page fault handler in turn.
 *
 * The handlers are called in list order inside an RCU read-side critical
 * section, and the walk stops at the first one that returns non-zero.
 *
 * Returns non-zero if do_page_fault() should return (the value comes from
 * the handler that stopped the walk). Returns 0 if no handler returned
 * non-zero, if the list is empty, or if CONFIG_PAGE_FAULT_HANDLERS is not
 * set.
 */
static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code,
			    unsigned long address)
{
#ifndef CONFIG_PAGE_FAULT_HANDLERS
	return 0;
#else
	struct pf_handler *pfh;
	struct hlist_node *pos;
	int claimed = 0;

	/* Nothing registered: skip the RCU read-side section entirely. */
	if (hlist_empty(&pf_handlers))
		return 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(pfh, pos, &pf_handlers, hlist) {
		claimed = pfh->handler(regs, error_code, address);
		if (claimed != 0)
			break;
	}
	rcu_read_unlock();

	return claimed;
#endif
}
/*
 * Give kprobes a chance to handle a page fault.
 *
 * Only faults that did not come from user mode are considered. For those,
 * kprobe_fault_handler() is called with trap number 14 if a kprobe is
 * currently running.
 *
 * Returns 1 if the kprobe fault handler returned non-zero, 0 otherwise
 * (including when CONFIG_KPROBES is not set).
 */
static inline int notify_page_fault(struct pt_regs *regs)
{
#ifndef CONFIG_KPROBES
	return 0;
#else
	int handled = 0;

	/* Faults coming from user mode are never passed to kprobes. */
#ifdef CONFIG_X86_32
	if (user_mode_vm(regs))
		return 0;
#else
	if (user_mode(regs))
		return 0;
#endif

	/* kprobe_running() needs smp_processor_id(), so disable preemption. */
	preempt_disable();
	if (kprobe_running() && kprobe_fault_handler(regs, 14))
		handled = 1;
	preempt_enable();

	return handled;
#endif
}
/*
* X86_32
* Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
* Check that here and ignore it.
*
* X86_64
* Sometimes the CPU reports invalid exceptions on prefetch.
* Check that here and ignore it.
*
* Opcode checker based on code by Richard Brunner
*/
static int is_prefetch(struct pt_regs *regs, unsigned long addr,
unsigned long error_code)
{
unsigned char *instr;
int scan_more = 1;
int prefetch = 0;
unsigned char *max_instr;
/*
* If it was an exec (instruction fetch) fault on an NX page, then
* do not ignore the fault:
*/
if (error_code & PF_INSTR)
return 0;
instr = (unsigned char *)convert_ip_to_linear(current, regs);
max_instr = instr + 15;
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
return 0;
while (scan_more && instr < max_instr) {
unsigned char opcode;
unsigned char instr_hi;
unsigned char instr_lo;
if (probe_kernel_address(instr, opcode))
break;
instr_hi = opcode & 0xf0;
instr_lo = opcode & 0x0f;
instr++;
switch (instr_hi) {
case 0x20:
case 0x30:
/*
* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
* In X86_64 long mode, the CPU will signal invalid
* opcode if some of these prefixes are present so
* X86_64 will never get here anyway
*/
scan_more = ((instr_lo & 7)