From 3cb4a0bb1e773e3c41800b33a3f7dab32bd06c64 Mon Sep 17 00:00:00 2001
From: "Kawai, Hidehiro"
Date: Thu, 19 Jul 2007 01:48:28 -0700
Subject: coredump masking: add an interface for core dump filter

This patch adds an interface to set/reset flags which determine whether
each memory segment should be dumped when a core file is generated.

A /proc/<pid>/coredump_filter file is provided to access the flags. You
can change the flag status for a particular process by writing to the
file, and read the current status from it. The flag status is inherited
by the child process when it is created.

Signed-off-by: Hidehiro Kawai
Cc: Alan Cox
Cc: David Howells
Cc: Hugh Dickins
Cc: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/fork.c | 2 ++
 1 file changed, 2 insertions(+)
(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index ba39bdb2a7b..46983899822 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -334,6 +334,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
+	mm->flags = (current->mm) ? current->mm->flags
+				  : MMF_DUMP_FILTER_DEFAULT;
 	mm->core_waiters = 0;
 	mm->nr_ptes = 0;
 	set_mm_counter(mm, file_rss, 0);
--
cgit v1.2.3-70-g09d2

From 5992b6dac0d23a2b51a1ccbaf8f1a2e62097b12b Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 19 Jul 2007 01:49:21 -0700
Subject: lguest: export symbols for lguest as a module

lguest does some fairly low-level things to support a host, which normal
modules don't need:

math_state_restore: When the guest triggers a Device Not Available fault,
we need to be able to restore the FPU.

__put_task_struct: We need to hold a reference to another task for
inter-guest I/O, and put_task_struct() is an inline function which calls
__put_task_struct.

access_process_vm: We need to access another task for inter-guest I/O.

map_vm_area & __get_vm_area: We need to map the switcher shim
(ie. monitor) at 0xFFC01000.
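For illustration only (this sketch is not part of the patch), the last two
exports let a module place specific pages at a fixed virtual address, the
same pattern lg.ko's map_switcher() uses; the address and the helper name
here are made up:

	/* Hypothetical module fragment: reserve a virtual range with
	 * __get_vm_area(), then back it with a page via map_vm_area(). */
	#include <linux/mm.h>
	#include <linux/vmalloc.h>

	static struct vm_struct *area;

	static int example_map_one_page(struct page *pg)
	{
		struct page **pagep = &pg;

		/* Reserve one page of address space at an arbitrary fixed spot. */
		area = __get_vm_area(PAGE_SIZE, VM_ALLOC, 0xFFC00000, VMALLOC_END);
		if (!area)
			return -ENOMEM;

		/* Point it at our page; map_vm_area() advances pagep. */
		return map_vm_area(area, PAGE_KERNEL, &pagep);
	}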
Signed-off-by: Rusty Russell
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/i386/kernel/traps.c | 1 +
 kernel/fork.c            | 1 +
 mm/memory.c              | 1 +
 mm/vmalloc.c             | 2 ++
 4 files changed, 5 insertions(+)
(limited to 'kernel/fork.c')

diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index d32fd4b6f78..109ebbcde58 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -1056,6 +1056,7 @@ asmlinkage void math_state_restore(void)
 	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
 	tsk->fpu_counter++;
 }
+EXPORT_SYMBOL_GPL(math_state_restore);

 #ifndef CONFIG_MATH_EMULATION

diff --git a/kernel/fork.c b/kernel/fork.c
index 46983899822..e7a2d995b08 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -127,6 +127,7 @@ void __put_task_struct(struct task_struct *tsk)
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
 }
+EXPORT_SYMBOL_GPL(__put_task_struct);

 void __init fork_init(unsigned long mempages)
 {

diff --git a/mm/memory.c b/mm/memory.c
index 50dd3d1f4d1..8aace3db3a5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2865,3 +2865,4 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 	return buf - old_buf;
 }
+EXPORT_SYMBOL_GPL(access_process_vm);

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 213d5e5079f..3cee76a8c9f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -164,6 +164,7 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 	flush_cache_vmap((unsigned long) area->addr, end);
 	return err;
 }
+EXPORT_SYMBOL_GPL(map_vm_area);

 static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
 					    unsigned long start, unsigned long end,
@@ -242,6 +243,7 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 {
 	return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL);
 }
+EXPORT_SYMBOL_GPL(__get_vm_area);

 /**
  * get_vm_area  -  reserve a contiguous kernel virtual area
--
cgit v1.2.3-70-g09d2

From d7e28ffe6c74416b54345d6004fd0964c115b12c Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 19 Jul 2007 01:49:23 -0700
Subject: lguest: the host code

This is the code for the "lg.ko" module, which allows lguest guests to be
launched.
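As a hypothetical sketch of the userspace side (the Launcher itself is not
part of this patch), driving /dev/lguest comes down to one LHREQ_INITIALIZE
write followed by read() calls; the four numeric arguments are placeholders
the real Launcher computes, and LHREQ_INITIALIZE is assumed to come from
the new lguest_launcher.h header:

	/* Hypothetical Launcher fragment, based on lguest_user.c below. */
	#include <fcntl.h>
	#include <stdint.h>
	#include <unistd.h>
	#include <linux/lguest_launcher.h>

	static int launch(uint32_t pfnlimit, uint32_t pgdir,
			  uint32_t start, uint32_t pageoffset)
	{
		/* A write is the request code followed by its arguments. */
		uint32_t args[5] = { LHREQ_INITIALIZE,
				     pfnlimit, pgdir, start, pageoffset };
		int fd = open("/dev/lguest", O_RDWR);

		if (fd < 0 || write(fd, args, sizeof(args)) < 0)
			return -1;

		/* Each read() runs the Guest until it needs servicing:
		 * on pending DMA it returns two unsigned longs. */
		for (;;) {
			unsigned long dma[2]; /* pending_dma, pending_key */
			if (read(fd, dma, sizeof(dma)) < 0)
				return -1;
			/* ... service the I/O, then loop to run again ... */
		}
	}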
[akpm@linux-foundation.org: update for futex-new-private-futexes] [akpm@linux-foundation.org: build fix] [jmorris@namei.org: lguest: use hrtimers] [akpm@linux-foundation.org: x86_64 build fix] Signed-off-by: Rusty Russell Cc: Andi Kleen Cc: Eric Dumazet Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/tsc.c | 4 +- arch/x86_64/kernel/tsc.c | 2 +- drivers/lguest/core.c | 462 ++++++++++++++++++++++++++++++++++ drivers/lguest/hypercalls.c | 192 ++++++++++++++ drivers/lguest/interrupts_and_traps.c | 268 ++++++++++++++++++++ drivers/lguest/io.c | 399 +++++++++++++++++++++++++++++ drivers/lguest/lg.h | 261 +++++++++++++++++++ drivers/lguest/lguest.c | 125 +++++++-- drivers/lguest/lguest_asm.S | 5 +- drivers/lguest/lguest_user.c | 236 +++++++++++++++++ drivers/lguest/page_tables.c | 411 ++++++++++++++++++++++++++++++ drivers/lguest/segments.c | 125 +++++++++ drivers/lguest/switcher.S | 159 ++++++++++++ include/asm-i386/tsc.h | 1 + include/linux/lguest.h | 12 +- include/linux/lguest_launcher.h | 73 ++++++ kernel/fork.c | 1 - 17 files changed, 2702 insertions(+), 34 deletions(-) create mode 100644 drivers/lguest/core.c create mode 100644 drivers/lguest/hypercalls.c create mode 100644 drivers/lguest/interrupts_and_traps.c create mode 100644 drivers/lguest/io.c create mode 100644 drivers/lguest/lg.h create mode 100644 drivers/lguest/lguest_user.c create mode 100644 drivers/lguest/page_tables.c create mode 100644 drivers/lguest/segments.c create mode 100644 drivers/lguest/switcher.S create mode 100644 include/linux/lguest_launcher.h (limited to 'kernel/fork.c') diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 252f9010f28..debd7dbb415 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -27,6 +27,7 @@ static int tsc_enabled; * an extra value to store the TSC freq */ unsigned int tsc_khz; +EXPORT_SYMBOL_GPL(tsc_khz); int tsc_disable; @@ -58,10 +59,11 @@ __setup("notsc", tsc_setup); */ static int tsc_unstable; -static inline int check_tsc_unstable(void) +int check_tsc_unstable(void) { return tsc_unstable; } +EXPORT_SYMBOL_GPL(check_tsc_unstable); /* Accellerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c index 48f9a8e6aa9..e850aa01e1b 100644 --- a/arch/x86_64/kernel/tsc.c +++ b/arch/x86_64/kernel/tsc.c @@ -44,7 +44,7 @@ unsigned long long sched_clock(void) static int tsc_unstable; -static inline int check_tsc_unstable(void) +inline int check_tsc_unstable(void) { return tsc_unstable; } diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c new file mode 100644 index 00000000000..ce909ec5749 --- /dev/null +++ b/drivers/lguest/core.c @@ -0,0 +1,462 @@ +/* World's simplest hypervisor, to test paravirt_ops and show + * unbelievers that virtualization is the future. Plus, it's fun! */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lg.h" + +/* Found in switcher.S */ +extern char start_switcher_text[], end_switcher_text[], switch_to_guest[]; +extern unsigned long default_idt_entries[]; + +/* Every guest maps the core switcher code. 
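+ * The macros below size that mapping: the switcher text rounded up to
+ * whole pages, plus two pages (one struct lguest_pages) per possible CPU.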
*/ +#define SHARED_SWITCHER_PAGES \ + DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE) +/* Pages for switcher itself, then two pages per cpu */ +#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS) + +/* We map at -4M for ease of mapping into the guest (one PTE page). */ +#define SWITCHER_ADDR 0xFFC00000 + +static struct vm_struct *switcher_vma; +static struct page **switcher_page; + +static int cpu_had_pge; +static struct { + unsigned long offset; + unsigned short segment; +} lguest_entry; + +/* This One Big lock protects all inter-guest data structures. */ +DEFINE_MUTEX(lguest_lock); +static DEFINE_PER_CPU(struct lguest *, last_guest); + +/* FIXME: Make dynamic. */ +#define MAX_LGUEST_GUESTS 16 +struct lguest lguests[MAX_LGUEST_GUESTS]; + +/* Offset from where switcher.S was compiled to where we've copied it */ +static unsigned long switcher_offset(void) +{ + return SWITCHER_ADDR - (unsigned long)start_switcher_text; +} + +/* This cpu's struct lguest_pages. */ +static struct lguest_pages *lguest_pages(unsigned int cpu) +{ + return &(((struct lguest_pages *) + (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]); +} + +static __init int map_switcher(void) +{ + int i, err; + struct page **pagep; + + switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, + GFP_KERNEL); + if (!switcher_page) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { + unsigned long addr = get_zeroed_page(GFP_KERNEL); + if (!addr) { + err = -ENOMEM; + goto free_some_pages; + } + switcher_page[i] = virt_to_page(addr); + } + + switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, + VM_ALLOC, SWITCHER_ADDR, VMALLOC_END); + if (!switcher_vma) { + err = -ENOMEM; + printk("lguest: could not map switcher pages high\n"); + goto free_pages; + } + + pagep = switcher_page; + err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep); + if (err) { + printk("lguest: map_vm_area failed: %i\n", err); + goto free_vma; + } + memcpy(switcher_vma->addr, start_switcher_text, + end_switcher_text - start_switcher_text); + + /* Fix up IDT entries to point into copied text. */ + for (i = 0; i < IDT_ENTRIES; i++) + default_idt_entries[i] += switcher_offset(); + + for_each_possible_cpu(i) { + struct lguest_pages *pages = lguest_pages(i); + struct lguest_ro_state *state = &pages->state; + + /* These fields are static: rest done in copy_in_guest_info */ + state->host_gdt_desc.size = GDT_SIZE-1; + state->host_gdt_desc.address = (long)get_cpu_gdt_table(i); + store_idt(&state->host_idt_desc); + state->guest_idt_desc.size = sizeof(state->guest_idt)-1; + state->guest_idt_desc.address = (long)&state->guest_idt; + state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1; + state->guest_gdt_desc.address = (long)&state->guest_gdt; + state->guest_tss.esp0 = (long)(&pages->regs + 1); + state->guest_tss.ss0 = LGUEST_DS; + /* No I/O for you! */ + state->guest_tss.io_bitmap_base = sizeof(state->guest_tss); + setup_default_gdt_entries(state); + setup_default_idt_entries(state, default_idt_entries); + + /* Setup LGUEST segments on all cpus */ + get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; + get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; + } + + /* Initialize entry point into switcher. 
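+	 * lguest_entry is the far-call target (segment:offset pair) which
+	 * run_guest_once() below uses in its "lcall" to enter the Guest.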
 */
+	lguest_entry.offset = (long)switch_to_guest + switcher_offset();
+	lguest_entry.segment = LGUEST_CS;
+
+	printk(KERN_INFO "lguest: mapped switcher at %p\n",
+	       switcher_vma->addr);
+	return 0;
+
+free_vma:
+	vunmap(switcher_vma->addr);
+free_pages:
+	i = TOTAL_SWITCHER_PAGES;
+free_some_pages:
+	for (--i; i >= 0; i--)
+		__free_pages(switcher_page[i], 0);
+	kfree(switcher_page);
+out:
+	return err;
+}
+
+static void unmap_switcher(void)
+{
+	unsigned int i;
+
+	vunmap(switcher_vma->addr);
+	for (i = 0; i < TOTAL_SWITCHER_PAGES; i++)
+		__free_pages(switcher_page[i], 0);
+}
+
+/* IN/OUT insns: enough to get us past boot-time probing. */
+static int emulate_insn(struct lguest *lg)
+{
+	u8 insn;
+	unsigned int insnlen = 0, in = 0, shift = 0;
+	unsigned long physaddr = guest_pa(lg, lg->regs->eip);
+
+	/* This only works for addresses in linear mapping... */
+	if (lg->regs->eip < lg->page_offset)
+		return 0;
+	lgread(lg, &insn, physaddr, 1);
+
+	/* Operand size prefix means it's actually for ax. */
+	if (insn == 0x66) {
+		shift = 16;
+		insnlen = 1;
+		lgread(lg, &insn, physaddr + insnlen, 1);
+	}
+
+	switch (insn & 0xFE) {
+	case 0xE4: /* in <next byte>,%al */
+		insnlen += 2;
+		in = 1;
+		break;
+	case 0xEC: /* in (%dx),%al */
+		insnlen += 1;
+		in = 1;
+		break;
+	case 0xE6: /* out %al,<next byte> */
+		insnlen += 2;
+		break;
+	case 0xEE: /* out %al,(%dx) */
+		insnlen += 1;
+		break;
+	default:
+		return 0;
+	}
+
+	if (in) {
+		/* Lower bit tells us whether it's a 16 or 32 bit access */
+		if (insn & 0x1)
+			lg->regs->eax = 0xFFFFFFFF;
+		else
+			lg->regs->eax |= (0xFFFF << shift);
+	}
+	lg->regs->eip += insnlen;
+	return 1;
+}
+
+int lguest_address_ok(const struct lguest *lg,
+		      unsigned long addr, unsigned long len)
+{
+	return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr);
+}
+
+/* Just like get_user, but don't let guest access lguest binary. */
+u32 lgread_u32(struct lguest *lg, unsigned long addr)
+{
+	u32 val = 0;
+
+	/* Don't let them access lguest binary */
+	if (!lguest_address_ok(lg, addr, sizeof(val))
+	    || get_user(val, (u32 __user *)addr) != 0)
+		kill_guest(lg, "bad read address %#lx", addr);
+	return val;
+}
+
+void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val)
+{
+	if (!lguest_address_ok(lg, addr, sizeof(val))
+	    || put_user(val, (u32 __user *)addr) != 0)
+		kill_guest(lg, "bad write address %#lx", addr);
+}
+
+void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
+{
+	if (!lguest_address_ok(lg, addr, bytes)
+	    || copy_from_user(b, (void __user *)addr, bytes) != 0) {
+		/* copy_from_user should do this, but as we rely on it... */
+		memset(b, 0, bytes);
+		kill_guest(lg, "bad read address %#lx len %u", addr, bytes);
+	}
+}
+
+void lgwrite(struct lguest *lg, unsigned long addr, const void *b,
+	     unsigned bytes)
+{
+	if (!lguest_address_ok(lg, addr, bytes)
+	    || copy_to_user((void __user *)addr, b, bytes) != 0)
+		kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
+}
+
+static void set_ts(void)
+{
+	u32 cr0;
+
+	cr0 = read_cr0();
+	if (!(cr0 & 8))
+		write_cr0(cr0|8);
+}
+
+static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
+{
+	if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) {
+		__get_cpu_var(last_guest) = lg;
+		lg->last_pages = pages;
+		lg->changed = CHANGED_ALL;
+	}
+
+	/* These are pretty cheap, so we do them unconditionally.
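+	 * Every switch we refresh the Host's cr3, remap the Switcher pages
+	 * for this Guest, and update the kernel stack (ss1/esp1) in the TSS.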
*/ + pages->state.host_cr3 = __pa(current->mm->pgd); + map_switcher_in_guest(lg, pages); + pages->state.guest_tss.esp1 = lg->esp1; + pages->state.guest_tss.ss1 = lg->ss1; + + /* Copy direct trap entries. */ + if (lg->changed & CHANGED_IDT) + copy_traps(lg, pages->state.guest_idt, default_idt_entries); + + /* Copy all GDT entries but the TSS. */ + if (lg->changed & CHANGED_GDT) + copy_gdt(lg, pages->state.guest_gdt); + /* If only the TLS entries have changed, copy them. */ + else if (lg->changed & CHANGED_GDT_TLS) + copy_gdt_tls(lg, pages->state.guest_gdt); + + lg->changed = 0; +} + +static void run_guest_once(struct lguest *lg, struct lguest_pages *pages) +{ + unsigned int clobber; + + copy_in_guest_info(lg, pages); + + /* Put eflags on stack, lcall does rest: suitable for iret return. */ + asm volatile("pushf; lcall *lguest_entry" + : "=a"(clobber), "=b"(clobber) + : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir)) + : "memory", "%edx", "%ecx", "%edi", "%esi"); +} + +int run_guest(struct lguest *lg, unsigned long __user *user) +{ + while (!lg->dead) { + unsigned int cr2 = 0; /* Damn gcc */ + + /* Hypercalls first: we might have been out to userspace */ + do_hypercalls(lg); + if (lg->dma_is_pending) { + if (put_user(lg->pending_dma, user) || + put_user(lg->pending_key, user+1)) + return -EFAULT; + return sizeof(unsigned long)*2; + } + + if (signal_pending(current)) + return -ERESTARTSYS; + + /* If Waker set break_out, return to Launcher. */ + if (lg->break_out) + return -EAGAIN; + + maybe_do_interrupt(lg); + + try_to_freeze(); + + if (lg->dead) + break; + + if (lg->halted) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + continue; + } + + local_irq_disable(); + + /* Even if *we* don't want FPU trap, guest might... */ + if (lg->ts) + set_ts(); + + /* Don't let Guest do SYSENTER: we can't handle it. */ + if (boot_cpu_has(X86_FEATURE_SEP)) + wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); + + run_guest_once(lg, lguest_pages(raw_smp_processor_id())); + + /* Save cr2 now if we page-faulted. */ + if (lg->regs->trapnum == 14) + cr2 = read_cr2(); + else if (lg->regs->trapnum == 7) + math_state_restore(); + + if (boot_cpu_has(X86_FEATURE_SEP)) + wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); + local_irq_enable(); + + switch (lg->regs->trapnum) { + case 13: /* We've intercepted a GPF. */ + if (lg->regs->errcode == 0) { + if (emulate_insn(lg)) + continue; + } + break; + case 14: /* We've intercepted a page fault. */ + if (demand_page(lg, cr2, lg->regs->errcode)) + continue; + + /* If lguest_data is NULL, this won't hurt. */ + if (put_user(cr2, &lg->lguest_data->cr2)) + kill_guest(lg, "Writing cr2"); + break; + case 7: /* We've intercepted a Device Not Available fault. */ + /* If they don't want to know, just absorb it. */ + if (!lg->ts) + continue; + break; + case 32 ... 255: /* Real interrupt, fall thru */ + cond_resched(); + case LGUEST_TRAP_ENTRY: /* Handled at top of loop */ + continue; + } + + if (deliver_trap(lg, lg->regs->trapnum)) + continue; + + kill_guest(lg, "unhandled trap %li at %#lx (%#lx)", + lg->regs->trapnum, lg->regs->eip, + lg->regs->trapnum == 14 ? 
cr2 : lg->regs->errcode); + } + return -ENOENT; +} + +int find_free_guest(void) +{ + unsigned int i; + for (i = 0; i < MAX_LGUEST_GUESTS; i++) + if (!lguests[i].tsk) + return i; + return -1; +} + +static void adjust_pge(void *on) +{ + if (on) + write_cr4(read_cr4() | X86_CR4_PGE); + else + write_cr4(read_cr4() & ~X86_CR4_PGE); +} + +static int __init init(void) +{ + int err; + + if (paravirt_enabled()) { + printk("lguest is afraid of %s\n", paravirt_ops.name); + return -EPERM; + } + + err = map_switcher(); + if (err) + return err; + + err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES); + if (err) { + unmap_switcher(); + return err; + } + lguest_io_init(); + + err = lguest_device_init(); + if (err) { + free_pagetables(); + unmap_switcher(); + return err; + } + lock_cpu_hotplug(); + if (cpu_has_pge) { /* We have a broader idea of "global". */ + cpu_had_pge = 1; + on_each_cpu(adjust_pge, (void *)0, 0, 1); + clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); + } + unlock_cpu_hotplug(); + return 0; +} + +static void __exit fini(void) +{ + lguest_device_remove(); + free_pagetables(); + unmap_switcher(); + lock_cpu_hotplug(); + if (cpu_had_pge) { + set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); + on_each_cpu(adjust_pge, (void *)1, 0, 1); + } + unlock_cpu_hotplug(); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Rusty Russell "); diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c new file mode 100644 index 00000000000..ea52ca451f7 --- /dev/null +++ b/drivers/lguest/hypercalls.c @@ -0,0 +1,192 @@ +/* Actual hypercalls, which allow guests to actually do something. + Copyright (C) 2006 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#include +#include +#include +#include +#include +#include +#include "lg.h" + +static void do_hcall(struct lguest *lg, struct lguest_regs *regs) +{ + switch (regs->eax) { + case LHCALL_FLUSH_ASYNC: + break; + case LHCALL_LGUEST_INIT: + kill_guest(lg, "already have lguest_data"); + break; + case LHCALL_CRASH: { + char msg[128]; + lgread(lg, msg, regs->edx, sizeof(msg)); + msg[sizeof(msg)-1] = '\0'; + kill_guest(lg, "CRASH: %s", msg); + break; + } + case LHCALL_FLUSH_TLB: + if (regs->edx) + guest_pagetable_clear_all(lg); + else + guest_pagetable_flush_user(lg); + break; + case LHCALL_GET_WALLCLOCK: { + struct timespec ts; + ktime_get_real_ts(&ts); + regs->eax = ts.tv_sec; + break; + } + case LHCALL_BIND_DMA: + regs->eax = bind_dma(lg, regs->edx, regs->ebx, + regs->ecx >> 8, regs->ecx & 0xFF); + break; + case LHCALL_SEND_DMA: + send_dma(lg, regs->edx, regs->ebx); + break; + case LHCALL_LOAD_GDT: + load_guest_gdt(lg, regs->edx, regs->ebx); + break; + case LHCALL_LOAD_IDT_ENTRY: + load_guest_idt_entry(lg, regs->edx, regs->ebx, regs->ecx); + break; + case LHCALL_NEW_PGTABLE: + guest_new_pagetable(lg, regs->edx); + break; + case LHCALL_SET_STACK: + guest_set_stack(lg, regs->edx, regs->ebx, regs->ecx); + break; + case LHCALL_SET_PTE: + guest_set_pte(lg, regs->edx, regs->ebx, mkgpte(regs->ecx)); + break; + case LHCALL_SET_PMD: + guest_set_pmd(lg, regs->edx, regs->ebx); + break; + case LHCALL_LOAD_TLS: + guest_load_tls(lg, regs->edx); + break; + case LHCALL_SET_CLOCKEVENT: + guest_set_clockevent(lg, regs->edx); + break; + case LHCALL_TS: + lg->ts = regs->edx; + break; + case LHCALL_HALT: + lg->halted = 1; + break; + default: + kill_guest(lg, "Bad hypercall %li\n", regs->eax); + } +} + +/* We always do queued calls before actual hypercall. */ +static void do_async_hcalls(struct lguest *lg) +{ + unsigned int i; + u8 st[LHCALL_RING_SIZE]; + + if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st))) + return; + + for (i = 0; i < ARRAY_SIZE(st); i++) { + struct lguest_regs regs; + unsigned int n = lg->next_hcall; + + if (st[n] == 0xFF) + break; + + if (++lg->next_hcall == LHCALL_RING_SIZE) + lg->next_hcall = 0; + + if (get_user(regs.eax, &lg->lguest_data->hcalls[n].eax) + || get_user(regs.edx, &lg->lguest_data->hcalls[n].edx) + || get_user(regs.ecx, &lg->lguest_data->hcalls[n].ecx) + || get_user(regs.ebx, &lg->lguest_data->hcalls[n].ebx)) { + kill_guest(lg, "Fetching async hypercalls"); + break; + } + + do_hcall(lg, ®s); + if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) { + kill_guest(lg, "Writing result for async hypercall"); + break; + } + + if (lg->dma_is_pending) + break; + } +} + +static void initialize(struct lguest *lg) +{ + u32 tsc_speed; + + if (lg->regs->eax != LHCALL_LGUEST_INIT) { + kill_guest(lg, "hypercall %li before LGUEST_INIT", + lg->regs->eax); + return; + } + + /* We only tell the guest to use the TSC if it's reliable. 
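+	 * "Reliable" means constant-rate and not marked unstable by the
+	 * Host; otherwise we pass a tsc_khz of 0 and the Guest falls back
+	 * to a jiffies-based clock.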
*/ + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable()) + tsc_speed = tsc_khz; + else + tsc_speed = 0; + + lg->lguest_data = (struct lguest_data __user *)lg->regs->edx; + /* We check here so we can simply copy_to_user/from_user */ + if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) { + kill_guest(lg, "bad guest page %p", lg->lguest_data); + return; + } + if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) + || get_user(lg->noirq_end, &lg->lguest_data->noirq_end) + /* We reserve the top pgd entry. */ + || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) + || put_user(tsc_speed, &lg->lguest_data->tsc_khz) + || put_user(lg->guestid, &lg->lguest_data->guestid)) + kill_guest(lg, "bad guest page %p", lg->lguest_data); + + /* This is the one case where the above accesses might have + * been the first write to a Guest page. This may have caused + * a copy-on-write fault, but the Guest might be referring to + * the old (read-only) page. */ + guest_pagetable_clear_all(lg); +} + +/* Even if we go out to userspace and come back, we don't want to do + * the hypercall again. */ +static void clear_hcall(struct lguest *lg) +{ + lg->regs->trapnum = 255; +} + +void do_hypercalls(struct lguest *lg) +{ + if (unlikely(!lg->lguest_data)) { + if (lg->regs->trapnum == LGUEST_TRAP_ENTRY) { + initialize(lg); + clear_hcall(lg); + } + return; + } + + do_async_hcalls(lg); + if (!lg->dma_is_pending && lg->regs->trapnum == LGUEST_TRAP_ENTRY) { + do_hcall(lg, lg->regs); + clear_hcall(lg); + } +} diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c new file mode 100644 index 00000000000..d9de5bbc613 --- /dev/null +++ b/drivers/lguest/interrupts_and_traps.c @@ -0,0 +1,268 @@ +#include +#include "lg.h" + +static unsigned long idt_address(u32 lo, u32 hi) +{ + return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); +} + +static int idt_type(u32 lo, u32 hi) +{ + return (hi >> 8) & 0xF; +} + +static int idt_present(u32 lo, u32 hi) +{ + return (hi & 0x8000); +} + +static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) +{ + *gstack -= 4; + lgwrite_u32(lg, *gstack, val); +} + +static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) +{ + unsigned long gstack; + u32 eflags, ss, irq_enable; + + /* If they want a ring change, we use new stack and push old ss/esp */ + if ((lg->regs->ss&0x3) != GUEST_PL) { + gstack = guest_pa(lg, lg->esp1); + ss = lg->ss1; + push_guest_stack(lg, &gstack, lg->regs->ss); + push_guest_stack(lg, &gstack, lg->regs->esp); + } else { + gstack = guest_pa(lg, lg->regs->esp); + ss = lg->regs->ss; + } + + /* We use IF bit in eflags to indicate whether irqs were disabled + (it's always 0, since irqs are enabled when guest is running). */ + eflags = lg->regs->eflags; + if (get_user(irq_enable, &lg->lguest_data->irq_enabled)) + irq_enable = 0; + eflags |= (irq_enable & X86_EFLAGS_IF); + + push_guest_stack(lg, &gstack, eflags); + push_guest_stack(lg, &gstack, lg->regs->cs); + push_guest_stack(lg, &gstack, lg->regs->eip); + + if (has_err) + push_guest_stack(lg, &gstack, lg->regs->errcode); + + /* Change the real stack so switcher returns to trap handler */ + lg->regs->ss = ss; + lg->regs->esp = gstack + lg->page_offset; + lg->regs->cs = (__KERNEL_CS|GUEST_PL); + lg->regs->eip = idt_address(lo, hi); + + /* Disable interrupts for an interrupt gate. 
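+	 * Type 0xE is an interrupt gate: real hardware would clear IF, so
+	 * we clear the Guest's virtual one in lguest_data instead.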
*/ + if (idt_type(lo, hi) == 0xE) + if (put_user(0, &lg->lguest_data->irq_enabled)) + kill_guest(lg, "Disabling interrupts"); +} + +void maybe_do_interrupt(struct lguest *lg) +{ + unsigned int irq; + DECLARE_BITMAP(blk, LGUEST_IRQS); + struct desc_struct *idt; + + if (!lg->lguest_data) + return; + + /* Mask out any interrupts they have blocked. */ + if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts, + sizeof(blk))) + return; + + bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS); + + irq = find_first_bit(blk, LGUEST_IRQS); + if (irq >= LGUEST_IRQS) + return; + + if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end) + return; + + /* If they're halted, we re-enable interrupts. */ + if (lg->halted) { + /* Re-enable interrupts. */ + if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled)) + kill_guest(lg, "Re-enabling interrupts"); + lg->halted = 0; + } else { + /* Maybe they have interrupts disabled? */ + u32 irq_enabled; + if (get_user(irq_enabled, &lg->lguest_data->irq_enabled)) + irq_enabled = 0; + if (!irq_enabled) + return; + } + + idt = &lg->idt[FIRST_EXTERNAL_VECTOR+irq]; + if (idt_present(idt->a, idt->b)) { + clear_bit(irq, lg->irqs_pending); + set_guest_interrupt(lg, idt->a, idt->b, 0); + } +} + +static int has_err(unsigned int trap) +{ + return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); +} + +int deliver_trap(struct lguest *lg, unsigned int num) +{ + u32 lo = lg->idt[num].a, hi = lg->idt[num].b; + + if (!idt_present(lo, hi)) + return 0; + set_guest_interrupt(lg, lo, hi, has_err(num)); + return 1; +} + +static int direct_trap(const struct lguest *lg, + const struct desc_struct *trap, + unsigned int num) +{ + /* Hardware interrupts don't go to guest (except syscall). */ + if (num >= FIRST_EXTERNAL_VECTOR && num != SYSCALL_VECTOR) + return 0; + + /* We intercept page fault (demand shadow paging & cr2 saving) + protection fault (in/out emulation) and device not + available (TS handling), and hypercall */ + if (num == 14 || num == 13 || num == 7 || num == LGUEST_TRAP_ENTRY) + return 0; + + /* Interrupt gates (0xE) or not present (0x0) can't go direct. */ + return idt_type(trap->a, trap->b) == 0xF; +} + +void pin_stack_pages(struct lguest *lg) +{ + unsigned int i; + + for (i = 0; i < lg->stack_pages; i++) + pin_page(lg, lg->esp1 - i * PAGE_SIZE); +} + +void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages) +{ + /* You cannot have a stack segment with priv level 0. */ + if ((seg & 0x3) != GUEST_PL) + kill_guest(lg, "bad stack segment %i", seg); + if (pages > 2) + kill_guest(lg, "bad stack pages %u", pages); + lg->ss1 = seg; + lg->esp1 = esp; + lg->stack_pages = pages; + pin_stack_pages(lg); +} + +/* Set up trap in IDT. */ +static void set_trap(struct lguest *lg, struct desc_struct *trap, + unsigned int num, u32 lo, u32 hi) +{ + u8 type = idt_type(lo, hi); + + if (!idt_present(lo, hi)) { + trap->a = trap->b = 0; + return; + } + + if (type != 0xE && type != 0xF) + kill_guest(lg, "bad IDT type %i", type); + + trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); + trap->b = (hi&0xFFFFEF00); +} + +void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) +{ + /* Guest never handles: NMI, doublefault, hypercall, spurious irq. 
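+	 * Those are vectors 2, 8, 15 and LGUEST_TRAP_ENTRY respectively:
+	 * attempts to set them are silently ignored.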
*/ + if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) + return; + + lg->changed |= CHANGED_IDT; + if (num < ARRAY_SIZE(lg->idt)) + set_trap(lg, &lg->idt[num], num, lo, hi); + else if (num == SYSCALL_VECTOR) + set_trap(lg, &lg->syscall_idt, num, lo, hi); +} + +static void default_idt_entry(struct desc_struct *idt, + int trap, + const unsigned long handler) +{ + u32 flags = 0x8e00; + + /* They can't "int" into any of them except hypercall. */ + if (trap == LGUEST_TRAP_ENTRY) + flags |= (GUEST_PL << 13); + + idt->a = (LGUEST_CS<<16) | (handler&0x0000FFFF); + idt->b = (handler&0xFFFF0000) | flags; +} + +void setup_default_idt_entries(struct lguest_ro_state *state, + const unsigned long *def) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(state->guest_idt); i++) + default_idt_entry(&state->guest_idt[i], i, def[i]); +} + +void copy_traps(const struct lguest *lg, struct desc_struct *idt, + const unsigned long *def) +{ + unsigned int i; + + /* All hardware interrupts are same whatever the guest: only the + * traps might be different. */ + for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) { + if (direct_trap(lg, &lg->idt[i], i)) + idt[i] = lg->idt[i]; + else + default_idt_entry(&idt[i], i, def[i]); + } + i = SYSCALL_VECTOR; + if (direct_trap(lg, &lg->syscall_idt, i)) + idt[i] = lg->syscall_idt; + else + default_idt_entry(&idt[i], i, def[i]); +} + +void guest_set_clockevent(struct lguest *lg, unsigned long delta) +{ + ktime_t expires; + + if (unlikely(delta == 0)) { + /* Clock event device is shutting down. */ + hrtimer_cancel(&lg->hrt); + return; + } + + expires = ktime_add_ns(ktime_get_real(), delta); + hrtimer_start(&lg->hrt, expires, HRTIMER_MODE_ABS); +} + +static enum hrtimer_restart clockdev_fn(struct hrtimer *timer) +{ + struct lguest *lg = container_of(timer, struct lguest, hrt); + + set_bit(0, lg->irqs_pending); + if (lg->halted) + wake_up_process(lg->tsk); + return HRTIMER_NORESTART; +} + +void init_clockdev(struct lguest *lg) +{ + hrtimer_init(&lg->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS); + lg->hrt.function = clockdev_fn; +} diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c new file mode 100644 index 00000000000..06bdba2337e --- /dev/null +++ b/drivers/lguest/io.c @@ -0,0 +1,399 @@ +/* Simple I/O model for guests, based on shared memory. + * Copyright (C) 2006 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include +#include +#include "lg.h" + +static struct list_head dma_hash[61]; + +void lguest_io_init(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(dma_hash); i++) + INIT_LIST_HEAD(&dma_hash[i]); +} + +/* FIXME: allow multi-page lengths. 
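+ * Until then, check_dma_list() below refuses any section which is longer
+ * than a page or which crosses a page boundary.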
*/ +static int check_dma_list(struct lguest *lg, const struct lguest_dma *dma) +{ + unsigned int i; + + for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { + if (!dma->len[i]) + return 1; + if (!lguest_address_ok(lg, dma->addr[i], dma->len[i])) + goto kill; + if (dma->len[i] > PAGE_SIZE) + goto kill; + /* We could do over a page, but is it worth it? */ + if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE) + goto kill; + } + return 1; + +kill: + kill_guest(lg, "bad DMA entry: %u@%#lx", dma->len[i], dma->addr[i]); + return 0; +} + +static unsigned int hash(const union futex_key *key) +{ + return jhash2((u32*)&key->both.word, + (sizeof(key->both.word)+sizeof(key->both.ptr))/4, + key->both.offset) + % ARRAY_SIZE(dma_hash); +} + +static inline int key_eq(const union futex_key *a, const union futex_key *b) +{ + return (a->both.word == b->both.word + && a->both.ptr == b->both.ptr + && a->both.offset == b->both.offset); +} + +/* Must hold read lock on dmainfo owner's current->mm->mmap_sem */ +static void unlink_dma(struct lguest_dma_info *dmainfo) +{ + BUG_ON(!mutex_is_locked(&lguest_lock)); + dmainfo->interrupt = 0; + list_del(&dmainfo->list); + drop_futex_key_refs(&dmainfo->key); +} + +static int unbind_dma(struct lguest *lg, + const union futex_key *key, + unsigned long dmas) +{ + int i, ret = 0; + + for (i = 0; i < LGUEST_MAX_DMA; i++) { + if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) { + unlink_dma(&lg->dma[i]); + ret = 1; + break; + } + } + return ret; +} + +int bind_dma(struct lguest *lg, + unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt) +{ + unsigned int i; + int ret = 0; + union futex_key key; + struct rw_semaphore *fshared = ¤t->mm->mmap_sem; + + if (interrupt >= LGUEST_IRQS) + return 0; + + mutex_lock(&lguest_lock); + down_read(fshared); + if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { + kill_guest(lg, "bad dma key %#lx", ukey); + goto unlock; + } + get_futex_key_refs(&key); + + if (interrupt == 0) + ret = unbind_dma(lg, &key, dmas); + else { + for (i = 0; i < LGUEST_MAX_DMA; i++) { + if (lg->dma[i].interrupt) + continue; + + lg->dma[i].dmas = dmas; + lg->dma[i].num_dmas = numdmas; + lg->dma[i].next_dma = 0; + lg->dma[i].key = key; + lg->dma[i].guestid = lg->guestid; + lg->dma[i].interrupt = interrupt; + list_add(&lg->dma[i].list, &dma_hash[hash(&key)]); + ret = 1; + goto unlock; + } + } + drop_futex_key_refs(&key); +unlock: + up_read(fshared); + mutex_unlock(&lguest_lock); + return ret; +} + +/* lgread from another guest */ +static int lgread_other(struct lguest *lg, + void *buf, u32 addr, unsigned bytes) +{ + if (!lguest_address_ok(lg, addr, bytes) + || access_process_vm(lg->tsk, addr, buf, bytes, 0) != bytes) { + memset(buf, 0, bytes); + kill_guest(lg, "bad address in registered DMA struct"); + return 0; + } + return 1; +} + +/* lgwrite to another guest */ +static int lgwrite_other(struct lguest *lg, u32 addr, + const void *buf, unsigned bytes) +{ + if (!lguest_address_ok(lg, addr, bytes) + || (access_process_vm(lg->tsk, addr, (void *)buf, bytes, 1) + != bytes)) { + kill_guest(lg, "bad address writing to registered DMA"); + return 0; + } + return 1; +} + +static u32 copy_data(struct lguest *srclg, + const struct lguest_dma *src, + const struct lguest_dma *dst, + struct page *pages[]) +{ + unsigned int totlen, si, di, srcoff, dstoff; + void *maddr = NULL; + + totlen = 0; + si = di = 0; + srcoff = dstoff = 0; + while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si] + && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) { + u32 len = 
min(src->len[si] - srcoff, dst->len[di] - dstoff); + + if (!maddr) + maddr = kmap(pages[di]); + + /* FIXME: This is not completely portable, since + archs do different things for copy_to_user_page. */ + if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE, + (void *__user)src->addr[si], len) != 0) { + kill_guest(srclg, "bad address in sending DMA"); + totlen = 0; + break; + } + + totlen += len; + srcoff += len; + dstoff += len; + if (srcoff == src->len[si]) { + si++; + srcoff = 0; + } + if (dstoff == dst->len[di]) { + kunmap(pages[di]); + maddr = NULL; + di++; + dstoff = 0; + } + } + + if (maddr) + kunmap(pages[di]); + + return totlen; +} + +/* Src is us, ie. current. */ +static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src, + struct lguest *dstlg, const struct lguest_dma *dst) +{ + int i; + u32 ret; + struct page *pages[LGUEST_MAX_DMA_SECTIONS]; + + if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src)) + return 0; + + /* First get the destination pages */ + for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { + if (dst->len[i] == 0) + break; + if (get_user_pages(dstlg->tsk, dstlg->mm, + dst->addr[i], 1, 1, 1, pages+i, NULL) + != 1) { + kill_guest(dstlg, "Error mapping DMA pages"); + ret = 0; + goto drop_pages; + } + } + + /* Now copy until we run out of src or dst. */ + ret = copy_data(srclg, src, dst, pages); + +drop_pages: + while (--i >= 0) + put_page(pages[i]); + return ret; +} + +static int dma_transfer(struct lguest *srclg, + unsigned long udma, + struct lguest_dma_info *dst) +{ + struct lguest_dma dst_dma, src_dma; + struct lguest *dstlg; + u32 i, dma = 0; + + dstlg = &lguests[dst->guestid]; + /* Get our dma list. */ + lgread(srclg, &src_dma, udma, sizeof(src_dma)); + + /* We can't deadlock against them dmaing to us, because this + * is all under the lguest_lock. */ + down_read(&dstlg->mm->mmap_sem); + + for (i = 0; i < dst->num_dmas; i++) { + dma = (dst->next_dma + i) % dst->num_dmas; + if (!lgread_other(dstlg, &dst_dma, + dst->dmas + dma * sizeof(struct lguest_dma), + sizeof(dst_dma))) { + goto fail; + } + if (!dst_dma.used_len) + break; + } + if (i != dst->num_dmas) { + unsigned long used_lenp; + unsigned int ret; + + ret = do_dma(srclg, &src_dma, dstlg, &dst_dma); + /* Put used length in src. */ + lgwrite_u32(srclg, + udma+offsetof(struct lguest_dma, used_len), ret); + if (ret == 0 && src_dma.len[0] != 0) + goto fail; + + /* Make sure destination sees contents before length. */ + wmb(); + used_lenp = dst->dmas + + dma * sizeof(struct lguest_dma) + + offsetof(struct lguest_dma, used_len); + lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret)); + dst->next_dma++; + } + up_read(&dstlg->mm->mmap_sem); + + /* Do this last so dst doesn't simply sleep on lock. */ + set_bit(dst->interrupt, dstlg->irqs_pending); + wake_up_process(dstlg->tsk); + return i == dst->num_dmas; + +fail: + up_read(&dstlg->mm->mmap_sem); + return 0; +} + +void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma) +{ + union futex_key key; + int empty = 0; + struct rw_semaphore *fshared = ¤t->mm->mmap_sem; + +again: + mutex_lock(&lguest_lock); + down_read(fshared); + if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { + kill_guest(lg, "bad sending DMA key"); + goto unlock; + } + /* Shared mapping? Look for other guests... 
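+	 * get_futex_key() sets the bottom bit of the offset for
+	 * inode-backed (ie. shared) mappings, which is what we test here.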
*/ + if (key.shared.offset & 1) { + struct lguest_dma_info *i; + list_for_each_entry(i, &dma_hash[hash(&key)], list) { + if (i->guestid == lg->guestid) + continue; + if (!key_eq(&key, &i->key)) + continue; + + empty += dma_transfer(lg, udma, i); + break; + } + if (empty == 1) { + /* Give any recipients one chance to restock. */ + up_read(¤t->mm->mmap_sem); + mutex_unlock(&lguest_lock); + empty++; + goto again; + } + } else { + /* Private mapping: tell our userspace. */ + lg->dma_is_pending = 1; + lg->pending_dma = udma; + lg->pending_key = ukey; + } +unlock: + up_read(fshared); + mutex_unlock(&lguest_lock); +} + +void release_all_dma(struct lguest *lg) +{ + unsigned int i; + + BUG_ON(!mutex_is_locked(&lguest_lock)); + + down_read(&lg->mm->mmap_sem); + for (i = 0; i < LGUEST_MAX_DMA; i++) { + if (lg->dma[i].interrupt) + unlink_dma(&lg->dma[i]); + } + up_read(&lg->mm->mmap_sem); +} + +/* Userspace wants a dma buffer from this guest. */ +unsigned long get_dma_buffer(struct lguest *lg, + unsigned long ukey, unsigned long *interrupt) +{ + unsigned long ret = 0; + union futex_key key; + struct lguest_dma_info *i; + struct rw_semaphore *fshared = ¤t->mm->mmap_sem; + + mutex_lock(&lguest_lock); + down_read(fshared); + if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { + kill_guest(lg, "bad registered DMA buffer"); + goto unlock; + } + list_for_each_entry(i, &dma_hash[hash(&key)], list) { + if (key_eq(&key, &i->key) && i->guestid == lg->guestid) { + unsigned int j; + for (j = 0; j < i->num_dmas; j++) { + struct lguest_dma dma; + + ret = i->dmas + j * sizeof(struct lguest_dma); + lgread(lg, &dma, ret, sizeof(dma)); + if (dma.used_len == 0) + break; + } + *interrupt = i->interrupt; + break; + } + } +unlock: + up_read(fshared); + mutex_unlock(&lguest_lock); + return ret; +} + diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h new file mode 100644 index 00000000000..3e2ddfbc816 --- /dev/null +++ b/drivers/lguest/lg.h @@ -0,0 +1,261 @@ +#ifndef _LGUEST_H +#define _LGUEST_H + +#include + +#define GDT_ENTRY_LGUEST_CS 10 +#define GDT_ENTRY_LGUEST_DS 11 +#define LGUEST_CS (GDT_ENTRY_LGUEST_CS * 8) +#define LGUEST_DS (GDT_ENTRY_LGUEST_DS * 8) + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "irq_vectors.h" + +#define GUEST_PL 1 + +struct lguest_regs +{ + /* Manually saved part. */ + unsigned long ebx, ecx, edx; + unsigned long esi, edi, ebp; + unsigned long gs; + unsigned long eax; + unsigned long fs, ds, es; + unsigned long trapnum, errcode; + /* Trap pushed part */ + unsigned long eip; + unsigned long cs; + unsigned long eflags; + unsigned long esp; + unsigned long ss; +}; + +void free_pagetables(void); +int init_pagetables(struct page **switcher_page, unsigned int pages); + +/* Full 4G segment descriptors, suitable for CS and DS. */ +#define FULL_EXEC_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9b00}) +#define FULL_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9300}) + +struct lguest_dma_info +{ + struct list_head list; + union futex_key key; + unsigned long dmas; + u16 next_dma; + u16 num_dmas; + u16 guestid; + u8 interrupt; /* 0 when not registered */ +}; + +/* We have separate types for the guest's ptes & pgds and the shadow ptes & + * pgds. Since this host might use three-level pagetables and the guest and + * shadow pagetables don't, we can't use the normal pte_t/pgd_t. 
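+ * Each of the four types below is a 32-bit entry: the bottom 12 bits
+ * hold the flags, the top 20 bits the page frame number.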
*/ +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} spgd_t; +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} spte_t; +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} gpgd_t; +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} gpte_t; +#define mkgpte(_val) ((gpte_t){.raw.val = _val}) +#define mkgpgd(_val) ((gpgd_t){.raw.val = _val}) + +struct pgdir +{ + unsigned long cr3; + spgd_t *pgdir; +}; + +/* This is a guest-specific page (mapped ro) into the guest. */ +struct lguest_ro_state +{ + /* Host information we need to restore when we switch back. */ + u32 host_cr3; + struct Xgt_desc_struct host_idt_desc; + struct Xgt_desc_struct host_gdt_desc; + u32 host_sp; + + /* Fields which are used when guest is running. */ + struct Xgt_desc_struct guest_idt_desc; + struct Xgt_desc_struct guest_gdt_desc; + struct i386_hw_tss guest_tss; + struct desc_struct guest_idt[IDT_ENTRIES]; + struct desc_struct guest_gdt[GDT_ENTRIES]; +}; + +/* We have two pages shared with guests, per cpu. */ +struct lguest_pages +{ + /* This is the stack page mapped rw in guest */ + char spare[PAGE_SIZE - sizeof(struct lguest_regs)]; + struct lguest_regs regs; + + /* This is the host state & guest descriptor page, ro in guest */ + struct lguest_ro_state state; +} __attribute__((aligned(PAGE_SIZE))); + +#define CHANGED_IDT 1 +#define CHANGED_GDT 2 +#define CHANGED_GDT_TLS 4 /* Actually a subset of CHANGED_GDT */ +#define CHANGED_ALL 3 + +/* The private info the thread maintains about the guest. */ +struct lguest +{ + /* At end of a page shared mapped over lguest_pages in guest. */ + unsigned long regs_page; + struct lguest_regs *regs; + struct lguest_data __user *lguest_data; + struct task_struct *tsk; + struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ + u16 guestid; + u32 pfn_limit; + u32 page_offset; + u32 cr2; + int halted; + int ts; + u32 next_hcall; + u32 esp1; + u8 ss1; + + /* Do we need to stop what we're doing and return to userspace? */ + int break_out; + wait_queue_head_t break_wq; + + /* Bitmap of what has changed: see CHANGED_* above. */ + int changed; + struct lguest_pages *last_pages; + + /* We keep a small number of these. */ + u32 pgdidx; + struct pgdir pgdirs[4]; + + /* Cached wakeup: we hold a reference to this task. */ + struct task_struct *wake; + + unsigned long noirq_start, noirq_end; + int dma_is_pending; + unsigned long pending_dma; /* struct lguest_dma */ + unsigned long pending_key; /* address they're sending to */ + + unsigned int stack_pages; + u32 tsc_khz; + + struct lguest_dma_info dma[LGUEST_MAX_DMA]; + + /* Dead? */ + const char *dead; + + /* The GDT entries copied into lguest_ro_state when running. */ + struct desc_struct gdt[GDT_ENTRIES]; + + /* The IDT entries: some copied into lguest_ro_state when running. 
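+	 * copy_traps() decides, via direct_trap(), which of these the
+	 * Guest may handle directly; the rest get default entries.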
*/ + struct desc_struct idt[FIRST_EXTERNAL_VECTOR+LGUEST_IRQS]; + struct desc_struct syscall_idt; + + /* Virtual clock device */ + struct hrtimer hrt; + + /* Pending virtual interrupts */ + DECLARE_BITMAP(irqs_pending, LGUEST_IRQS); +}; + +extern struct lguest lguests[]; +extern struct mutex lguest_lock; + +/* core.c: */ +u32 lgread_u32(struct lguest *lg, unsigned long addr); +void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val); +void lgread(struct lguest *lg, void *buf, unsigned long addr, unsigned len); +void lgwrite(struct lguest *lg, unsigned long, const void *buf, unsigned len); +int find_free_guest(void); +int lguest_address_ok(const struct lguest *lg, + unsigned long addr, unsigned long len); +int run_guest(struct lguest *lg, unsigned long __user *user); + + +/* interrupts_and_traps.c: */ +void maybe_do_interrupt(struct lguest *lg); +int deliver_trap(struct lguest *lg, unsigned int num); +void load_guest_idt_entry(struct lguest *lg, unsigned int i, u32 low, u32 hi); +void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages); +void pin_stack_pages(struct lguest *lg); +void setup_default_idt_entries(struct lguest_ro_state *state, + const unsigned long *def); +void copy_traps(const struct lguest *lg, struct desc_struct *idt, + const unsigned long *def); +void guest_set_clockevent(struct lguest *lg, unsigned long delta); +void init_clockdev(struct lguest *lg); + +/* segments.c: */ +void setup_default_gdt_entries(struct lguest_ro_state *state); +void setup_guest_gdt(struct lguest *lg); +void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num); +void guest_load_tls(struct lguest *lg, unsigned long tls_array); +void copy_gdt(const struct lguest *lg, struct desc_struct *gdt); +void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt); + +/* page_tables.c: */ +int init_guest_pagetable(struct lguest *lg, unsigned long pgtable); +void free_guest_pagetable(struct lguest *lg); +void guest_new_pagetable(struct lguest *lg, unsigned long pgtable); +void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 i); +void guest_pagetable_clear_all(struct lguest *lg); +void guest_pagetable_flush_user(struct lguest *lg); +void guest_set_pte(struct lguest *lg, unsigned long cr3, + unsigned long vaddr, gpte_t val); +void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages); +int demand_page(struct lguest *info, unsigned long cr2, int errcode); +void pin_page(struct lguest *lg, unsigned long vaddr); + +/* lguest_user.c: */ +int lguest_device_init(void); +void lguest_device_remove(void); + +/* io.c: */ +void lguest_io_init(void); +int bind_dma(struct lguest *lg, + unsigned long key, unsigned long udma, u16 numdmas, u8 interrupt); +void send_dma(struct lguest *info, unsigned long key, unsigned long udma); +void release_all_dma(struct lguest *lg); +unsigned long get_dma_buffer(struct lguest *lg, unsigned long key, + unsigned long *interrupt); + +/* hypercalls.c: */ +void do_hypercalls(struct lguest *lg); + +#define kill_guest(lg, fmt...) 
\ +do { \ + if (!(lg)->dead) { \ + (lg)->dead = kasprintf(GFP_ATOMIC, fmt); \ + if (!(lg)->dead) \ + (lg)->dead = ERR_PTR(-ENOMEM); \ + } \ +} while(0) + +static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) +{ + return vaddr - lg->page_offset; +} +#endif /* __ASSEMBLY__ */ +#endif /* _LGUEST_H */ diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index b3a72bd8d6f..b9a58b78c99 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -37,6 +39,7 @@ #include #include #include +//#include /* Declarations for definitions in lguest_guest.S */ extern char lguest_noirq_start[], lguest_noirq_end[]; @@ -54,7 +57,6 @@ struct lguest_data lguest_data = { .blocked_interrupts = { 1 }, /* Block timer interrupts */ }; struct lguest_device_desc *lguest_devices; -static __initdata const struct lguest_boot_info *boot = __va(0); static enum paravirt_lazy_mode lazy_mode; static void lguest_lazy_mode(enum paravirt_lazy_mode mode) @@ -210,7 +212,7 @@ static void lguest_cpuid(unsigned int *eax, unsigned int *ebx, case 1: /* Basic feature request. */ /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ *ecx &= 0x00002201; - /* Similarly: SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ + /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ *edx &= 0x07808101; /* Host wants to know when we flush kernel pages: set PGE. */ *edx |= 0x00002000; @@ -346,24 +348,104 @@ static unsigned long lguest_get_wallclock(void) return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0); } +static cycle_t lguest_clock_read(void) +{ + if (lguest_data.tsc_khz) + return native_read_tsc(); + else + return jiffies; +} + +/* This is what we tell the kernel is our clocksource. */ +static struct clocksource lguest_clock = { + .name = "lguest", + .rating = 400, + .read = lguest_clock_read, +}; + +/* We also need a "struct clock_event_device": Linux asks us to set it to go + * off some time in the future. Actually, James Morris figured all this out, I + * just applied the patch. */ +static int lguest_clockevent_set_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + if (delta < LG_CLOCK_MIN_DELTA) { + if (printk_ratelimit()) + printk(KERN_DEBUG "%s: small delta %lu ns\n", + __FUNCTION__, delta); + return -ETIME; + } + hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0); + return 0; +} + +static void lguest_clockevent_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + /* A 0 argument shuts the clock down. */ + hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0); + break; + case CLOCK_EVT_MODE_ONESHOT: + /* This is what we expect. */ + break; + case CLOCK_EVT_MODE_PERIODIC: + BUG(); + } +} + +/* This describes our primitive timer chip. */ +static struct clock_event_device lguest_clockevent = { + .name = "lguest", + .features = CLOCK_EVT_FEAT_ONESHOT, + .set_next_event = lguest_clockevent_set_next_event, + .set_mode = lguest_clockevent_set_mode, + .rating = INT_MAX, + .mult = 1, + .shift = 0, + .min_delta_ns = LG_CLOCK_MIN_DELTA, + .max_delta_ns = LG_CLOCK_MAX_DELTA, +}; + +/* This is the Guest timer interrupt handler (hardware interrupt 0). We just + * call the clockevent infrastructure and it does whatever needs doing. 
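+ * The event_handler callback is filled in by the generic clockevents
+ * code when lguest_time_init() below registers lguest_clockevent.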
*/ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) { - do_timer(hcall(LHCALL_TIMER_READ, 0, 0, 0)); - update_process_times(user_mode_vm(get_irq_regs())); + unsigned long flags; + + /* Don't interrupt us while this is running. */ + local_irq_save(flags); + lguest_clockevent.event_handler(&lguest_clockevent); + local_irq_restore(flags); } -static u64 sched_clock_base; static void lguest_time_init(void) { set_irq_handler(0, lguest_time_irq); - hcall(LHCALL_TIMER_READ, 0, 0, 0); - sched_clock_base = jiffies_64; - enable_lguest_irq(0); -} -static unsigned long long lguest_sched_clock(void) -{ - return (jiffies_64 - sched_clock_base) * (1000000000 / HZ); + /* We use the TSC if the Host tells us we can, otherwise a dumb + * jiffies-based clock. */ + if (lguest_data.tsc_khz) { + lguest_clock.shift = 22; + lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, + lguest_clock.shift); + lguest_clock.mask = CLOCKSOURCE_MASK(64); + lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; + } else { + /* To understand this, start at kernel/time/jiffies.c... */ + lguest_clock.shift = 8; + lguest_clock.mult = (((u64)NSEC_PER_SEC<<8)/ACTHZ) << 8; + lguest_clock.mask = CLOCKSOURCE_MASK(32); + } + clocksource_register(&lguest_clock); + + /* We can't set cpumask in the initializer: damn C limitations! */ + lguest_clockevent.cpumask = cpumask_of_cpu(0); + clockevents_register_device(&lguest_clockevent); + + enable_lguest_irq(0); } static void lguest_load_esp0(struct tss_struct *tss, @@ -418,8 +500,7 @@ static __init char *lguest_memory_setup(void) /* We do this here because lockcheck barfs if before start_kernel */ atomic_notifier_chain_register(&panic_notifier_list, &paniced); - e820.nr_map = 0; - add_memory_region(0, PFN_PHYS(boot->max_pfn), E820_RAM); + add_memory_region(E820_MAP->addr, E820_MAP->size, E820_MAP->type); return "LGUEST"; } @@ -450,8 +531,13 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len) return insn_len; } -__init void lguest_init(void) +__init void lguest_init(void *boot) { + /* Copy boot parameters first. */ + memcpy(&boot_params, boot, PARAM_SIZE); + memcpy(boot_command_line, __va(boot_params.hdr.cmd_line_ptr), + COMMAND_LINE_SIZE); + paravirt_ops.name = "lguest"; paravirt_ops.paravirt_enabled = 1; paravirt_ops.kernel_rpl = 1; @@ -498,10 +584,8 @@ __init void lguest_init(void) paravirt_ops.time_init = lguest_time_init; paravirt_ops.set_lazy_mode = lguest_lazy_mode; paravirt_ops.wbinvd = lguest_wbinvd; - paravirt_ops.sched_clock = lguest_sched_clock; hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0); - strncpy(boot_command_line, boot->cmdline, COMMAND_LINE_SIZE); /* We use top of mem for initial pagetables. */ init_pg_tables_end = __pa(pg0); @@ -532,13 +616,6 @@ __init void lguest_init(void) add_preferred_console("hvc", 0, NULL); - if (boot->initrd_size) { - /* We stash this at top of memory. */ - INITRD_START = boot->max_pfn*PAGE_SIZE - boot->initrd_size; - INITRD_SIZE = boot->initrd_size; - LOADER_TYPE = 0xFF; - } - pm_power_off = lguest_power_off; start_kernel(); } diff --git a/drivers/lguest/lguest_asm.S b/drivers/lguest/lguest_asm.S index 5ac3d20bb18..00046c57b5b 100644 --- a/drivers/lguest/lguest_asm.S +++ b/drivers/lguest/lguest_asm.S @@ -10,7 +10,8 @@ * This is where we begin: we have a magic signature which the launcher looks * for. The plan is that the Linux boot protocol will be extended with a * "platform type" field which will guide us here from the normal entry point, - * but for the moment this suffices. 
+ * but for the moment this suffices. We pass the virtual address of the boot
+ * info to lguest_init().
 *
 * We put it in .init.text, so it will be discarded after boot. */
@@ -18,6 +19,8 @@
 	.ascii "GenuineLguest"
 	/* Set up initial stack. */
 	movl $(init_thread_union+THREAD_SIZE),%esp
+	movl %esi, %eax
+	addl $__PAGE_OFFSET, %eax
 	jmp lguest_init

 /* The templates for inline patching. */
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
new file mode 100644
index 00000000000..e90d7a783da
--- /dev/null
+++ b/drivers/lguest/lguest_user.c
@@ -0,0 +1,236 @@
+/* Userspace control of the guest, via /dev/lguest. */
+#include
+#include
+#include
+#include "lg.h"
+
+static void setup_regs(struct lguest_regs *regs, unsigned long start)
+{
+	/* Write out stack in format lguest expects, so we can switch to it. */
+	regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
+	regs->cs = __KERNEL_CS|GUEST_PL;
+	regs->eflags = 0x202; /* Interrupts enabled. */
+	regs->eip = start;
+	/* esi points to our boot information (physical address 0) */
+}
+
+/* + addr */
+static long user_get_dma(struct lguest *lg, const u32 __user *input)
+{
+	unsigned long key, udma, irq;
+
+	if (get_user(key, input) != 0)
+		return -EFAULT;
+	udma = get_dma_buffer(lg, key, &irq);
+	if (!udma)
+		return -ENOENT;
+
+	/* We put irq number in udma->used_len. */
+	lgwrite_u32(lg, udma + offsetof(struct lguest_dma, used_len), irq);
+	return udma;
+}
+
+/* To force the Guest to stop running and return to the Launcher, the
+ * Waker writes LHREQ_BREAK and the value "1" to /dev/lguest. The
+ * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */
+static int break_guest_out(struct lguest *lg, const u32 __user *input)
+{
+	unsigned long on;
+
+	/* Fetch whether they're turning break on or off.. */
+	if (get_user(on, input) != 0)
+		return -EFAULT;
+
+	if (on) {
+		lg->break_out = 1;
+		/* Pop it out (may be running on different CPU) */
+		wake_up_process(lg->tsk);
+		/* Wait for them to reset it */
+		return wait_event_interruptible(lg->break_wq, !lg->break_out);
+	} else {
+		lg->break_out = 0;
+		wake_up(&lg->break_wq);
+		return 0;
+	}
+}
+
+/* + irq */
+static int user_send_irq(struct lguest *lg, const u32 __user *input)
+{
+	u32 irq;
+
+	if (get_user(irq, input) != 0)
+		return -EFAULT;
+	if (irq >= LGUEST_IRQS)
+		return -EINVAL;
+	set_bit(irq, lg->irqs_pending);
+	return 0;
+}
+
+static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
+{
+	struct lguest *lg = file->private_data;
+
+	if (!lg)
+		return -EINVAL;
+
+	/* If you're not the task which owns the guest, go away. */
+	if (current != lg->tsk)
+		return -EPERM;
+
+	if (lg->dead) {
+		size_t len;
+
+		if (IS_ERR(lg->dead))
+			return PTR_ERR(lg->dead);
+
+		len = min(size, strlen(lg->dead)+1);
+		if (copy_to_user(user, lg->dead, len) != 0)
+			return -EFAULT;
+		return len;
+	}
+
+	if (lg->dma_is_pending)
+		lg->dma_is_pending = 0;
+
+	return run_guest(lg, (unsigned long __user *)user);
+}
+
+/* Take: pfnlimit, pgdir, start, pageoffset. */
+static int initialize(struct file *file, const u32 __user *input)
+{
+	struct lguest *lg;
+	int err, i;
+	u32 args[4];
+
+	/* We grab the Big Lguest lock, which protects the global array
+	 * "lguests" and multiple simultaneous initializations.
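+	 * The same One Big lock also protects all the inter-guest I/O
+	 * structures (see lguest_lock in core.c).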
*/ + mutex_lock(&lguest_lock); + + if (file->private_data) { + err = -EBUSY; + goto unlock; + } + + if (copy_from_user(args, input, sizeof(args)) != 0) { + err = -EFAULT; + goto unlock; + } + + i = find_free_guest(); + if (i < 0) { + err = -ENOSPC; + goto unlock; + } + lg = &lguests[i]; + lg->guestid = i; + lg->pfn_limit = args[0]; + lg->page_offset = args[3]; + lg->regs_page = get_zeroed_page(GFP_KERNEL); + if (!lg->regs_page) { + err = -ENOMEM; + goto release_guest; + } + lg->regs = (void *)lg->regs_page + PAGE_SIZE - sizeof(*lg->regs); + + err = init_guest_pagetable(lg, args[1]); + if (err) + goto free_regs; + + setup_regs(lg->regs, args[2]); + setup_guest_gdt(lg); + init_clockdev(lg); + lg->tsk = current; + lg->mm = get_task_mm(lg->tsk); + init_waitqueue_head(&lg->break_wq); + lg->last_pages = NULL; + file->private_data = lg; + + mutex_unlock(&lguest_lock); + + return sizeof(args); + +free_regs: + free_page(lg->regs_page); +release_guest: + memset(lg, 0, sizeof(*lg)); +unlock: + mutex_unlock(&lguest_lock); + return err; +} + +static ssize_t write(struct file *file, const char __user *input, + size_t size, loff_t *off) +{ + struct lguest *lg = file->private_data; + u32 req; + + if (get_user(req, input) != 0) + return -EFAULT; + input += sizeof(req); + + if (req != LHREQ_INITIALIZE && !lg) + return -EINVAL; + if (lg && lg->dead) + return -ENOENT; + + /* If you're not the task which owns the Guest, you can only break */ + if (lg && current != lg->tsk && req != LHREQ_BREAK) + return -EPERM; + + switch (req) { + case LHREQ_INITIALIZE: + return initialize(file, (const u32 __user *)input); + case LHREQ_GETDMA: + return user_get_dma(lg, (const u32 __user *)input); + case LHREQ_IRQ: + return user_send_irq(lg, (const u32 __user *)input); + case LHREQ_BREAK: + return break_guest_out(lg, (const u32 __user *)input); + default: + return -EINVAL; + } +} + +static int close(struct inode *inode, struct file *file) +{ + struct lguest *lg = file->private_data; + + if (!lg) + return 0; + + mutex_lock(&lguest_lock); + /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ + hrtimer_cancel(&lg->hrt); + release_all_dma(lg); + free_guest_pagetable(lg); + mmput(lg->mm); + if (!IS_ERR(lg->dead)) + kfree(lg->dead); + free_page(lg->regs_page); + memset(lg, 0, sizeof(*lg)); + mutex_unlock(&lguest_lock); + return 0; +} + +static struct file_operations lguest_fops = { + .owner = THIS_MODULE, + .release = close, + .write = write, + .read = read, +}; +static struct miscdevice lguest_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "lguest", + .fops = &lguest_fops, +}; + +int __init lguest_device_init(void) +{ + return misc_register(&lguest_dev); +} + +void __exit lguest_device_remove(void) +{ + misc_deregister(&lguest_dev); +} diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c new file mode 100644 index 00000000000..1b0ba09b126 --- /dev/null +++ b/drivers/lguest/page_tables.c @@ -0,0 +1,411 @@ +/* Shadow page table operations. + * Copyright (C) Rusty Russell IBM Corporation 2006. 
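+ *
+ * (For orientation: the Guest keeps its own page tables in its memory,
+ * but the CPU only ever walks the shadow copies managed here.  The Host
+ * builds shadow entries lazily, one pte at a time, in demand_page()
+ * below, and tears them down again in the release/flush routines.)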
+ * GPL v2 and any later version */ +#include +#include +#include +#include +#include +#include +#include "lg.h" + +#define PTES_PER_PAGE_SHIFT 10 +#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT) +#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1) + +static DEFINE_PER_CPU(spte_t *, switcher_pte_pages); +#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) + +static unsigned vaddr_to_pgd_index(unsigned long vaddr) +{ + return vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT); +} + +/* These access the shadow versions (ie. the ones used by the CPU). */ +static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) +{ + unsigned int index = vaddr_to_pgd_index(vaddr); + + if (index >= SWITCHER_PGD_INDEX) { + kill_guest(lg, "attempt to access switcher pages"); + index = 0; + } + return &lg->pgdirs[i].pgdir[index]; +} + +static spte_t *spte_addr(struct lguest *lg, spgd_t spgd, unsigned long vaddr) +{ + spte_t *page = __va(spgd.pfn << PAGE_SHIFT); + BUG_ON(!(spgd.flags & _PAGE_PRESENT)); + return &page[(vaddr >> PAGE_SHIFT) % PTES_PER_PAGE]; +} + +/* These access the guest versions. */ +static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) +{ + unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT); + return lg->pgdirs[lg->pgdidx].cr3 + index * sizeof(gpgd_t); +} + +static unsigned long gpte_addr(struct lguest *lg, + gpgd_t gpgd, unsigned long vaddr) +{ + unsigned long gpage = gpgd.pfn << PAGE_SHIFT; + BUG_ON(!(gpgd.flags & _PAGE_PRESENT)); + return gpage + ((vaddr>>PAGE_SHIFT) % PTES_PER_PAGE) * sizeof(gpte_t); +} + +/* Do a virtual -> physical mapping on a user page. */ +static unsigned long get_pfn(unsigned long virtpfn, int write) +{ + struct page *page; + unsigned long ret = -1UL; + + down_read(¤t->mm->mmap_sem); + if (get_user_pages(current, current->mm, virtpfn << PAGE_SHIFT, + 1, write, 1, &page, NULL) == 1) + ret = page_to_pfn(page); + up_read(¤t->mm->mmap_sem); + return ret; +} + +static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write) +{ + spte_t spte; + unsigned long pfn; + + /* We ignore the global flag. */ + spte.flags = (gpte.flags & ~_PAGE_GLOBAL); + pfn = get_pfn(gpte.pfn, write); + if (pfn == -1UL) { + kill_guest(lg, "failed to get page %u", gpte.pfn); + /* Must not put_page() bogus page on cleanup. */ + spte.flags = 0; + } + spte.pfn = pfn; + return spte; +} + +static void release_pte(spte_t pte) +{ + if (pte.flags & _PAGE_PRESENT) + put_page(pfn_to_page(pte.pfn)); +} + +static void check_gpte(struct lguest *lg, gpte_t gpte) +{ + if ((gpte.flags & (_PAGE_PWT|_PAGE_PSE)) || gpte.pfn >= lg->pfn_limit) + kill_guest(lg, "bad page table entry"); +} + +static void check_gpgd(struct lguest *lg, gpgd_t gpgd) +{ + if ((gpgd.flags & ~_PAGE_TABLE) || gpgd.pfn >= lg->pfn_limit) + kill_guest(lg, "bad page directory entry"); +} + +/* FIXME: We hold reference to pages, which prevents them from being + swapped. It'd be nice to have a callback when Linux wants to swap out. */ + +/* We fault pages in, which allows us to update accessed/dirty bits. + * Return true if we got page. */ +int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) +{ + gpgd_t gpgd; + spgd_t *spgd; + unsigned long gpte_ptr; + gpte_t gpte; + spte_t *spte; + + gpgd = mkgpgd(lgread_u32(lg, gpgd_addr(lg, vaddr))); + if (!(gpgd.flags & _PAGE_PRESENT)) + return 0; + + spgd = spgd_addr(lg, lg->pgdidx, vaddr); + if (!(spgd->flags & _PAGE_PRESENT)) { + /* Get a page of PTEs for them. 
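+		 * (Two asides, for orientation:
+		 *
+		 *  The address arithmetic above assumes PTES_PER_PAGE == 1024,
+		 *  so a vaddr splits as pgd index = vaddr >> 22 and pte index
+		 *  = (vaddr >> 12) % 1024; e.g. 0xC0101234 gives pgd 0x300,
+		 *  pte 0x101, page offset 0x234.
+		 *
+		 *  The errcode tests below follow the x86 page-fault error
+		 *  code: bit 1 set means "it was a write", bit 2 set means
+		 *  "it was a userspace access".)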
*/ + unsigned long ptepage = get_zeroed_page(GFP_KERNEL); + /* FIXME: Steal from self in this case? */ + if (!ptepage) { + kill_guest(lg, "out of memory allocating pte page"); + return 0; + } + check_gpgd(lg, gpgd); + spgd->raw.val = (__pa(ptepage) | gpgd.flags); + } + + gpte_ptr = gpte_addr(lg, gpgd, vaddr); + gpte = mkgpte(lgread_u32(lg, gpte_ptr)); + + /* No page? */ + if (!(gpte.flags & _PAGE_PRESENT)) + return 0; + + /* Write to read-only page? */ + if ((errcode & 2) && !(gpte.flags & _PAGE_RW)) + return 0; + + /* User access to a non-user page? */ + if ((errcode & 4) && !(gpte.flags & _PAGE_USER)) + return 0; + + check_gpte(lg, gpte); + gpte.flags |= _PAGE_ACCESSED; + if (errcode & 2) + gpte.flags |= _PAGE_DIRTY; + + /* We're done with the old pte. */ + spte = spte_addr(lg, *spgd, vaddr); + release_pte(*spte); + + /* We don't make it writable if this isn't a write: later + * write will fault so we can set dirty bit in guest. */ + if (gpte.flags & _PAGE_DIRTY) + *spte = gpte_to_spte(lg, gpte, 1); + else { + gpte_t ro_gpte = gpte; + ro_gpte.flags &= ~_PAGE_RW; + *spte = gpte_to_spte(lg, ro_gpte, 0); + } + + /* Now we update dirty/accessed on guest. */ + lgwrite_u32(lg, gpte_ptr, gpte.raw.val); + return 1; +} + +/* This is much faster than the full demand_page logic. */ +static int page_writable(struct lguest *lg, unsigned long vaddr) +{ + spgd_t *spgd; + unsigned long flags; + + spgd = spgd_addr(lg, lg->pgdidx, vaddr); + if (!(spgd->flags & _PAGE_PRESENT)) + return 0; + + flags = spte_addr(lg, *spgd, vaddr)->flags; + return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); +} + +void pin_page(struct lguest *lg, unsigned long vaddr) +{ + if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) + kill_guest(lg, "bad stack page %#lx", vaddr); +} + +static void release_pgd(struct lguest *lg, spgd_t *spgd) +{ + if (spgd->flags & _PAGE_PRESENT) { + unsigned int i; + spte_t *ptepage = __va(spgd->pfn << PAGE_SHIFT); + for (i = 0; i < PTES_PER_PAGE; i++) + release_pte(ptepage[i]); + free_page((long)ptepage); + spgd->raw.val = 0; + } +} + +static void flush_user_mappings(struct lguest *lg, int idx) +{ + unsigned int i; + for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++) + release_pgd(lg, lg->pgdirs[idx].pgdir + i); +} + +void guest_pagetable_flush_user(struct lguest *lg) +{ + flush_user_mappings(lg, lg->pgdidx); +} + +static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) +{ + unsigned int i; + for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) + if (lg->pgdirs[i].cr3 == pgtable) + break; + return i; +} + +static unsigned int new_pgdir(struct lguest *lg, + unsigned long cr3, + int *blank_pgdir) +{ + unsigned int next; + + next = random32() % ARRAY_SIZE(lg->pgdirs); + if (!lg->pgdirs[next].pgdir) { + lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL); + if (!lg->pgdirs[next].pgdir) + next = lg->pgdidx; + else + /* There are no mappings: you'll need to re-pin */ + *blank_pgdir = 1; + } + lg->pgdirs[next].cr3 = cr3; + /* Release all the non-kernel mappings. 
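+	 * ("Non-kernel" means every pgd slot below the Guest's page_offset,
+	 *  ie. the user half of the address space.  Kernel mappings are kept
+	 *  identical in every shadow pgdir (see guest_set_pte), so those can
+	 *  stay.)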
+	 */
+	flush_user_mappings(lg, next);
+
+	return next;
+}
+
+void guest_new_pagetable(struct lguest *lg, unsigned long pgtable)
+{
+	int newpgdir, repin = 0;
+
+	newpgdir = find_pgdir(lg, pgtable);
+	if (newpgdir == ARRAY_SIZE(lg->pgdirs))
+		newpgdir = new_pgdir(lg, pgtable, &repin);
+	lg->pgdidx = newpgdir;
+	if (repin)
+		pin_stack_pages(lg);
+}
+
+static void release_all_pagetables(struct lguest *lg)
+{
+	unsigned int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
+		if (lg->pgdirs[i].pgdir)
+			for (j = 0; j < SWITCHER_PGD_INDEX; j++)
+				release_pgd(lg, lg->pgdirs[i].pgdir + j);
+}
+
+void guest_pagetable_clear_all(struct lguest *lg)
+{
+	release_all_pagetables(lg);
+	pin_stack_pages(lg);
+}
+
+static void do_set_pte(struct lguest *lg, int idx,
+		       unsigned long vaddr, gpte_t gpte)
+{
+	spgd_t *spgd = spgd_addr(lg, idx, vaddr);
+	if (spgd->flags & _PAGE_PRESENT) {
+		spte_t *spte = spte_addr(lg, *spgd, vaddr);
+		release_pte(*spte);
+		if (gpte.flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
+			check_gpte(lg, gpte);
+			*spte = gpte_to_spte(lg, gpte, gpte.flags&_PAGE_DIRTY);
+		} else
+			spte->raw.val = 0;
+	}
+}
+
+void guest_set_pte(struct lguest *lg,
+		   unsigned long cr3, unsigned long vaddr, gpte_t gpte)
+{
+	/* Kernel mappings must be changed on all top levels. */
+	if (vaddr >= lg->page_offset) {
+		unsigned int i;
+		for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
+			if (lg->pgdirs[i].pgdir)
+				do_set_pte(lg, i, vaddr, gpte);
+	} else {
+		int pgdir = find_pgdir(lg, cr3);
+		if (pgdir != ARRAY_SIZE(lg->pgdirs))
+			do_set_pte(lg, pgdir, vaddr, gpte);
+	}
+}
+
+void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx)
+{
+	int pgdir;
+
+	if (idx >= SWITCHER_PGD_INDEX)
+		return;
+
+	pgdir = find_pgdir(lg, cr3);
+	if (pgdir < ARRAY_SIZE(lg->pgdirs))
+		release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
+}
+
+int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
+{
+	/* We assume this in flush_user_mappings, so check now */
+	if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX)
+		return -EINVAL;
+	lg->pgdidx = 0;
+	lg->pgdirs[lg->pgdidx].cr3 = pgtable;
+	lg->pgdirs[lg->pgdidx].pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL);
+	if (!lg->pgdirs[lg->pgdidx].pgdir)
+		return -ENOMEM;
+	return 0;
+}
+
+void free_guest_pagetable(struct lguest *lg)
+{
+	unsigned int i;
+
+	release_all_pagetables(lg);
+	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
+		free_page((long)lg->pgdirs[i].pgdir);
+}
+
+/* Caller must be preempt-safe */
+void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages)
+{
+	spte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
+	spgd_t switcher_pgd;
+	spte_t regs_pte;
+
+	/* Since the switcher is less than 4MB, we simply mug the top pte
+	 * page. */
+	switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT;
+	switcher_pgd.flags = _PAGE_KERNEL;
+	lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
+
+	/* Map our regs page over stack page.
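+	 * (The index arithmetic below works because "pages" lives in the
+	 *  switcher's top 4MB: (unsigned long)pages / PAGE_SIZE is its page
+	 *  number, and % PTES_PER_PAGE keeps the low 10 bits, ie. its slot
+	 *  in the single pte page which maps that 4MB.)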
+	 */
+	regs_pte.pfn = __pa(lg->regs_page) >> PAGE_SHIFT;
+	regs_pte.flags = _PAGE_KERNEL;
+	switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTES_PER_PAGE]
+		= regs_pte;
+}
+
+static void free_switcher_pte_pages(void)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i)
+		free_page((long)switcher_pte_page(i));
+}
+
+static __init void populate_switcher_pte_page(unsigned int cpu,
+					      struct page *switcher_page[],
+					      unsigned int pages)
+{
+	unsigned int i;
+	spte_t *pte = switcher_pte_page(cpu);
+
+	for (i = 0; i < pages; i++) {
+		pte[i].pfn = page_to_pfn(switcher_page[i]);
+		pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED;
+	}
+
+	/* We only map this CPU's pages, so guest can't see others. */
+	i = pages + cpu*2;
+
+	/* First page (regs) is rw, second (state) is ro. */
+	pte[i].pfn = page_to_pfn(switcher_page[i]);
+	pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW;
+	pte[i+1].pfn = page_to_pfn(switcher_page[i+1]);
+	pte[i+1].flags = _PAGE_PRESENT|_PAGE_ACCESSED;
+}
+
+__init int init_pagetables(struct page **switcher_page, unsigned int pages)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i) {
+		switcher_pte_page(i) = (spte_t *)get_zeroed_page(GFP_KERNEL);
+		if (!switcher_pte_page(i)) {
+			free_switcher_pte_pages();
+			return -ENOMEM;
+		}
+		populate_switcher_pte_page(i, switcher_page, pages);
+	}
+	return 0;
+}
+
+void free_pagetables(void)
+{
+	free_switcher_pte_pages();
+}
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
new file mode 100644
index 00000000000..1b2cfe89dcd
--- /dev/null
+++ b/drivers/lguest/segments.c
@@ -0,0 +1,125 @@
+#include "lg.h"
+
+static int desc_ok(const struct desc_struct *gdt)
+{
+	/* MBZ=0, P=1, DT=1  */
+	return ((gdt->b & 0x00209000) == 0x00009000);
+}
+
+static int segment_present(const struct desc_struct *gdt)
+{
+	return gdt->b & 0x8000;
+}
+
+static int ignored_gdt(unsigned int num)
+{
+	return (num == GDT_ENTRY_TSS
+		|| num == GDT_ENTRY_LGUEST_CS
+		|| num == GDT_ENTRY_LGUEST_DS
+		|| num == GDT_ENTRY_DOUBLEFAULT_TSS);
+}
+
+/* We don't allow removal of CS, DS or SS; it doesn't make sense. */
+static void check_segment_use(struct lguest *lg, unsigned int desc)
+{
+	if (lg->regs->gs / 8 == desc)
+		lg->regs->gs = 0;
+	if (lg->regs->fs / 8 == desc)
+		lg->regs->fs = 0;
+	if (lg->regs->es / 8 == desc)
+		lg->regs->es = 0;
+	if (lg->regs->ds / 8 == desc
+	    || lg->regs->cs / 8 == desc
+	    || lg->regs->ss / 8 == desc)
+		kill_guest(lg, "Removed live GDT entry %u", desc);
+}
+
+static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
+{
+	unsigned int i;
+
+	for (i = start; i < end; i++) {
+		/* We never copy these ones to real gdt */
+		if (ignored_gdt(i))
+			continue;
+
+		/* We could fault in switch_to_guest if they are using
+		 * a removed segment. */
+		if (!segment_present(&lg->gdt[i])) {
+			check_segment_use(lg, i);
+			continue;
+		}
+
+		if (!desc_ok(&lg->gdt[i]))
+			kill_guest(lg, "Bad GDT descriptor %i", i);
+
+		/* DPL 0 presumably means "for use by guest". */
+		if ((lg->gdt[i].b & 0x00006000) == 0)
+			lg->gdt[i].b |= (GUEST_PL << 13);
+
+		/* Set accessed bit, since gdt isn't writable. */
+		lg->gdt[i].b |= 0x00000100;
+	}
+}
+
+void setup_default_gdt_entries(struct lguest_ro_state *state)
+{
+	struct desc_struct *gdt = state->guest_gdt;
+	unsigned long tss = (unsigned long)&state->guest_tss;
+
+	/* Hypervisor segments. */
+	gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
+	gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
+
+	/* This is the one which we *cannot* copy from guest, since the tss
+	   depends on this lguest_ro_state, ie. this cpu.
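+
+	   (Aside: the two words below are just the standard descriptor
+	   encoding: .a holds limit 0x67, a 104-byte TSS, with base[15:0] in
+	   its top half; .b holds type 0x89, a present available 32-bit TSS,
+	   plus base[23:16] and base[31:24].  E.g. tss = 0xC1234567 encodes
+	   as .a = 0x45670067, .b = 0xC1008923.)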
+	 */
+	gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16);
+	gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000)
+		| ((tss >> 16) & 0x000000FF);
+}
+
+void setup_guest_gdt(struct lguest *lg)
+{
+	lg->gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
+	lg->gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
+	lg->gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
+	lg->gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
+}
+
+/* This is a fast version for the common case where only the three TLS entries
+ * have changed. */
+void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt)
+{
+	unsigned int i;
+
+	for (i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; i++)
+		gdt[i] = lg->gdt[i];
+}
+
+void copy_gdt(const struct lguest *lg, struct desc_struct *gdt)
+{
+	unsigned int i;
+
+	for (i = 0; i < GDT_ENTRIES; i++)
+		if (!ignored_gdt(i))
+			gdt[i] = lg->gdt[i];
+}
+
+void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num)
+{
+	if (num > ARRAY_SIZE(lg->gdt))
+		kill_guest(lg, "too many gdt entries %i", num);
+
+	lgread(lg, lg->gdt, table, num * sizeof(lg->gdt[0]));
+	fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->gdt));
+	lg->changed |= CHANGED_GDT;
+}
+
+void guest_load_tls(struct lguest *lg, unsigned long gtls)
+{
+	struct desc_struct *tls = &lg->gdt[GDT_ENTRY_TLS_MIN];
+
+	lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES);
+	fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1);
+	lg->changed |= CHANGED_GDT_TLS;
+}
diff --git a/drivers/lguest/switcher.S b/drivers/lguest/switcher.S
new file mode 100644
index 00000000000..eadd4cc299d
--- /dev/null
+++ b/drivers/lguest/switcher.S
@@ -0,0 +1,159 @@
+/* This code sits at 0xFFC00000 to do the low-level guest<->host switch.
+
+   There are two pages above us for this CPU (struct lguest_pages).
+   The second page (struct lguest_ro_state) becomes read-only after the
+   context switch.  The first page (the stack for traps) remains writable,
+   but while we're in here, the guest cannot be running.
+*/
+#include
+#include
+#include "lg.h"
+
+.text
+ENTRY(start_switcher_text)
+
+/* %eax points to lguest pages for this CPU.  %ebx contains cr3 value.
+   All normal registers can be clobbered! */
+ENTRY(switch_to_guest)
+	/* Save host segments on host stack. */
+	pushl	%es
+	pushl	%ds
+	pushl	%gs
+	pushl	%fs
+	/* With CONFIG_FRAME_POINTER, gcc doesn't let us clobber this! */
+	pushl	%ebp
+	/* Save host stack. */
+	movl	%esp, LGUEST_PAGES_host_sp(%eax)
+	/* Switch to guest stack: if we get NMI we expect to be there. */
+	movl	%eax, %edx
+	addl	$LGUEST_PAGES_regs, %edx
+	movl	%edx, %esp
+	/* Switch to guest's GDT, IDT. */
+	lgdt	LGUEST_PAGES_guest_gdt_desc(%eax)
+	lidt	LGUEST_PAGES_guest_idt_desc(%eax)
+	/* Switch to guest's TSS while GDT still writable. */
+	movl	$(GDT_ENTRY_TSS*8), %edx
+	ltr	%dx
+	/* Set host's TSS GDT entry to available (clear byte 5 bit 2). */
+	movl	(LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx
+	andb	$0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx)
+	/* Switch to guest page tables:  lguest_pages->state now read-only. */
+	movl	%ebx, %cr3
+	/* Restore guest regs */
+	popl	%ebx
+	popl	%ecx
+	popl	%edx
+	popl	%esi
+	popl	%edi
+	popl	%ebp
+	popl	%gs
+	popl	%eax
+	popl	%fs
+	popl	%ds
+	popl	%es
+	/* Skip error code and trap number */
+	addl	$8, %esp
+	iret
+
+#define SWITCH_TO_HOST							\
+	/* Save guest state */						\
+	pushl	%es;							\
+	pushl	%ds;							\
+	pushl	%fs;							\
+	pushl	%eax;							\
+	pushl	%gs;							\
+	pushl	%ebp;							\
+	pushl	%edi;							\
+	pushl	%esi;							\
+	pushl	%edx;							\
+	pushl	%ecx;							\
+	pushl	%ebx;							\
+	/* Load lguest ds segment for convenience.
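+	   (LGUEST_DS names one of the hypervisor segments which	\
+	    setup_default_gdt_entries() plants in every Guest GDT, so	\
+	    unlike whatever the Guest left in %ds it is known to be	\
+	    flat and present.)						\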
+	 */								\
+	movl	$(LGUEST_DS), %eax;					\
+	movl	%eax, %ds;						\
+	/* Figure out where we are, based on stack (at top of regs). */	\
+	movl	%esp, %eax;						\
+	subl	$LGUEST_PAGES_regs, %eax;				\
+	/* Put trap number in %ebx before we switch cr3 and lose it. */	\
+	movl	LGUEST_PAGES_regs_trapnum(%eax), %ebx;			\
+	/* Switch to host page tables (host GDT, IDT and stack are in host \
+	   mem, so need this first) */					\
+	movl	LGUEST_PAGES_host_cr3(%eax), %edx;			\
+	movl	%edx, %cr3;						\
+	/* Set guest's TSS to available (clear byte 5 bit 2). */	\
+	andb	$0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \
+	/* Switch to host's GDT & IDT. */				\
+	lgdt	LGUEST_PAGES_host_gdt_desc(%eax);			\
+	lidt	LGUEST_PAGES_host_idt_desc(%eax);			\
+	/* Switch to host's stack. */					\
+	movl	LGUEST_PAGES_host_sp(%eax), %esp;			\
+	/* Switch to host's TSS */					\
+	movl	$(GDT_ENTRY_TSS*8), %edx;				\
+	ltr	%dx;							\
+	popl	%ebp;							\
+	popl	%fs;							\
+	popl	%gs;							\
+	popl	%ds;							\
+	popl	%es
+
+/* Return to run_guest_once. */
+return_to_host:
+	SWITCH_TO_HOST
+	iret
+
+deliver_to_host:
+	SWITCH_TO_HOST
+	/* Decode IDT and jump to host's irq handler.  When that does iret, it
+	 * will return to run_guest_once.  This is a feature. */
+	movl	(LGUEST_PAGES_host_idt_desc+2)(%eax), %edx
+	leal	(%edx,%ebx,8), %eax
+	movzwl	(%eax),%edx
+	movl	4(%eax), %eax
+	xorw	%ax, %ax
+	orl	%eax, %edx
+	jmp	*%edx
+
+/* Real hardware interrupts are delivered straight to the host.  Others
+   cause us to return to run_guest_once so it can decide what to do.  Note
+   that some of these are overridden by the guest to deliver directly, and
+   never enter here (see load_guest_idt_entry). */
+.macro IRQ_STUB N TARGET
+	.data; .long 1f; .text; 1:
+	/* Make an error number for most traps, which don't have one. */
+	.if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17)
+	pushl	$0
+	.endif
+	pushl	$\N
+	jmp	\TARGET
+	ALIGN
+.endm
+
+.macro IRQ_STUBS FIRST LAST TARGET
+	irq=\FIRST
+	.rept \LAST-\FIRST+1
+		IRQ_STUB irq \TARGET
+		irq=irq+1
+	.endr
+.endm
+
+/* We intercept every interrupt, because we may need to switch back to
+ * host.  Unfortunately we can't tell them apart except by entry
+ * point, so we need 256 entry points.
+ */
+.data
+.global default_idt_entries
+default_idt_entries:
+.text
+	IRQ_STUBS 0 1 return_to_host		/* First two traps */
+	IRQ_STUB 2 handle_nmi			/* NMI */
+	IRQ_STUBS 3 31 return_to_host		/* Rest of traps */
+	IRQ_STUBS 32 127 deliver_to_host	/* Real interrupts */
+	IRQ_STUB 128 return_to_host		/* System call (overridden) */
+	IRQ_STUBS 129 255 deliver_to_host	/* Other real interrupts */
+
+/* We ignore NMI and return.
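+ *
+ * (Aside: the .if in IRQ_STUB above skips the dummy $0 push exactly for
+ * the vectors where the CPU pushes its own error code: 8, 10-14 and 17.
+ * Everything else gets a fake one, so the stack always matches
+ * struct lguest_regs.)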
*/ +handle_nmi: + addl $8, %esp + iret + +ENTRY(end_switcher_text) diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h index 62c091ffccc..a4d806610b7 100644 --- a/include/asm-i386/tsc.h +++ b/include/asm-i386/tsc.h @@ -63,6 +63,7 @@ extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern void init_tsc_clocksource(void); +int check_tsc_unstable(void); /* * Boot-time check whether the TSCs are synchronized across diff --git a/include/linux/lguest.h b/include/linux/lguest.h index f30c04fc22b..500aace21ca 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -3,11 +3,6 @@ #ifndef _ASM_LGUEST_H #define _ASM_LGUEST_H -/* These are randomly chosen numbers which indicate we're an lguest at boot */ -#define LGUEST_MAGIC_EBP 0x4C687970 -#define LGUEST_MAGIC_EDI 0x652D4D65 -#define LGUEST_MAGIC_ESI 0xFFFFFFFF - #ifndef __ASSEMBLY__ #include @@ -20,7 +15,7 @@ #define LHCALL_LOAD_IDT_ENTRY 6 #define LHCALL_SET_STACK 7 #define LHCALL_TS 8 -#define LHCALL_TIMER_READ 9 +#define LHCALL_SET_CLOCKEVENT 9 #define LHCALL_HALT 10 #define LHCALL_GET_WALLCLOCK 11 #define LHCALL_BIND_DMA 12 @@ -29,6 +24,9 @@ #define LHCALL_SET_PMD 15 #define LHCALL_LOAD_TLS 16 +#define LG_CLOCK_MIN_DELTA 100UL +#define LG_CLOCK_MAX_DELTA ULONG_MAX + #define LGUEST_TRAP_ENTRY 0x1F static inline unsigned long @@ -75,6 +73,8 @@ struct lguest_data unsigned long reserve_mem; /* ID of this guest (used by network driver to set ethernet address) */ u16 guestid; + /* KHz for the TSC clock. */ + u32 tsc_khz; /* Fields initialized by the guest at boot: */ /* Instruction range to suppress interrupts even if enabled */ diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h new file mode 100644 index 00000000000..0ba414a40c8 --- /dev/null +++ b/include/linux/lguest_launcher.h @@ -0,0 +1,73 @@ +#ifndef _ASM_LGUEST_USER +#define _ASM_LGUEST_USER +/* Everything the "lguest" userspace program needs to know. */ +/* They can register up to 32 arrays of lguest_dma. */ +#define LGUEST_MAX_DMA 32 +/* At most we can dma 16 lguest_dma in one op. */ +#define LGUEST_MAX_DMA_SECTIONS 16 + +/* How many devices? Assume each one wants up to two dma arrays per device. */ +#define LGUEST_MAX_DEVICES (LGUEST_MAX_DMA/2) + +struct lguest_dma +{ + /* 0 if free to be used, filled by hypervisor. */ + u32 used_len; + unsigned long addr[LGUEST_MAX_DMA_SECTIONS]; + u16 len[LGUEST_MAX_DMA_SECTIONS]; +}; + +struct lguest_block_page +{ + /* 0 is a read, 1 is a write. */ + int type; + u32 sector; /* Offset in device = sector * 512. */ + u32 bytes; /* Length expected to be read/written in bytes */ + /* 0 = pending, 1 = done, 2 = done, error */ + int result; + u32 num_sectors; /* Disk length = num_sectors * 512 */ +}; + +/* There is a shared page of these. */ +struct lguest_net +{ + /* Simply the mac address (with multicast bit meaning promisc). */ + unsigned char mac[6]; +}; + +/* Where the Host expects the Guest to SEND_DMA console output to. */ +#define LGUEST_CONSOLE_DMA_KEY 0 + +/* We have a page of these descriptors in the lguest_device page. */ +struct lguest_device_desc { + u16 type; +#define LGUEST_DEVICE_T_CONSOLE 1 +#define LGUEST_DEVICE_T_NET 2 +#define LGUEST_DEVICE_T_BLOCK 3 + + u16 features; +#define LGUEST_NET_F_NOCSUM 0x4000 /* Don't bother checksumming */ +#define LGUEST_DEVICE_F_RANDOMNESS 0x8000 /* IRQ is fairly random */ + + u16 status; +/* 256 and above are device specific. */ +#define LGUEST_DEVICE_S_ACKNOWLEDGE 1 /* We have seen device. 
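+ *
+ * A Guest driver would typically walk the status upward as it binds:
+ * ACKNOWLEDGE first, then |DRIVER, then |DRIVER_OK (a sketch of the
+ * intended discipline; nothing in this header enforces it).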
*/ +#define LGUEST_DEVICE_S_DRIVER 2 /* We have found a driver */ +#define LGUEST_DEVICE_S_DRIVER_OK 4 /* Driver says OK! */ +#define LGUEST_DEVICE_S_REMOVED 8 /* Device has gone away. */ +#define LGUEST_DEVICE_S_REMOVED_ACK 16 /* Driver has been told. */ +#define LGUEST_DEVICE_S_FAILED 128 /* Something actually failed */ + + u16 num_pages; + u32 pfn; +}; + +/* Write command first word is a request. */ +enum lguest_req +{ + LHREQ_INITIALIZE, /* + pfnlimit, pgdir, start, pageoffset */ + LHREQ_GETDMA, /* + addr (returns &lguest_dma, irq in ->used_len) */ + LHREQ_IRQ, /* + irq */ + LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ +}; +#endif /* _ASM_LGUEST_USER */ diff --git a/kernel/fork.c b/kernel/fork.c index e7a2d995b08..46983899822 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -127,7 +127,6 @@ void __put_task_struct(struct task_struct *tsk) if (!profile_handoff_task(tsk)) free_task(tsk); } -EXPORT_SYMBOL_GPL(__put_task_struct); void __init fork_init(unsigned long mempages) { -- cgit v1.2.3-70-g09d2 From 20c2df83d25c6a95affe6157a4c9cac4cf5ffaac Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 20 Jul 2007 10:11:58 +0900 Subject: mm: Remove slab destructors from kmem_cache_create(). Slab destructors were no longer supported after Christoph's c59def9f222d44bb7e2f0a559f2906191a0862d7 change. They've been BUGs for both slab and slub, and slob never supported them either. This rips out support for the dtor pointer from kmem_cache_create() completely and fixes up every single callsite in the kernel (there were about 224, not including the slab allocator definitions themselves, or the documentation references). Signed-off-by: Paul Mundt --- arch/arm/plat-s3c24xx/dma.c | 2 +- arch/arm26/mm/memc.c | 4 ++-- arch/i386/mm/init.c | 3 +-- arch/ia64/ia32/ia32_support.c | 2 +- arch/powerpc/kernel/rtas_flash.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/init_64.c | 3 +-- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/sh/kernel/cpu/sh4/sq.c | 3 +-- arch/sh/mm/pmb.c | 2 +- arch/sparc64/mm/tsb.c | 3 +-- block/bsg.c | 2 +- block/ll_rw_blk.c | 6 +++--- drivers/acpi/osl.c | 2 +- drivers/block/aoe/aoeblk.c | 4 ++-- drivers/ieee1394/eth1394.c | 2 +- drivers/infiniband/core/mad.c | 1 - drivers/infiniband/hw/amso1100/c2_vq.c | 2 +- drivers/infiniband/hw/ehca/ehca_av.c | 2 +- drivers/infiniband/hw/ehca/ehca_cq.c | 2 +- drivers/infiniband/hw/ehca/ehca_main.c | 2 +- drivers/infiniband/hw/ehca/ehca_mrmw.c | 4 ++-- drivers/infiniband/hw/ehca/ehca_pd.c | 2 +- drivers/infiniband/hw/ehca/ehca_qp.c | 2 +- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- drivers/kvm/mmu.c | 8 ++++---- drivers/md/raid5.c | 4 ++-- drivers/message/i2o/i2o_block.c | 3 +-- drivers/mtd/ubi/eba.c | 2 +- drivers/mtd/ubi/wl.c | 2 +- drivers/s390/block/dasd_devmap.c | 2 +- drivers/s390/scsi/zfcp_aux.c | 6 +++--- drivers/scsi/aic94xx/aic94xx_init.c | 4 ++-- drivers/scsi/libsas/sas_init.c | 2 +- drivers/scsi/qla2xxx/qla_os.c | 2 +- drivers/scsi/qla4xxx/ql4_os.c | 2 +- drivers/scsi/scsi.c | 2 +- drivers/scsi/scsi_lib.c | 4 ++-- drivers/scsi/scsi_tgt_lib.c | 2 +- drivers/usb/host/uhci-hcd.c | 2 +- drivers/usb/mon/mon_text.c | 2 +- fs/adfs/super.c | 4 ++-- fs/affs/super.c | 2 +- fs/afs/super.c | 3 +-- fs/befs/linuxvfs.c | 4 ++-- fs/bfs/inode.c | 4 ++-- fs/bio.c | 2 +- fs/block_dev.c | 2 +- fs/cifs/cifsfs.c | 10 +++++----- fs/coda/inode.c | 4 ++-- fs/configfs/mount.c | 2 +- fs/dcache.c | 4 ++-- fs/dcookies.c | 2 +- fs/dlm/lowcomms.c | 2 +- fs/dlm/memory.c | 2 +- fs/dnotify.c | 2 +- fs/dquot.c | 4 ++-- 
fs/ecryptfs/main.c | 2 +- fs/efs/super.c | 4 ++-- fs/eventpoll.c | 4 ++-- fs/ext2/super.c | 4 ++-- fs/ext3/super.c | 2 +- fs/ext4/super.c | 2 +- fs/fat/cache.c | 2 +- fs/fat/inode.c | 2 +- fs/fcntl.c | 2 +- fs/freevxfs/vxfs_super.c | 4 ++-- fs/fuse/dev.c | 2 +- fs/fuse/inode.c | 2 +- fs/gfs2/main.c | 6 +++--- fs/hfs/super.c | 2 +- fs/hfsplus/super.c | 2 +- fs/hpfs/super.c | 4 ++-- fs/hugetlbfs/inode.c | 2 +- fs/inode.c | 3 +-- fs/inotify_user.c | 4 ++-- fs/isofs/inode.c | 2 +- fs/jbd/journal.c | 8 +++----- fs/jbd/revoke.c | 4 ++-- fs/jbd2/journal.c | 8 +++----- fs/jbd2/revoke.c | 4 ++-- fs/jffs2/malloc.c | 18 +++++++++--------- fs/jffs2/super.c | 2 +- fs/jfs/jfs_metapage.c | 2 +- fs/jfs/super.c | 2 +- fs/locks.c | 2 +- fs/mbcache.c | 2 +- fs/minix/inode.c | 4 ++-- fs/namespace.c | 2 +- fs/ncpfs/inode.c | 4 ++-- fs/nfs/direct.c | 2 +- fs/nfs/inode.c | 4 ++-- fs/nfs/pagelist.c | 2 +- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- fs/nfsd/nfs4state.c | 8 ++++---- fs/ntfs/super.c | 10 +++++----- fs/ocfs2/dlm/dlmfs.c | 2 +- fs/ocfs2/dlm/dlmmaster.c | 2 +- fs/ocfs2/super.c | 2 +- fs/ocfs2/uptodate.c | 2 +- fs/openpromfs/inode.c | 2 +- fs/proc/inode.c | 4 ++-- fs/qnx4/inode.c | 2 +- fs/reiserfs/super.c | 2 +- fs/romfs/inode.c | 4 ++-- fs/smbfs/inode.c | 4 ++-- fs/smbfs/request.c | 2 +- fs/sysfs/mount.c | 2 +- fs/sysv/inode.c | 2 +- fs/udf/super.c | 2 +- fs/ufs/super.c | 4 ++-- fs/xfs/linux-2.6/kmem.h | 4 ++-- include/linux/i2o.h | 3 +-- include/linux/slab.h | 3 +-- ipc/mqueue.c | 2 +- kernel/fork.c | 18 +++++++++--------- kernel/nsproxy.c | 2 +- kernel/posix-timers.c | 2 +- kernel/user.c | 2 +- lib/idr.c | 2 +- lib/radix-tree.c | 2 +- mm/mempolicy.c | 4 ++-- mm/rmap.c | 2 +- mm/shmem.c | 2 +- mm/slab.c | 17 +++++++---------- mm/slob.c | 3 +-- mm/slub.c | 4 +--- net/bridge/br_fdb.c | 2 +- net/core/flow.c | 2 +- net/core/neighbour.c | 2 +- net/core/skbuff.c | 4 ++-- net/core/sock.c | 6 +++--- net/dccp/ackvec.c | 4 ++-- net/dccp/ccid.c | 2 +- net/dccp/ccids/lib/loss_interval.c | 2 +- net/dccp/ccids/lib/packet_history.c | 4 ++-- net/dccp/proto.c | 2 +- net/decnet/dn_route.c | 2 +- net/decnet/dn_table.c | 2 +- net/ipv4/fib_hash.c | 4 ++-- net/ipv4/fib_trie.c | 2 +- net/ipv4/inetpeer.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/ipvs/ip_vs_conn.c | 2 +- net/ipv4/route.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/route.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/xt_hashlimit.c | 2 +- net/rxrpc/af_rxrpc.c | 2 +- net/sctp/protocol.c | 4 ++-- net/socket.c | 3 +-- net/sunrpc/rpc_pipe.c | 2 +- net/sunrpc/sched.c | 4 ++-- net/tipc/handler.c | 2 +- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- security/keys/key.c | 2 +- security/selinux/avc.c | 2 +- security/selinux/hooks.c | 2 +- security/selinux/ss/avtab.c | 2 +- 165 files changed, 247 insertions(+), 268 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c index 08d80f2f51f..6d048490c55 100644 --- a/arch/arm/plat-s3c24xx/dma.c +++ b/arch/arm/plat-s3c24xx/dma.c @@ -1333,7 +1333,7 @@ int __init s3c24xx_dma_init(unsigned int channels, unsigned int irq, dma_kmem = kmem_cache_create("dma_desc", sizeof(struct s3c2410_dma_buf), 0, SLAB_HWCACHE_ALIGN, - s3c2410_dma_cache_ctor, NULL); + s3c2410_dma_cache_ctor); if (dma_kmem == NULL) { printk(KERN_ERR "dma failed to make kmem cache\n"); diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c index 42505541a9b..ffecd857824 100644 --- 
a/arch/arm26/mm/memc.c +++ b/arch/arm26/mm/memc.c @@ -176,9 +176,9 @@ void __init pgtable_cache_init(void) { pte_cache = kmem_cache_create("pte-cache", sizeof(pte_t) * PTRS_PER_PTE, - 0, SLAB_PANIC, pte_cache_ctor, NULL); + 0, SLAB_PANIC, pte_cache_ctor); pgd_cache = kmem_cache_create("pgd-cache", MEMC_TABLE_SIZE + sizeof(pgd_t) * PTRS_PER_PGD, - 0, SLAB_PANIC, pgd_cache_ctor, NULL); + 0, SLAB_PANIC, pgd_cache_ctor); } diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 6a68b1ae061..6e72f22e6bb 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -752,8 +752,7 @@ void __init pgtable_cache_init(void) PTRS_PER_PMD*sizeof(pmd_t), PTRS_PER_PMD*sizeof(pmd_t), SLAB_PANIC, - pmd_ctor, - NULL); + pmd_ctor); if (!SHARED_KERNEL_PMD) { /* If we're in PAE mode and have a non-shared kernel pmd, then the pgd size must be a diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c index beea7a0b9dc..e13a1a1db4b 100644 --- a/arch/ia64/ia32/ia32_support.c +++ b/arch/ia64/ia32/ia32_support.c @@ -253,7 +253,7 @@ ia32_init (void) partial_page_cachep = kmem_cache_create("partial_page_cache", sizeof(struct partial_page), - 0, SLAB_PANIC, NULL, NULL); + 0, SLAB_PANIC, NULL); } #endif return 0; diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index f72118c0844..62b7bf2f3ea 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -804,7 +804,7 @@ int __init rtas_flash_init(void) flash_block_cache = kmem_cache_create("rtas_flash_cache", RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0, - rtas_block_ctor, NULL); + rtas_block_ctor); if (!flash_block_cache) { printk(KERN_ERR "%s: failed to create block cache\n", __FUNCTION__); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 92a1b16fb7e..4835f73af30 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -542,7 +542,7 @@ static int __init hugetlbpage_init(void) HUGEPTE_TABLE_SIZE, HUGEPTE_TABLE_SIZE, 0, - zero_ctor, NULL); + zero_ctor); if (! 
huge_pgtable_cache) panic("hugetlbpage_init(): could not create hugepte cache\n"); diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 1d6edf724c8..9f27bb56a61 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -178,7 +178,6 @@ void pgtable_cache_init(void) pgtable_cache[i] = kmem_cache_create(name, size, size, SLAB_PANIC, - zero_ctor, - NULL); + zero_ctor); } } diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index f37460e5bfd..7eb4d6cbcb7 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -654,7 +654,7 @@ static int __init spufs_init(void) ret = -ENOMEM; spufs_inode_cache = kmem_cache_create("spufs_inode_cache", sizeof(struct spufs_inode_info), 0, - SLAB_HWCACHE_ALIGN, spufs_init_once, NULL); + SLAB_HWCACHE_ALIGN, spufs_init_once); if (!spufs_inode_cache) goto out; diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index d7fff752e56..b98d6c3e6f3 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -371,8 +371,7 @@ static int __init sq_api_init(void) printk(KERN_NOTICE "sq: Registering store queue API.\n"); sq_cache = kmem_cache_create("store_queue_cache", - sizeof(struct sq_mapping), 0, 0, - NULL, NULL); + sizeof(struct sq_mapping), 0, 0, NULL); if (unlikely(!sq_cache)) return ret; diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index b6a5a338145..a08a4a958ad 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -310,7 +310,7 @@ static int __init pmb_init(void) BUG_ON(unlikely(nr_entries >= NR_PMB_ENTRIES)); pmb_cache = kmem_cache_create("pmb", sizeof(struct pmb_entry), 0, - SLAB_PANIC, pmb_cache_ctor, NULL); + SLAB_PANIC, pmb_cache_ctor); jump_to_P2(); diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c index 8eb8a7c76ec..7ff0a02f581 100644 --- a/arch/sparc64/mm/tsb.c +++ b/arch/sparc64/mm/tsb.c @@ -262,8 +262,7 @@ void __init pgtable_cache_init(void) tsb_caches[i] = kmem_cache_create(name, size, size, - 0, - NULL, NULL); + 0, NULL); if (!tsb_caches[i]) { prom_printf("Could not create %s cache\n", name); prom_halt(); diff --git a/block/bsg.c b/block/bsg.c index baa04e7adf1..f2992e72b84 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -1043,7 +1043,7 @@ static int __init bsg_init(void) dev_t devid; bsg_cmd_cachep = kmem_cache_create("bsg_cmd", - sizeof(struct bsg_command), 0, 0, NULL, NULL); + sizeof(struct bsg_command), 0, 0, NULL); if (!bsg_cmd_cachep) { printk(KERN_ERR "bsg: failed creating slab cache\n"); return -ENOMEM; diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index d7cadf30416..66056ca5e63 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3698,13 +3698,13 @@ int __init blk_dev_init(void) panic("Failed to create kblockd\n"); request_cachep = kmem_cache_create("blkdev_requests", - sizeof(struct request), 0, SLAB_PANIC, NULL, NULL); + sizeof(struct request), 0, SLAB_PANIC, NULL); requestq_cachep = kmem_cache_create("blkdev_queue", - sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL); + sizeof(request_queue_t), 0, SLAB_PANIC, NULL); iocontext_cachep = kmem_cache_create("blkdev_ioc", - sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); + sizeof(struct io_context), 0, SLAB_PANIC, NULL); for_each_possible_cpu(i) INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 2e7ba615d76..00d53c2fd1e 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -1098,7 +1098,7 @@ void acpi_os_release_lock(acpi_spinlock lockp, 
acpi_cpu_flags flags) acpi_status acpi_os_create_cache(char *name, u16 size, u16 depth, acpi_cache_t ** cache) { - *cache = kmem_cache_create(name, size, 0, 0, NULL, NULL); + *cache = kmem_cache_create(name, size, 0, 0, NULL); if (*cache == NULL) return AE_ERROR; else diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 478489c568a..4f598270fa3 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -257,9 +257,9 @@ aoeblk_exit(void) int __init aoeblk_init(void) { - buf_pool_cache = kmem_cache_create("aoe_bufs", + buf_pool_cache = kmem_cache_create("aoe_bufs", sizeof(struct buf), - 0, 0, NULL, NULL); + 0, 0, NULL); if (buf_pool_cache == NULL) return -ENOMEM; diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c index 93362eed94e..3a9d7e2d4de 100644 --- a/drivers/ieee1394/eth1394.c +++ b/drivers/ieee1394/eth1394.c @@ -1729,7 +1729,7 @@ static int __init ether1394_init_module(void) packet_task_cache = kmem_cache_create("packet_task", sizeof(struct packet_task), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!packet_task_cache) return -ENOMEM; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 6b8faca02f8..bc547f1d34b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2998,7 +2998,6 @@ static int __init ib_mad_init_module(void) sizeof(struct ib_mad_private), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); if (!ib_mad_cache) { printk(KERN_ERR PFX "Couldn't create ib_mad cache\n"); diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c index 36620a22413..cfdacb1ec27 100644 --- a/drivers/infiniband/hw/amso1100/c2_vq.c +++ b/drivers/infiniband/hw/amso1100/c2_vq.c @@ -85,7 +85,7 @@ int vq_init(struct c2_dev *c2dev) (char) ('0' + c2dev->devnum)); c2dev->host_msg_cache = kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (c2dev->host_msg_cache == NULL) { return -ENOMEM; } diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index e53a97af126..97d108634c5 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -259,7 +259,7 @@ int ehca_init_av_cache(void) av_cache = kmem_cache_create("ehca_cache_av", sizeof(struct ehca_av), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!av_cache) return -ENOMEM; return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 9e87883b561..1e8ca3fca4a 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -387,7 +387,7 @@ int ehca_init_cq_cache(void) cq_cache = kmem_cache_create("ehca_cache_cq", sizeof(struct ehca_cq), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!cq_cache) return -ENOMEM; return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 36377c6db3d..04c324330b7 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -163,7 +163,7 @@ static int ehca_create_slab_caches(void) ctblk_cache = kmem_cache_create("ehca_cache_ctblk", EHCA_PAGESIZE, H_CB_ALIGNMENT, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!ctblk_cache) { ehca_gen_err("Cannot create ctblk SLAB cache."); ehca_cleanup_mrmw_cache(); diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 6262c5462d5..9f4c9d46e8e 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ 
b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -1950,13 +1950,13 @@ int ehca_init_mrmw_cache(void) mr_cache = kmem_cache_create("ehca_cache_mr", sizeof(struct ehca_mr), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!mr_cache) return -ENOMEM; mw_cache = kmem_cache_create("ehca_cache_mw", sizeof(struct ehca_mw), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!mw_cache) { kmem_cache_destroy(mr_cache); mr_cache = NULL; diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c index 79d0591a804..c85312ad292 100644 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -100,7 +100,7 @@ int ehca_init_pd_cache(void) pd_cache = kmem_cache_create("ehca_cache_pd", sizeof(struct ehca_pd), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!pd_cache) return -ENOMEM; return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 48e9ceacd6f..a3146e696c5 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1760,7 +1760,7 @@ int ehca_init_qp_cache(void) qp_cache = kmem_cache_create("ehca_cache_qp", sizeof(struct ehca_qp), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!qp_cache) return -ENOMEM; return 0; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index effdee299b0..5db31438027 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -637,7 +637,7 @@ static int __init iser_init(void) ig.desc_cache = kmem_cache_create("iser_descriptors", sizeof (struct iser_desc), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (ig.desc_cache == NULL) return -ENOMEM; diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index b297a6b111a..1199d3f32ac 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1332,24 +1332,24 @@ int kvm_mmu_module_init(void) { pte_chain_cache = kmem_cache_create("kvm_pte_chain", sizeof(struct kvm_pte_chain), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!pte_chain_cache) goto nomem; rmap_desc_cache = kmem_cache_create("kvm_rmap_desc", sizeof(struct kvm_rmap_desc), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!rmap_desc_cache) goto nomem; mmu_page_cache = kmem_cache_create("kvm_mmu_page", PAGE_SIZE, - PAGE_SIZE, 0, NULL, NULL); + PAGE_SIZE, 0, NULL); if (!mmu_page_cache) goto nomem; mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", sizeof(struct kvm_mmu_page), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!mmu_page_header_cache) goto nomem; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0b66afef2d8..c8dfdb30291 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -951,7 +951,7 @@ static int grow_stripes(raid5_conf_t *conf, int num) conf->active_name = 0; sc = kmem_cache_create(conf->cache_name[conf->active_name], sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!sc) return 1; conf->slab_cache = sc; @@ -1003,7 +1003,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) /* Step 1 */ sc = kmem_cache_create(conf->cache_name[1-conf->active_name], sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!sc) return -ENOMEM; diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 64a52bd7544..988c8ce47f5 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -1171,8 +1171,7 @@ static int __init i2o_block_init(void) /* Allocate request mempool and slab */ size = 
sizeof(struct i2o_block_request); i2o_blk_req_pool.slab = kmem_cache_create("i2o_block_req", size, 0, - SLAB_HWCACHE_ALIGN, NULL, - NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!i2o_blk_req_pool.slab) { osm_err("can't init request slab\n"); rc = -ENOMEM; diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 8aff9385613..7c5e29eaf11 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -1149,7 +1149,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) if (ubi_devices_cnt == 0) { ltree_slab = kmem_cache_create("ubi_ltree_slab", sizeof(struct ltree_entry), 0, - 0, <ree_entry_ctor, NULL); + 0, <ree_entry_ctor); if (!ltree_slab) return -ENOMEM; } diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 9de95376209..a5a9b8d8730 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1452,7 +1452,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) if (ubi_devices_cnt == 0) { wl_entries_slab = kmem_cache_create("ubi_wl_entry_slab", sizeof(struct ubi_wl_entry), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!wl_entries_slab) return -ENOMEM; } diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 6a89cefe99b..0c67258fb9e 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -291,7 +291,7 @@ dasd_parse_keyword( char *parsestring ) { dasd_page_cache = kmem_cache_create("dasd_page_cache", PAGE_SIZE, PAGE_SIZE, SLAB_CACHE_DMA, - NULL, NULL ); + NULL); if (!dasd_page_cache) MESSAGE(KERN_WARNING, "%s", "Failed to create slab, " "fixed buffer mode disabled."); diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index a1db9592513..9726261c367 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -259,21 +259,21 @@ zfcp_module_init(void) size = sizeof(struct zfcp_fsf_req_qtcb); align = calc_alignment(size); zfcp_data.fsf_req_qtcb_cache = - kmem_cache_create("zfcp_fsf", size, align, 0, NULL, NULL); + kmem_cache_create("zfcp_fsf", size, align, 0, NULL); if (!zfcp_data.fsf_req_qtcb_cache) goto out; size = sizeof(struct fsf_status_read_buffer); align = calc_alignment(size); zfcp_data.sr_buffer_cache = - kmem_cache_create("zfcp_sr", size, align, 0, NULL, NULL); + kmem_cache_create("zfcp_sr", size, align, 0, NULL); if (!zfcp_data.sr_buffer_cache) goto out_sr_cache; size = sizeof(struct zfcp_gid_pn_data); align = calc_alignment(size); zfcp_data.gid_pn_cache = - kmem_cache_create("zfcp_gid", size, align, 0, NULL, NULL); + kmem_cache_create("zfcp_gid", size, align, 0, NULL); if (!zfcp_data.gid_pn_cache) goto out_gid_cache; diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 1c0d7578e79..b8c6810090d 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -462,7 +462,7 @@ static int asd_create_global_caches(void) sizeof(struct asd_dma_tok), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!asd_dma_token_cache) { asd_printk("couldn't create dma token cache\n"); return -ENOMEM; @@ -474,7 +474,7 @@ static int asd_create_global_caches(void) sizeof(struct asd_ascb), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!asd_ascb_cache) { asd_printk("couldn't create ascb cache\n"); goto Err; diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index 965698c8b7b..1396c83b0c9 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -292,7 +292,7 @@ EXPORT_SYMBOL_GPL(sas_domain_release_transport); static int __init 
sas_class_init(void) { sas_task_cache = kmem_cache_create("sas_task", sizeof(struct sas_task), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (!sas_task_cache) return -ENOMEM; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index b5a77b0c0de..92376f9dfdd 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2723,7 +2723,7 @@ qla2x00_module_init(void) /* Allocate cache for SRBs. */ srb_cachep = kmem_cache_create("qla2xxx_srbs", sizeof(srb_t), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (srb_cachep == NULL) { printk(KERN_ERR "qla2xxx: Unable to allocate SRB cache...Failing load!\n"); diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index e69160a7bc6..b1d565c12c5 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -1677,7 +1677,7 @@ static int __init qla4xxx_module_init(void) /* Allocate cache for SRBs. */ srb_cachep = kmem_cache_create("qla4xxx_srbs", sizeof(struct srb), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (srb_cachep == NULL) { printk(KERN_ERR "%s: Unable to allocate SRB cache..." diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index a691dda40d2..a5de1a829a7 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -288,7 +288,7 @@ int scsi_setup_command_freelist(struct Scsi_Host *shost) if (!pool->users) { pool->slab = kmem_cache_create(pool->name, sizeof(struct scsi_cmnd), 0, - pool->slab_flags, NULL, NULL); + pool->slab_flags, NULL); if (!pool->slab) goto fail; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 1f5a07bf2a7..da63c544919 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1661,7 +1661,7 @@ int __init scsi_init_queue(void) scsi_io_context_cache = kmem_cache_create("scsi_io_context", sizeof(struct scsi_io_context), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!scsi_io_context_cache) { printk(KERN_ERR "SCSI: can't init scsi io context cache\n"); return -ENOMEM; @@ -1672,7 +1672,7 @@ int __init scsi_init_queue(void) int size = sgp->size * sizeof(struct scatterlist); sgp->slab = kmem_cache_create(sgp->name, size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!sgp->slab) { printk(KERN_ERR "SCSI: can't init sg slab %s\n", sgp->name); diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c index 2570f48a69c..371b69c110b 100644 --- a/drivers/scsi/scsi_tgt_lib.c +++ b/drivers/scsi/scsi_tgt_lib.c @@ -585,7 +585,7 @@ static int __init scsi_tgt_init(void) scsi_tgt_cmd_cache = kmem_cache_create("scsi_tgt_cmd", sizeof(struct scsi_tgt_cmd), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!scsi_tgt_cmd_cache) return -ENOMEM; diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 76c555a67da..805e5fc5f5d 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -933,7 +933,7 @@ static int __init uhci_hcd_init(void) } uhci_up_cachep = kmem_cache_create("uhci_urb_priv", - sizeof(struct urb_priv), 0, 0, NULL, NULL); + sizeof(struct urb_priv), 0, 0, NULL); if (!uhci_up_cachep) goto up_failed; diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index 982b773d71e..8f27a9e1c36 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -340,7 +340,7 @@ static int mon_text_open(struct inode *inode, struct file *file) snprintf(rp->slab_name, SLAB_NAME_SZ, "mon_text_%p", rp); rp->e_slab = kmem_cache_create(rp->slab_name, sizeof(struct mon_event_text), 
sizeof(long), 0, - mon_text_ctor, NULL); + mon_text_ctor); if (rp->e_slab == NULL) { rc = -ENOMEM; goto err_slab; diff --git a/fs/adfs/super.c b/fs/adfs/super.c index de2ed5ca335..1c9fd302949 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -234,14 +234,14 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { adfs_inode_cachep = kmem_cache_create("adfs_inode_cache", sizeof(struct adfs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (adfs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/affs/super.c b/fs/affs/super.c index 6d0ebc32153..c80191ae205 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -99,7 +99,7 @@ static int init_inodecache(void) sizeof(struct affs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (affs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/afs/super.c b/fs/afs/super.c index 993cdf1cce3..b8808b40f82 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -89,8 +89,7 @@ int __init afs_fs_init(void) sizeof(struct afs_vnode), 0, SLAB_HWCACHE_ALIGN, - afs_i_init_once, - NULL); + afs_i_init_once); if (!afs_inode_cachep) { printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n"); return ret; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index a5c5171c282..a4514182768 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -414,7 +414,7 @@ befs_read_inode(struct inode *inode) } /* Initialize the inode cache. Called at fs setup. - * + * * Taken from NFS implementation by Al Viro. */ static int @@ -424,7 +424,7 @@ befs_init_inodecache(void) sizeof (struct befs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (befs_inode_cachep == NULL) { printk(KERN_ERR "befs_init_inodecache: " "Couldn't initalize inode slabcache\n"); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 58c7bd9f530..f346eb14e86 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -250,14 +250,14 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag inode_init_once(&bi->vfs_inode); } - + static int init_inodecache(void) { bfs_inode_cachep = kmem_cache_create("bfs_inode_cache", sizeof(struct bfs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (bfs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/bio.c b/fs/bio.c index 33e46340a76..0d2c2d38b7b 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1187,7 +1187,7 @@ static void __init biovec_init_slabs(void) size = bvs->nr_vecs * sizeof(struct bio_vec); bvs->slab = kmem_cache_create(bvs->name, size, 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); } } diff --git a/fs/block_dev.c b/fs/block_dev.c index 3635315e3b9..2980eabe577 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -517,7 +517,7 @@ void __init bdev_cache_init(void) bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), - init_once, NULL); + init_once); err = register_filesystem(&bd_type); if (err) panic("Cannot register bdev pseudo-fs"); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 1fd0dc85f53..cabb6a55d7d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -719,7 +719,7 @@ cifs_init_inodecache(void) sizeof (struct cifsInodeInfo), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - cifs_init_once, NULL); + 
cifs_init_once); if (cifs_inode_cachep == NULL) return -ENOMEM; @@ -748,7 +748,7 @@ cifs_init_request_bufs(void) cifs_req_cachep = kmem_cache_create("cifs_request", CIFSMaxBufSize + MAX_CIFS_HDR_SIZE, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (cifs_req_cachep == NULL) return -ENOMEM; @@ -776,7 +776,7 @@ cifs_init_request_bufs(void) alloc of large cifs buffers even when page debugging is on */ cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq", MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (cifs_sm_req_cachep == NULL) { mempool_destroy(cifs_req_poolp); kmem_cache_destroy(cifs_req_cachep); @@ -817,7 +817,7 @@ cifs_init_mids(void) { cifs_mid_cachep = kmem_cache_create("cifs_mpx_ids", sizeof (struct mid_q_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (cifs_mid_cachep == NULL) return -ENOMEM; @@ -830,7 +830,7 @@ cifs_init_mids(void) cifs_oplock_cachep = kmem_cache_create("cifs_oplock_structs", sizeof (struct oplock_q_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (cifs_oplock_cachep == NULL) { mempool_destroy(cifs_mid_poolp); kmem_cache_destroy(cifs_mid_cachep); diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 6771a4271e3..342f4e0d582 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -64,13 +64,13 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag inode_init_once(&ei->vfs_inode); } - + int coda_init_inodecache(void) { coda_inode_cachep = kmem_cache_create("coda_inode_cache", sizeof(struct coda_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, - init_once, NULL); + init_once); if (coda_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index b00d962de83..871b0cb6183 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -136,7 +136,7 @@ static int __init configfs_init(void) configfs_dir_cachep = kmem_cache_create("configfs_dir_cache", sizeof(struct configfs_dirent), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!configfs_dir_cachep) goto out; diff --git a/fs/dcache.c b/fs/dcache.c index cb9d05056b5..678d39deb60 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2165,10 +2165,10 @@ void __init vfs_caches_init(unsigned long mempages) mempages -= reserve; names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); dcache_init(mempages); inode_init(mempages); diff --git a/fs/dcookies.c b/fs/dcookies.c index 21af1629f9b..c1208f53bd7 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -205,7 +205,7 @@ static int dcookie_init(void) dcookie_cache = kmem_cache_create("dcookie_cache", sizeof(struct dcookie_struct), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!dcookie_cache) goto out; diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 0553a6158dc..dd362739d29 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1449,7 +1449,7 @@ int dlm_lowcomms_start(void) error = -ENOMEM; con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection), __alignof__(struct connection), 0, - NULL, NULL); + NULL); if (!con_cache) goto out; diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index fb9e2ee998a..ecf0e5cb203 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -23,7 +23,7 @@ int dlm_memory_init(void) int ret = 0; lkb_cache = kmem_cache_create("dlm_lkb", 
sizeof(struct dlm_lkb), - __alignof__(struct dlm_lkb), 0, NULL, NULL); + __alignof__(struct dlm_lkb), 0, NULL); if (!lkb_cache) ret = -ENOMEM; return ret; diff --git a/fs/dnotify.c b/fs/dnotify.c index 936409fcd93..28d01ed66de 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(dnotify_parent); static int __init dnotify_init(void) { dn_cache = kmem_cache_create("dnotify_cache", - sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL, NULL); + sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL); return 0; } diff --git a/fs/dquot.c b/fs/dquot.c index 7e273151f58..de9a29f64ff 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1848,11 +1848,11 @@ static int __init dquot_init(void) register_sysctl_table(sys_table); - dquot_cachep = kmem_cache_create("dquot", + dquot_cachep = kmem_cache_create("dquot", sizeof(struct dquot), sizeof(unsigned long) * 4, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), - NULL, NULL); + NULL); order = 0; dquot_hash = (struct hlist_head *)__get_free_pages(GFP_ATOMIC, order); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 02ca6f1e55d..e557a676692 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -677,7 +677,7 @@ static int ecryptfs_init_kmem_caches(void) info = &ecryptfs_cache_infos[i]; *(info->cache) = kmem_cache_create(info->name, info->size, - 0, SLAB_HWCACHE_ALIGN, info->ctor, NULL); + 0, SLAB_HWCACHE_ALIGN, info->ctor); if (!*(info->cache)) { ecryptfs_free_kmem_caches(); ecryptfs_printk(KERN_WARNING, "%s: " diff --git a/fs/efs/super.c b/fs/efs/super.c index d360c81f3a7..ce4acb8ff81 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -75,13 +75,13 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { efs_inode_cachep = kmem_cache_create("efs_inode_cache", sizeof(struct efs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, - init_once, NULL); + init_once); if (efs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0b73cd45a06..77b9953624f 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1324,12 +1324,12 @@ static int __init eventpoll_init(void) /* Allocates slab cache used to allocate "struct epitem" items */ epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC, - NULL, NULL); + NULL); /* Allocates slab cache used to allocate "struct eppoll_entry" */ pwq_cache = kmem_cache_create("eventpoll_pwq", sizeof(struct eppoll_entry), 0, - EPI_SLAB_DEBUG|SLAB_PANIC, NULL, NULL); + EPI_SLAB_DEBUG|SLAB_PANIC, NULL); return 0; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index a6b1072daea..68579a0ed3f 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -167,14 +167,14 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag #endif inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", sizeof(struct ext2_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (ext2_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 4f84dc86628..f0614e3f1fe 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -490,7 +490,7 @@ static int init_inodecache(void) sizeof(struct ext3_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (ext3_inode_cachep == NULL) return -ENOMEM; 
return 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6dcbb28dc06..75adbb64e02 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -541,7 +541,7 @@ static int init_inodecache(void) sizeof(struct ext4_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (ext4_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 3c9c8a15ec7..be6f89b152c 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -48,7 +48,7 @@ int __init fat_cache_init(void) fat_cache_cachep = kmem_cache_create("fat_cache", sizeof(struct fat_cache), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, - init_once, NULL); + init_once); if (fat_cache_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 0a7ddb39a59..4baa5f20536 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -514,7 +514,7 @@ static int __init fat_init_inodecache(void) sizeof(struct msdos_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (fat_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/fcntl.c b/fs/fcntl.c index 3f22e9f4f69..78b2ff04405 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -638,7 +638,7 @@ EXPORT_SYMBOL(kill_fasync); static int __init fasync_init(void) { fasync_cache = kmem_cache_create("fasync_cache", - sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL, NULL); + sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); return 0; } diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 647d600f0bc..4f95572d272 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -263,8 +263,8 @@ vxfs_init(void) int rv; vxfs_inode_cachep = kmem_cache_create("vxfs_inode", - sizeof(struct vxfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); + sizeof(struct vxfs_inode_info), 0, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); if (!vxfs_inode_cachep) return -ENOMEM; rv = register_filesystem(&vxfs_fs_type); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 357764d85ff..3ad22beb24c 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1044,7 +1044,7 @@ int __init fuse_dev_init(void) int err = -ENOMEM; fuse_req_cachep = kmem_cache_create("fuse_request", sizeof(struct fuse_req), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!fuse_req_cachep) goto out; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cc5efc13496..5448f625ab5 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -706,7 +706,7 @@ static int __init fuse_fs_init(void) fuse_inode_cachep = kmem_cache_create("fuse_inode", sizeof(struct fuse_inode), 0, SLAB_HWCACHE_ALIGN, - fuse_inode_init_once, NULL); + fuse_inode_init_once); err = -ENOMEM; if (!fuse_inode_cachep) goto out_unreg2; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 787a0edef10..d5d4e68b880 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -72,7 +72,7 @@ static int __init init_gfs2_fs(void) gfs2_glock_cachep = kmem_cache_create("gfs2_glock", sizeof(struct gfs2_glock), 0, 0, - gfs2_init_glock_once, NULL); + gfs2_init_glock_once); if (!gfs2_glock_cachep) goto fail; @@ -80,13 +80,13 @@ static int __init init_gfs2_fs(void) sizeof(struct gfs2_inode), 0, SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD, - gfs2_init_inode_once, NULL); + gfs2_init_inode_once); if (!gfs2_inode_cachep) goto fail; gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata", sizeof(struct gfs2_bufdata), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!gfs2_bufdata_cachep) goto fail; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 92cf8751e42..6c5f92dfb50 100644 --- 
a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -443,7 +443,7 @@ static int __init init_hfs_fs(void) hfs_inode_cachep = kmem_cache_create("hfs_inode_cache", sizeof(struct hfs_inode_info), 0, SLAB_HWCACHE_ALIGN, - hfs_init_once, NULL); + hfs_init_once); if (!hfs_inode_cachep) return -ENOMEM; err = register_filesystem(&hfs_fs_type); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 6d87a2a9534..7b0f2e5a44e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -479,7 +479,7 @@ static int __init init_hfsplus_fs(void) hfsplus_inode_cachep = kmem_cache_create("hfsplus_icache", HFSPLUS_INODE_SIZE, 0, SLAB_HWCACHE_ALIGN, - hfsplus_init_once, NULL); + hfsplus_init_once); if (!hfsplus_inode_cachep) return -ENOMEM; err = register_filesystem(&hfsplus_fs_type); diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 29cc34abb2e..89612ee7c80 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -181,14 +181,14 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag mutex_init(&ei->i_parent_mutex); inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { hpfs_inode_cachep = kmem_cache_create("hpfs_inode_cache", sizeof(struct hpfs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (hpfs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d145cb79c30..c848a191525 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -848,7 +848,7 @@ static int __init init_hugetlbfs_fs(void) hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", sizeof(struct hugetlbfs_inode_info), - 0, 0, init_once, NULL); + 0, 0, init_once); if (hugetlbfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/inode.c b/fs/inode.c index 320e088d0b2..29f5068f819 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1388,8 +1388,7 @@ void __init inode_init(unsigned long mempages) 0, (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| SLAB_MEM_SPREAD), - init_once, - NULL); + init_once); register_shrinker(&icache_shrinker); /* Hash may have been set up in inode_init_early */ diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 9f2224f65a1..9bf2f6c09df 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -716,10 +716,10 @@ static int __init inotify_user_setup(void) watch_cachep = kmem_cache_create("inotify_watch_cache", sizeof(struct inotify_user_watch), - 0, SLAB_PANIC, NULL, NULL); + 0, SLAB_PANIC, NULL); event_cachep = kmem_cache_create("inotify_event_cache", sizeof(struct inotify_kernel_event), - 0, SLAB_PANIC, NULL, NULL); + 0, SLAB_PANIC, NULL); return 0; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 4f5418be059..95c72aa8186 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -86,7 +86,7 @@ static int init_inodecache(void) sizeof(struct iso_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (isofs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 46fe7439fb9..06ab3c10b1b 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1668,7 +1668,7 @@ static int journal_create_jbd_slab(size_t slab_size) * boundary. 
*/ jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], - slab_size, slab_size, 0, NULL, NULL); + slab_size, slab_size, 0, NULL); if (!jbd_slab[i]) { printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); return -ENOMEM; @@ -1711,8 +1711,7 @@ static int journal_init_journal_head_cache(void) sizeof(struct journal_head), 0, /* offset */ 0, /* flags */ - NULL, /* ctor */ - NULL); /* dtor */ + NULL); /* ctor */ retval = 0; if (journal_head_cache == 0) { retval = -ENOMEM; @@ -2008,8 +2007,7 @@ static int __init journal_init_handle_cache(void) sizeof(handle_t), 0, /* offset */ 0, /* flags */ - NULL, /* ctor */ - NULL); /* dtor */ + NULL); /* ctor */ if (jbd_handle_cache == NULL) { printk(KERN_EMERG "JBD: failed to create handle cache\n"); return -ENOMEM; diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 8db2fa25170..62e13c8db13 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -170,13 +170,13 @@ int __init journal_init_revoke_caches(void) { revoke_record_cache = kmem_cache_create("revoke_record", sizeof(struct jbd_revoke_record_s), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (revoke_record_cache == 0) return -ENOMEM; revoke_table_cache = kmem_cache_create("revoke_table", sizeof(struct jbd_revoke_table_s), - 0, 0, NULL, NULL); + 0, 0, NULL); if (revoke_table_cache == 0) { kmem_cache_destroy(revoke_record_cache); revoke_record_cache = NULL; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index f290cb7cb83..f37324aee81 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1680,7 +1680,7 @@ static int jbd2_journal_create_jbd_slab(size_t slab_size) * boundary. */ jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], - slab_size, slab_size, 0, NULL, NULL); + slab_size, slab_size, 0, NULL); if (!jbd_slab[i]) { printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); return -ENOMEM; @@ -1723,8 +1723,7 @@ static int journal_init_jbd2_journal_head_cache(void) sizeof(struct journal_head), 0, /* offset */ 0, /* flags */ - NULL, /* ctor */ - NULL); /* dtor */ + NULL); /* ctor */ retval = 0; if (jbd2_journal_head_cache == 0) { retval = -ENOMEM; @@ -2006,8 +2005,7 @@ static int __init journal_init_handle_cache(void) sizeof(handle_t), 0, /* offset */ 0, /* flags */ - NULL, /* ctor */ - NULL); /* dtor */ + NULL); /* ctor */ if (jbd2_handle_cache == NULL) { printk(KERN_EMERG "JBD: failed to create handle cache\n"); return -ENOMEM; diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 28cac049a56..01d88975e0c 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -171,13 +171,13 @@ int __init jbd2_journal_init_revoke_caches(void) { jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", sizeof(struct jbd2_revoke_record_s), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (jbd2_revoke_record_cache == 0) return -ENOMEM; jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", sizeof(struct jbd2_revoke_table_s), - 0, 0, NULL, NULL); + 0, 0, NULL); if (jbd2_revoke_table_cache == 0) { kmem_cache_destroy(jbd2_revoke_record_cache); jbd2_revoke_record_cache = NULL; diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index 35c1a5e30ba..f9211252b5f 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c @@ -33,56 +33,56 @@ int __init jffs2_create_slab_caches(void) { full_dnode_slab = kmem_cache_create("jffs2_full_dnode", sizeof(struct jffs2_full_dnode), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!full_dnode_slab) goto err; raw_dirent_slab = kmem_cache_create("jffs2_raw_dirent", sizeof(struct jffs2_raw_dirent), - 0, 0, NULL, NULL); + 0, 0, 
NULL); if (!raw_dirent_slab) goto err; raw_inode_slab = kmem_cache_create("jffs2_raw_inode", sizeof(struct jffs2_raw_inode), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!raw_inode_slab) goto err; tmp_dnode_info_slab = kmem_cache_create("jffs2_tmp_dnode", sizeof(struct jffs2_tmp_dnode_info), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!tmp_dnode_info_slab) goto err; raw_node_ref_slab = kmem_cache_create("jffs2_refblock", sizeof(struct jffs2_raw_node_ref) * (REFS_PER_BLOCK + 1), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!raw_node_ref_slab) goto err; node_frag_slab = kmem_cache_create("jffs2_node_frag", sizeof(struct jffs2_node_frag), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!node_frag_slab) goto err; inode_cache_slab = kmem_cache_create("jffs2_inode_cache", sizeof(struct jffs2_inode_cache), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!inode_cache_slab) goto err; #ifdef CONFIG_JFFS2_FS_XATTR xattr_datum_cache = kmem_cache_create("jffs2_xattr_datum", sizeof(struct jffs2_xattr_datum), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!xattr_datum_cache) goto err; xattr_ref_cache = kmem_cache_create("jffs2_xattr_ref", sizeof(struct jffs2_xattr_ref), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!xattr_ref_cache) goto err; #endif diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index e220d3bd610..be2b70c2ec1 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -192,7 +192,7 @@ static int __init init_jffs2_fs(void) sizeof(struct jffs2_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - jffs2_i_init_once, NULL); + jffs2_i_init_once); if (!jffs2_inode_cachep) { printk(KERN_ERR "JFFS2 error: Failed to initialise inode cache\n"); return -ENOMEM; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 77c7f1129dd..62e96be02ac 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -213,7 +213,7 @@ int __init metapage_init(void) * Allocate the metapage structures */ metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage), - 0, 0, init_once, NULL); + 0, 0, init_once); if (metapage_cache == NULL) return -ENOMEM; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 929fceca799..4b372f55065 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -776,7 +776,7 @@ static int __init init_jfs_fs(void) jfs_inode_cachep = kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, - init_once, NULL); + init_once); if (jfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/locks.c b/fs/locks.c index 4f2d749ac62..31051063724 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2276,7 +2276,7 @@ static int __init filelock_init(void) { filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, - init_once, NULL); + init_once); return 0; } diff --git a/fs/mbcache.c b/fs/mbcache.c index fbb1d02f879..1046cbefbfb 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -292,7 +292,7 @@ mb_cache_create(const char *name, struct mb_cache_op *cache_op, INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]); } cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); if (!cache->c_entry_cache) goto fail; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index be4044614ac..43668d7d668 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -75,14 +75,14 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { minix_inode_cachep = kmem_cache_create("minix_inode_cache", 
sizeof(struct minix_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (minix_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/namespace.c b/fs/namespace.c index 4198003d7e1..ddbda13c2d3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1801,7 +1801,7 @@ void __init mnt_init(unsigned long mempages) init_rwsem(&namespace_sem); mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount), - 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index cf06eb9f050..7f8536dbded 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -63,14 +63,14 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag mutex_init(&ei->open_mutex); inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { ncp_inode_cachep = kmem_cache_create("ncp_inode_cache", sizeof(struct ncp_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (ncp_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index a5c82b6f3b4..fcf4d384610 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -875,7 +875,7 @@ int __init nfs_init_directcache(void) sizeof(struct nfs_direct_req), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - NULL, NULL); + NULL); if (nfs_direct_cachep == NULL) return -ENOMEM; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3d9fccf4ef9..bca6cdcb9f0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1165,14 +1165,14 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag nfsi->npages = 0; nfs4_init_once(nfsi); } - + static int __init nfs_init_inodecache(void) { nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", sizeof(struct nfs_inode), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (nfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index f56dae5216f..345bb9b4765 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -442,7 +442,7 @@ int __init nfs_init_nfspagecache(void) nfs_page_cachep = kmem_cache_create("nfs_page", sizeof(struct nfs_page), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (nfs_page_cachep == NULL) return -ENOMEM; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6ae2e58ed05..19e05633f4e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -598,7 +598,7 @@ int __init nfs_init_readpagecache(void) nfs_rdata_cachep = kmem_cache_create("nfs_read_data", sizeof(struct nfs_read_data), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (nfs_rdata_cachep == NULL) return -ENOMEM; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 73ac992ece8..ef97e0c0f5b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1467,7 +1467,7 @@ int __init nfs_init_writepagecache(void) nfs_wdata_cachep = kmem_cache_create("nfs_write_data", sizeof(struct nfs_write_data), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (nfs_wdata_cachep == NULL) return -ENOMEM; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6284807bd37..3f559700788 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1032,19 +1032,19 @@ static int nfsd4_init_slabs(void) { stateowner_slab = kmem_cache_create("nfsd4_stateowners", - sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL); + sizeof(struct nfs4_stateowner), 0, 0, NULL); if (stateowner_slab == NULL) goto out_nomem; file_slab = 
kmem_cache_create("nfsd4_files", - sizeof(struct nfs4_file), 0, 0, NULL, NULL); + sizeof(struct nfs4_file), 0, 0, NULL); if (file_slab == NULL) goto out_nomem; stateid_slab = kmem_cache_create("nfsd4_stateids", - sizeof(struct nfs4_stateid), 0, 0, NULL, NULL); + sizeof(struct nfs4_stateid), 0, 0, NULL); if (stateid_slab == NULL) goto out_nomem; deleg_slab = kmem_cache_create("nfsd4_delegations", - sizeof(struct nfs4_delegation), 0, 0, NULL, NULL); + sizeof(struct nfs4_delegation), 0, 0, NULL); if (deleg_slab == NULL) goto out_nomem; return 0; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 4566b918255..90c4e3a2970 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3143,7 +3143,7 @@ static int __init init_ntfs_fs(void) ntfs_index_ctx_cache = kmem_cache_create(ntfs_index_ctx_cache_name, sizeof(ntfs_index_context), 0 /* offset */, - SLAB_HWCACHE_ALIGN, NULL /* ctor */, NULL /* dtor */); + SLAB_HWCACHE_ALIGN, NULL /* ctor */); if (!ntfs_index_ctx_cache) { printk(KERN_CRIT "NTFS: Failed to create %s!\n", ntfs_index_ctx_cache_name); @@ -3151,7 +3151,7 @@ static int __init init_ntfs_fs(void) } ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name, sizeof(ntfs_attr_search_ctx), 0 /* offset */, - SLAB_HWCACHE_ALIGN, NULL /* ctor */, NULL /* dtor */); + SLAB_HWCACHE_ALIGN, NULL /* ctor */); if (!ntfs_attr_ctx_cache) { printk(KERN_CRIT "NTFS: Failed to create %s!\n", ntfs_attr_ctx_cache_name); @@ -3160,7 +3160,7 @@ static int __init init_ntfs_fs(void) ntfs_name_cache = kmem_cache_create(ntfs_name_cache_name, (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!ntfs_name_cache) { printk(KERN_CRIT "NTFS: Failed to create %s!\n", ntfs_name_cache_name); @@ -3169,7 +3169,7 @@ static int __init init_ntfs_fs(void) ntfs_inode_cache = kmem_cache_create(ntfs_inode_cache_name, sizeof(ntfs_inode), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); if (!ntfs_inode_cache) { printk(KERN_CRIT "NTFS: Failed to create %s!\n", ntfs_inode_cache_name); @@ -3179,7 +3179,7 @@ static int __init init_ntfs_fs(void) ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name, sizeof(big_ntfs_inode), 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, - ntfs_big_inode_init_once, NULL); + ntfs_big_inode_init_once); if (!ntfs_big_inode_cache) { printk(KERN_CRIT "NTFS: Failed to create %s!\n", ntfs_big_inode_cache_name); diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index fd8cb1badc9..7418dc83de1 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -592,7 +592,7 @@ static int __init init_dlmfs_fs(void) sizeof(struct dlmfs_inode_private), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - dlmfs_init_once, NULL); + dlmfs_init_once); if (!dlmfs_inode_cache) return -ENOMEM; cleanup_inode = 1; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 65b2b9b9268..62e4a7daa28 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -510,7 +510,7 @@ int dlm_init_mle_cache(void) dlm_mle_cache = kmem_cache_create("dlm_mle_cache", sizeof(struct dlm_master_list_entry), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (dlm_mle_cache == NULL) return -ENOMEM; return 0; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 3a5a1ed09ac..200c7d4790d 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -984,7 +984,7 @@ static int ocfs2_initialize_mem_caches(void) 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - 
ocfs2_inode_init_once, NULL); + ocfs2_inode_init_once); if (!ocfs2_inode_cachep) return -ENOMEM; diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 39814b900fc..4da8851f2b2 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -548,7 +548,7 @@ int __init init_ocfs2_uptodate_cache(void) { ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate", sizeof(struct ocfs2_meta_cache_item), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (!ocfs2_uptodate_cachep) return -ENOMEM; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index e62397341c3..dd86be2aa6c 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -431,7 +431,7 @@ static int __init init_openprom_fs(void) 0, (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD), - op_inode_init_once, NULL); + op_inode_init_once); if (!op_inode_cachep) return -ENOMEM; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index dd28e86ab42..94e2c1adf18 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -112,14 +112,14 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag inode_init_once(&ei->vfs_inode); } - + int __init proc_init_inodecache(void) { proc_inode_cachep = kmem_cache_create("proc_inode_cache", sizeof(struct proc_inode), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (proc_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 8d256eb1181..1bc8d873a9e 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -545,7 +545,7 @@ static int init_inodecache(void) sizeof(struct qnx4_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (qnx4_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5a93cfe1a03..5b68dd3f191 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -527,7 +527,7 @@ static int init_inodecache(void) reiserfs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (reiserfs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 2284e03342c..dae7945f90e 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -572,14 +572,14 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { romfs_inode_cachep = kmem_cache_create("romfs_inode_cache", sizeof(struct romfs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (romfs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 6724a6cf01f..73d1450a95d 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -73,14 +73,14 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { smb_inode_cachep = kmem_cache_create("smb_inode_cache", sizeof(struct smb_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (smb_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c index 3f54a0f80fa..ca4b2d59c0c 100644 --- a/fs/smbfs/request.c +++ b/fs/smbfs/request.c @@ -40,7 +40,7 @@ int smb_init_request_cache(void) req_cachep = kmem_cache_create("smb_request", sizeof(struct smb_request), 0, SMB_SLAB_DEBUG | SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (req_cachep == NULL) return -ENOMEM; diff 
--git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 60714d075c2..fbc7b65fe26 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -86,7 +86,7 @@ int __init sysfs_init(void) sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache", sizeof(struct sysfs_dirent), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!sysfs_dir_cachep) goto out; diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 56441169339..7c4e5d302ab 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -342,7 +342,7 @@ int __init sysv_init_icache(void) sysv_inode_cachep = kmem_cache_create("sysv_inode_cache", sizeof(struct sysv_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, - init_once, NULL); + init_once); if (!sysv_inode_cachep) return -ENOMEM; return 0; diff --git a/fs/udf/super.c b/fs/udf/super.c index 911387aa181..72097ee6b75 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -149,7 +149,7 @@ static int init_inodecache(void) sizeof(struct udf_inode_info), 0, (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (udf_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 2b3011689e8..73402c5eeb8 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1240,14 +1240,14 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", sizeof(struct ufs_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (ufs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 4b6470cf87f..b4acc7f3c37 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -74,14 +74,14 @@ extern void kmem_free(void *, size_t); static inline kmem_zone_t * kmem_zone_init(int size, char *zone_name) { - return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL); + return kmem_cache_create(zone_name, size, 0, 0, NULL); } static inline kmem_zone_t * kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, void (*construct)(void *, kmem_zone_t *, unsigned long)) { - return kmem_cache_create(zone_name, size, 0, flags, construct, NULL); + return kmem_cache_create(zone_name, size, 0, flags, construct); } static inline void diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 333a370a3bd..9752307d16b 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -946,8 +946,7 @@ static inline int i2o_pool_alloc(struct i2o_pool *pool, const char *name, strcpy(pool->name, name); pool->slab = - kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL, - NULL); + kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL); if (!pool->slab) goto free_name; diff --git a/include/linux/slab.h b/include/linux/slab.h index 0e1d0daef6a..7d0ecc1659f 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -51,7 +51,6 @@ int slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, - void (*)(void *, struct kmem_cache *, unsigned long), void (*)(void *, struct kmem_cache *, unsigned long)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); @@ -70,7 +69,7 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr); */ #define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\ sizeof(struct __struct), __alignof__(struct __struct),\ - (__flags), NULL, NULL) + (__flags), NULL) /* * The largest kmalloc size supported 
by the slab allocators is diff --git a/ipc/mqueue.c b/ipc/mqueue.c index a242c83d89d..145d5a0d299 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1253,7 +1253,7 @@ static int __init init_mqueue_fs(void) mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache", sizeof(struct mqueue_inode_info), 0, - SLAB_HWCACHE_ALIGN, init_once, NULL); + SLAB_HWCACHE_ALIGN, init_once); if (mqueue_inode_cachep == NULL) return -ENOMEM; diff --git a/kernel/fork.c b/kernel/fork.c index 46983899822..7332e236d36 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -137,7 +137,7 @@ void __init fork_init(unsigned long mempages) /* create a slab on which task_structs can be allocated */ task_struct_cachep = kmem_cache_create("task_struct", sizeof(struct task_struct), - ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL); + ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL); #endif /* @@ -1446,22 +1446,22 @@ void __init proc_caches_init(void) sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU, - sighand_ctor, NULL); + sighand_ctor); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - files_cachep = kmem_cache_create("files_cache", + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + files_cachep = kmem_cache_create("files_cache", sizeof(struct files_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - fs_cachep = kmem_cache_create("fs_cache", + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + fs_cachep = kmem_cache_create("fs_cache", sizeof(struct fs_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); vm_area_cachep = kmem_cache_create("vm_area_struct", sizeof(struct vm_area_struct), 0, - SLAB_PANIC, NULL, NULL); + SLAB_PANIC, NULL); mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); } /* diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 10f0bbba382..a4fb7d46971 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -193,7 +193,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, static int __init nsproxy_cache_init(void) { nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy), - 0, SLAB_PANIC, NULL, NULL); + 0, SLAB_PANIC, NULL); return 0; } diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 329ce017207..55b3761edaa 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -241,7 +241,7 @@ static __init int init_posix_timers(void) register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); posix_timers_cache = kmem_cache_create("posix_timers_cache", - sizeof (struct k_itimer), 0, 0, NULL, NULL); + sizeof (struct k_itimer), 0, 0, NULL); idr_init(&posix_timers_id); return 0; } diff --git a/kernel/user.c b/kernel/user.c index 98b82507797..e7d11cef699 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -208,7 +208,7 @@ static int __init uid_cache_init(void) int n; uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); for(n = 0; n < UIDHASH_SZ; ++n) INIT_LIST_HEAD(init_user_ns.uidhash_table + n); diff --git a/lib/idr.c b/lib/idr.c index 5ca67b3cfd3..ffd61941e75 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -590,7 +590,7 @@ static int init_id_cache(void) { if (!idr_layer_cache) idr_layer_cache = kmem_cache_create("idr_layer_cache", - sizeof(struct idr_layer), 0, 0, 
idr_cache_ctor, NULL); + sizeof(struct idr_layer), 0, 0, idr_cache_ctor); return 0; } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9927cca14cb..514efb200be 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1021,7 +1021,7 @@ void __init radix_tree_init(void) { radix_tree_node_cachep = kmem_cache_create("radix_tree_node", sizeof(struct radix_tree_node), 0, - SLAB_PANIC, radix_tree_node_ctor, NULL); + SLAB_PANIC, radix_tree_node_ctor); radix_tree_init_maxindex(); hotcpu_notifier(radix_tree_callback, 0); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9f4e9b95e8f..71b84b45154 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1605,11 +1605,11 @@ void __init numa_policy_init(void) policy_cache = kmem_cache_create("numa_policy", sizeof(struct mempolicy), - 0, SLAB_PANIC, NULL, NULL); + 0, SLAB_PANIC, NULL); sn_cache = kmem_cache_create("shared_policy_node", sizeof(struct sp_node), - 0, SLAB_PANIC, NULL, NULL); + 0, SLAB_PANIC, NULL); /* * Set interleaving policy for system init. Interleaving is only diff --git a/mm/rmap.c b/mm/rmap.c index fede5c7910b..41ac39749ef 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -149,7 +149,7 @@ static void anon_vma_ctor(void *data, struct kmem_cache *cachep, void __init anon_vma_init(void) { anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), - 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor, NULL); + 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor); } /* diff --git a/mm/shmem.c b/mm/shmem.c index ad155c7745d..fcd19d323f9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2322,7 +2322,7 @@ static int init_inodecache(void) { shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", sizeof(struct shmem_inode_info), - 0, 0, init_once, NULL); + 0, 0, init_once); if (shmem_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/mm/slab.c b/mm/slab.c index c3feeaab387..bde271c001b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1484,7 +1484,7 @@ void __init kmem_cache_init(void) sizes[INDEX_AC].cs_size, ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_PANIC, - NULL, NULL); + NULL); if (INDEX_AC != INDEX_L3) { sizes[INDEX_L3].cs_cachep = @@ -1492,7 +1492,7 @@ void __init kmem_cache_init(void) sizes[INDEX_L3].cs_size, ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_PANIC, - NULL, NULL); + NULL); } slab_early_init = 0; @@ -1510,7 +1510,7 @@ void __init kmem_cache_init(void) sizes->cs_size, ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_PANIC, - NULL, NULL); + NULL); } #ifdef CONFIG_ZONE_DMA sizes->cs_dmacachep = kmem_cache_create( @@ -1519,7 +1519,7 @@ void __init kmem_cache_init(void) ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| SLAB_PANIC, - NULL, NULL); + NULL); #endif sizes++; names++; @@ -2101,12 +2101,10 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) * @align: The required alignment for the objects. * @flags: SLAB flags * @ctor: A constructor for the objects. - * @dtor: A destructor for the objects (not implemented anymore). * * Returns a ptr to the cache on success, NULL on failure. * Cannot be called within a int, but can be interrupted. - * The @ctor is run when new pages are allocated by the cache - * and the @dtor is run before the pages are handed back. + * The @ctor is run when new pages are allocated by the cache. * * @name must be valid until the cache is destroyed. This implies that * the module calling this has to destroy the cache before getting unloaded. 
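[Editor's note: for reference, a minimal sketch — not part of the patch, with a hypothetical struct and names — of what a caller looks like after this series, using the ctor-only signature documented above:]

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>

struct foo {				/* hypothetical object type */
	int bar;
};

static struct kmem_cache *foo_cachep;

/* ctor signature as declared above: (void *, struct kmem_cache *, unsigned long) */
static void foo_init_once(void *obj, struct kmem_cache *cachep, unsigned long flags)
{
	struct foo *f = obj;

	memset(f, 0, sizeof(*f));
}

static int __init foo_cache_init(void)
{
	/* Five arguments now: the trailing dtor parameter is gone. */
	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
				       0, SLAB_HWCACHE_ALIGN, foo_init_once);
	return foo_cachep ? 0 : -ENOMEM;
}

[Caches that need no constructor simply pass NULL, or use the KMEM_CACHE() helper from the include/linux/slab.h hunk earlier in this patch.]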
@@ -2126,8 +2124,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) struct kmem_cache * kmem_cache_create (const char *name, size_t size, size_t align, unsigned long flags, - void (*ctor)(void*, struct kmem_cache *, unsigned long), - void (*dtor)(void*, struct kmem_cache *, unsigned long)) + void (*ctor)(void*, struct kmem_cache *, unsigned long)) { size_t left_over, slab_size, ralign; struct kmem_cache *cachep = NULL, *pc; @@ -2136,7 +2133,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, * Sanity checks... these are all serious usage bugs. */ if (!name || in_interrupt() || (size < BYTES_PER_WORD) || - size > KMALLOC_MAX_SIZE || dtor) { + size > KMALLOC_MAX_SIZE) { printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__, name); BUG(); diff --git a/mm/slob.c b/mm/slob.c index c89ef116d7a..d50920ecc02 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -492,8 +492,7 @@ struct kmem_cache { struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align, unsigned long flags, - void (*ctor)(void*, struct kmem_cache *, unsigned long), - void (*dtor)(void*, struct kmem_cache *, unsigned long)) + void (*ctor)(void*, struct kmem_cache *, unsigned long)) { struct kmem_cache *c; diff --git a/mm/slub.c b/mm/slub.c index 322f3a5d72c..9b2d6178d06 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2668,12 +2668,10 @@ static struct kmem_cache *find_mergeable(size_t size, struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align, unsigned long flags, - void (*ctor)(void *, struct kmem_cache *, unsigned long), - void (*dtor)(void *, struct kmem_cache *, unsigned long)) + void (*ctor)(void *, struct kmem_cache *, unsigned long)) { struct kmem_cache *s; - BUG_ON(dtor); down_write(&slub_lock); s = find_mergeable(size, align, flags, ctor); if (s) { diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 3fc69729381..69b70977f00 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -36,7 +36,7 @@ int __init br_fdb_init(void) br_fdb_cache = kmem_cache_create("bridge_fdb_cache", sizeof(struct net_bridge_fdb_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!br_fdb_cache) return -ENOMEM; diff --git a/net/core/flow.c b/net/core/flow.c index 051430545a0..0ab5234b17d 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -350,7 +350,7 @@ static int __init flow_cache_init(void) flow_cachep = kmem_cache_create("flow_cache", sizeof(struct flow_cache_entry), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); flow_hash_shift = 10; flow_lwm = 2 * flow_hash_size; flow_hwm = 4 * flow_hash_size; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9df26a07f06..ca2a1533138 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1347,7 +1347,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) tbl->kmem_cachep = kmem_cache_create(tbl->id, tbl->entry_size, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) panic("cannot create neighbour cache statistics"); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0583e8498f1..35021eb3ed0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2021,13 +2021,13 @@ void __init skb_init(void) sizeof(struct sk_buff), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", (2*sizeof(struct sk_buff)) + sizeof(atomic_t), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); } /** diff --git 
a/net/core/sock.c b/net/core/sock.c index 239a08a6ff2..bd209c4477a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1767,7 +1767,7 @@ int proto_register(struct proto *prot, int alloc_slab) if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", @@ -1785,7 +1785,7 @@ int proto_register(struct proto *prot, int alloc_slab) sprintf(request_sock_slab_name, mask, prot->name); prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name, prot->rsk_prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (prot->rsk_prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n", @@ -1807,7 +1807,7 @@ int proto_register(struct proto *prot, int alloc_slab) kmem_cache_create(timewait_sock_slab_name, prot->twsk_prot->twsk_obj_size, 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; } diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 01030f34617..7ac775f9a64 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -481,14 +481,14 @@ int __init dccp_ackvec_init(void) { dccp_ackvec_slab = kmem_cache_create("dccp_ackvec", sizeof(struct dccp_ackvec), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (dccp_ackvec_slab == NULL) goto out_err; dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record", sizeof(struct dccp_ackvec_record), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (dccp_ackvec_record_slab == NULL) goto out_destroy_slab; diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index d8cf92f09e6..ccbf72c793b 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -69,7 +69,7 @@ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,. if (slab_name == NULL) return NULL; slab = kmem_cache_create(slab_name, sizeof(struct ccid) + obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (slab == NULL) kfree(slab_name); return slab; diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index dd0fc992b04..174d3f13d93 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -282,7 +282,7 @@ static __init int dccp_li_init(void) { dccp_li_cachep = kmem_cache_create("dccp_li_hist", sizeof(struct dccp_li_hist_entry), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); return dccp_li_cachep == NULL ? 
-ENOBUFS : 0; } diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 2e8ef42721e..34c4f604772 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -59,7 +59,7 @@ struct dccp_tx_hist *dccp_tx_hist_new(const char *name) hist->dccptxh_slab = kmem_cache_create(slab_name, sizeof(struct dccp_tx_hist_entry), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (hist->dccptxh_slab == NULL) goto out_free_slab_name; out: @@ -148,7 +148,7 @@ struct dccp_rx_hist *dccp_rx_hist_new(const char *name) hist->dccprxh_slab = kmem_cache_create(slab_name, sizeof(struct dccp_rx_hist_entry), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (hist->dccprxh_slab == NULL) goto out_free_slab_name; out: diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 6607b7b14f3..04b59ec4f51 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1003,7 +1003,7 @@ static int __init dccp_init(void) dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!dccp_hashinfo.bind_bucket_cachep) goto out; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 82622fb6f68..f2a61ef2af9 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1770,7 +1770,7 @@ void __init dn_route_init(void) dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); init_timer(&dn_route_timer); dn_route_timer.function = dn_dst_check_expire; dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index d6615c9361e..fda0772fa21 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -881,7 +881,7 @@ void __init dn_fib_table_init(void) dn_hash_kmem = kmem_cache_create("dn_fib_info_cache", sizeof(struct dn_fib_info), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); } void __exit dn_fib_table_cleanup(void) diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 07e843a47dd..9ad1d9ff9ce 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -771,13 +771,13 @@ struct fib_table * __init fib_hash_init(u32 id) fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (fn_alias_kmem == NULL) fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), GFP_KERNEL); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 30e332ade61..9ca786a6fd3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1970,7 +1970,7 @@ struct fib_table * __init fib_hash_init(u32 id) fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), GFP_KERNEL); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 2f44e612806..6cbce96a54c 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -123,7 +123,7 @@ void __init inet_initpeers(void) peer_cachep = kmem_cache_create("inet_peer_cache", sizeof(struct inet_peer), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); /* All the timers, started at system startup tend to synchronize. Perturb it a bit. 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d96582acdf6..7003cc1b7fe 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1917,7 +1917,7 @@ void __init ip_mr_init(void) mrt_cachep = kmem_cache_create("ip_mrt_cache", sizeof(struct mfc_cache), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); init_timer(&ipmr_expire_timer); ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 3b446b1a6b9..d612a6a5d95 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -901,7 +901,7 @@ int ip_vs_conn_init(void) /* Allocate ip_vs_conn slab cache */ ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn", sizeof(struct ip_vs_conn), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!ip_vs_conn_cachep) { vfree(ip_vs_conn_tab); return -ENOMEM; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 88fa648d7ba..df42b7fb326 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2967,7 +2967,7 @@ int __init ip_rt_init(void) ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 987b94403be..da4c0b6ab79 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2430,7 +2430,7 @@ void __init tcp_init(void) tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); /* Size and allocate the main established and bind bucket * hash tables. diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 662a7d9681f..6a612a701ea 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1474,7 +1474,7 @@ void __init fib6_init(void) fib6_node_kmem = kmem_cache_create("fib6_nodes", sizeof(struct fib6_node), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); fib6_tables_init(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fe8d9837f9f..919de682b33 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2555,7 +2555,7 @@ void __init ip6_route_init(void) #endif ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; fib6_init(); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 6f87dd568de..30f3236c402 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -84,7 +84,7 @@ static int xfrm6_tunnel_spi_init(void) xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi", sizeof(struct xfrm6_tunnel_spi), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!xfrm6_tunnel_spi_kmem) return -ENOMEM; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8cce814f6be..aa086c83af8 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1108,7 +1108,7 @@ int __init nf_conntrack_init(void) nf_conntrack_cachep = kmem_cache_create("nf_conntrack", sizeof(struct nf_conn), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!nf_conntrack_cachep) { printk(KERN_ERR "Unable to create nf_conn slab cache\n"); goto err_free_hash; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 2191fe008f6..1aa6229ca99 
100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -540,7 +540,7 @@ int __init nf_conntrack_expect_init(void) nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", sizeof(struct nf_conntrack_expect), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!nf_ct_expect_cachep) goto err2; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index d6b3d01975b..bd45f9d3f7d 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -738,7 +738,7 @@ static int __init xt_hashlimit_init(void) err = -ENOMEM; hashlimit_cachep = kmem_cache_create("xt_hashlimit", sizeof(struct dsthash_ent), 0, 0, - NULL, NULL); + NULL); if (!hashlimit_cachep) { printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); goto err2; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 46f6d572ad2..16a68df4e36 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -792,7 +792,7 @@ static int __init af_rxrpc_init(void) ret = -ENOMEM; rxrpc_call_jar = kmem_cache_create( "rxrpc_call_jar", sizeof(struct rxrpc_call), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!rxrpc_call_jar) { printk(KERN_NOTICE "RxRPC: Failed to allocate call jar\n"); goto error_call_jar; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 34bab36637a..e98579b788b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -980,14 +980,14 @@ SCTP_STATIC __init int sctp_init(void) sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket", sizeof(struct sctp_bind_bucket), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!sctp_bucket_cachep) goto out; sctp_chunk_cachep = kmem_cache_create("sctp_chunk", sizeof(struct sctp_chunk), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!sctp_chunk_cachep) goto err_chunk_cachep; diff --git a/net/socket.c b/net/socket.c index b7111425004..ec077037f53 100644 --- a/net/socket.c +++ b/net/socket.c @@ -272,8 +272,7 @@ static int init_inodecache(void) (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD), - init_once, - NULL); + init_once); if (sock_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 5b2b6fb244f..650af064ff8 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -867,7 +867,7 @@ int register_rpc_pipefs(void) sizeof(struct rpc_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (!rpc_inode_cachep) return -ENOMEM; err = register_filesystem(&rpc_pipe_fs_type); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 2ac43c41c3a..b5723c262a3 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1031,13 +1031,13 @@ rpc_init_mempool(void) rpc_task_slabp = kmem_cache_create("rpc_tasks", sizeof(struct rpc_task), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!rpc_task_slabp) goto err_nomem; rpc_buffer_slabp = kmem_cache_create("rpc_buffers", RPC_BUFFER_MAXSIZE, 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!rpc_buffer_slabp) goto err_nomem; rpc_task_mempool = mempool_create_slab_pool(RPC_TASK_POOLSIZE, diff --git a/net/tipc/handler.c b/net/tipc/handler.c index e1dcf663f8a..0c70010a7df 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -97,7 +97,7 @@ int tipc_handler_start(void) { tipc_queue_item_cache = kmem_cache_create("tipc_queue_items", sizeof(struct queue_item), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (!tipc_queue_item_cache) return -ENOMEM; diff --git a/net/xfrm/xfrm_input.c 
b/net/xfrm/xfrm_input.c index 5c4695840c5..113f4442998 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -83,5 +83,5 @@ void __init xfrm_input_init(void) secpath_cachep = kmem_cache_create("secpath_cache", sizeof(struct sec_path), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cfaf17c8851..c3a4b0a1868 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2378,7 +2378,7 @@ static void __init xfrm_policy_init(void) xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + NULL); hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); diff --git a/security/keys/key.c b/security/keys/key.c index 700400d801d..01bbc6d9d19 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -1001,7 +1001,7 @@ void __init key_init(void) { /* allocate a slab in which we can store keys */ key_jar = kmem_cache_create("key_jar", sizeof(struct key), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); /* add the special key types */ list_add_tail(&key_type_keyring.link, &key_types_list); diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 78c408fd2b0..ecd06738453 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -239,7 +239,7 @@ void __init avc_init(void) atomic_set(&avc_cache.lru_hint, 0); avc_node_cachep = kmem_cache_create("avc_node", sizeof(struct avc_node), - 0, SLAB_PANIC, NULL, NULL); + 0, SLAB_PANIC, NULL); audit_log(current->audit_context, GFP_KERNEL, AUDIT_KERNEL, "AVC INITIALIZED\n"); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 26356e67108..0fac6829c63 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4913,7 +4913,7 @@ static __init int selinux_init(void) sel_inode_cache = kmem_cache_create("selinux_inode_security", sizeof(struct inode_security_struct), - 0, SLAB_PANIC, NULL, NULL); + 0, SLAB_PANIC, NULL); avc_init(); original_ops = secondary_ops = security_ops; diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index 3122908afdc..85705eb289e 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -445,7 +445,7 @@ void avtab_cache_init(void) { avtab_node_cachep = kmem_cache_create("avtab_node", sizeof(struct avtab_node), - 0, SLAB_PANIC, NULL, NULL); + 0, SLAB_PANIC, NULL); } void avtab_cache_destroy(void) -- cgit v1.2.3-70-g09d2 From b8fceee17a310f189188599a8fa5e9beaff57eb0 Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Thu, 20 Sep 2007 12:40:16 -0700 Subject: signalfd simplification This simplifies the signalfd code by no longer keeping it attached to the sighand for its whole lifetime. Instead, the signalfd remains attached to the sighand only during poll(2) (and select and epoll) and read(2). This also allows removing all the custom "tsk == current" checks in kernel/signal.c, since dequeue_signal() will only be called by "current". I think this is also what Ben was suggesting some time ago. The external effect is that a thread can extract only its own private signals and the group ones. I think this is acceptable behaviour, since those are the signals the thread would be able to fetch without signalfd.
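[Editor's note: to make the behavioural point above concrete, a minimal userspace sketch — not part of the patch — of a thread fetching its own private and group signals through a signalfd. It uses the <sys/signalfd.h> wrapper that glibc added later; at the time of this commit the raw syscall would have been invoked directly:]

#include <signal.h>
#include <stdio.h>
#include <sys/signalfd.h>
#include <unistd.h>

int main(void)
{
	sigset_t mask;
	struct signalfd_siginfo ssi;
	int sfd;

	/* Block SIGUSR1 so it is delivered via the fd, not a handler;
	 * a multithreaded program would use pthread_sigmask() instead. */
	sigemptyset(&mask);
	sigaddset(&mask, SIGUSR1);
	sigprocmask(SIG_BLOCK, &mask, NULL);

	sfd = signalfd(-1, &mask, 0);
	if (sfd < 0)
		return 1;

	/* After this patch, read() dequeues only the calling thread's
	 * private signals plus the process-wide (group) ones. */
	if (read(sfd, &ssi, sizeof(ssi)) == sizeof(ssi))
		printf("got signal %d\n", ssi.ssi_signo);

	close(sfd);
	return 0;
}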
Signed-off-by: Davide Libenzi Signed-off-by: Linus Torvalds --- fs/exec.c | 3 - fs/signalfd.c | 190 +++++++--------------------------------------- include/linux/init_task.h | 2 +- include/linux/sched.h | 2 +- include/linux/signalfd.h | 40 +--------- kernel/exit.c | 9 --- kernel/fork.c | 2 +- kernel/signal.c | 8 +- 8 files changed, 39 insertions(+), 217 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/exec.c b/fs/exec.c index c21a8cc0627..073b0b8c6d0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include @@ -784,7 +783,6 @@ static int de_thread(struct task_struct *tsk) * and we can just re-use it all. */ if (atomic_read(&oldsighand->count) <= 1) { - signalfd_detach(tsk); exit_itimers(sig); return 0; } @@ -923,7 +921,6 @@ static int de_thread(struct task_struct *tsk) sig->flags = 0; no_thread_group: - signalfd_detach(tsk); exit_itimers(sig); if (leader) release_task(leader); diff --git a/fs/signalfd.c b/fs/signalfd.c index a8e293d3003..aefb0be0794 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -11,8 +11,10 @@ * Now using anonymous inode source. * Thanks to Oleg Nesterov for useful code review and suggestions. * More comments and suggestions from Arnd Bergmann. - * Sat May 19, 2007: Davi E. M. Arnaut + * Sat May 19, 2007: Davi E. M. Arnaut * Retrieve multiple signals with one read() call + * Sun Jul 15, 2007: Davide Libenzi + * Attach to the sighand only during read() and poll(). */ #include @@ -27,102 +29,12 @@ #include struct signalfd_ctx { - struct list_head lnk; - wait_queue_head_t wqh; sigset_t sigmask; - struct task_struct *tsk; }; -struct signalfd_lockctx { - struct task_struct *tsk; - unsigned long flags; -}; - -/* - * Tries to acquire the sighand lock. We do not increment the sighand - * use count, and we do not even pin the task struct, so we need to - * do it inside an RCU read lock, and we must be prepared for the - * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand - * being detached. We return 0 if the sighand has been detached, or - * 1 if we were able to pin the sighand lock. - */ -static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk) -{ - struct sighand_struct *sighand = NULL; - - rcu_read_lock(); - lk->tsk = rcu_dereference(ctx->tsk); - if (likely(lk->tsk != NULL)) - sighand = lock_task_sighand(lk->tsk, &lk->flags); - rcu_read_unlock(); - - if (!sighand) - return 0; - - if (!ctx->tsk) { - unlock_task_sighand(lk->tsk, &lk->flags); - return 0; - } - - if (lk->tsk->tgid == current->tgid) - lk->tsk = current; - - return 1; -} - -static void signalfd_unlock(struct signalfd_lockctx *lk) -{ - unlock_task_sighand(lk->tsk, &lk->flags); -} - -/* - * This must be called with the sighand lock held. - */ -void signalfd_deliver(struct task_struct *tsk, int sig) -{ - struct sighand_struct *sighand = tsk->sighand; - struct signalfd_ctx *ctx, *tmp; - - BUG_ON(!sig); - list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) { - /* - * We use a negative signal value as a way to broadcast that the - * sighand has been orphaned, so that we can notify all the - * listeners about this. Remember the ctx->sigmask is inverted, - * so if the user is interested in a signal, that corresponding - * bit will be zero. 
- */ - if (sig < 0) { - if (ctx->tsk == tsk) { - ctx->tsk = NULL; - list_del_init(&ctx->lnk); - wake_up(&ctx->wqh); - } - } else { - if (!sigismember(&ctx->sigmask, sig)) - wake_up(&ctx->wqh); - } - } -} - -static void signalfd_cleanup(struct signalfd_ctx *ctx) -{ - struct signalfd_lockctx lk; - - /* - * This is tricky. If the sighand is gone, we do not need to remove - * context from the list, the list itself won't be there anymore. - */ - if (signalfd_lock(ctx, &lk)) { - list_del(&ctx->lnk); - signalfd_unlock(&lk); - } - kfree(ctx); -} - static int signalfd_release(struct inode *inode, struct file *file) { - signalfd_cleanup(file->private_data); + kfree(file->private_data); return 0; } @@ -130,23 +42,15 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait) { struct signalfd_ctx *ctx = file->private_data; unsigned int events = 0; - struct signalfd_lockctx lk; - poll_wait(file, &ctx->wqh, wait); + poll_wait(file, &current->sighand->signalfd_wqh, wait); - /* - * Let the caller get a POLLIN in this case, ala socket recv() when - * the peer disconnects. - */ - if (signalfd_lock(ctx, &lk)) { - if ((lk.tsk == current && - next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) || - next_signal(&lk.tsk->signal->shared_pending, - &ctx->sigmask) > 0) - events |= POLLIN; - signalfd_unlock(&lk); - } else + spin_lock_irq(&current->sighand->siglock); + if (next_signal(&current->pending, &ctx->sigmask) || + next_signal(&current->signal->shared_pending, + &ctx->sigmask)) events |= POLLIN; + spin_unlock_irq(&current->sighand->siglock); return events; } @@ -219,59 +123,46 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info, int nonblock) { ssize_t ret; - struct signalfd_lockctx lk; DECLARE_WAITQUEUE(wait, current); - if (!signalfd_lock(ctx, &lk)) - return 0; - - ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); + spin_lock_irq(&current->sighand->siglock); + ret = dequeue_signal(current, &ctx->sigmask, info); switch (ret) { case 0: if (!nonblock) break; ret = -EAGAIN; default: - signalfd_unlock(&lk); + spin_unlock_irq(&current->sighand->siglock); return ret; } - add_wait_queue(&ctx->wqh, &wait); + add_wait_queue(&current->sighand->signalfd_wqh, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); - ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); - signalfd_unlock(&lk); + ret = dequeue_signal(current, &ctx->sigmask, info); if (ret != 0) break; if (signal_pending(current)) { ret = -ERESTARTSYS; break; } + spin_unlock_irq(&current->sighand->siglock); schedule(); - ret = signalfd_lock(ctx, &lk); - if (unlikely(!ret)) { - /* - * Let the caller read zero byte, ala socket - * recv() when the peer disconnect. This test - * must be done before doing a dequeue_signal(), - * because if the sighand has been orphaned, - * the dequeue_signal() call is going to crash - * because ->sighand will be long gone. - */ - break; - } + spin_lock_irq(&current->sighand->siglock); } + spin_unlock_irq(&current->sighand->siglock); - remove_wait_queue(&ctx->wqh, &wait); + remove_wait_queue(&current->sighand->signalfd_wqh, &wait); __set_current_state(TASK_RUNNING); return ret; } /* - * Returns either the size of a "struct signalfd_siginfo", or zero if the - * sighand we are attached to, has been orphaned. The "count" parameter - * must be at least the size of a "struct signalfd_siginfo". + * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative + * error code. The "count" parameter must be at least the size of a + * "struct signalfd_siginfo".
*/ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -287,7 +178,6 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, return -EINVAL; siginfo = (struct signalfd_siginfo __user *) buf; - do { ret = signalfd_dequeue(ctx, &info, nonblock); if (unlikely(ret <= 0)) @@ -300,7 +190,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, nonblock = 1; } while (--count); - return total ? total : ret; + return total ? total: ret; } static const struct file_operations signalfd_fops = { @@ -309,20 +199,13 @@ static const struct file_operations signalfd_fops = { .read = signalfd_read, }; -/* - * Create a file descriptor that is associated with our signal - * state. We can pass it around to others if we want to, but - * it will always be _our_ signal state. - */ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) { int error; sigset_t sigmask; struct signalfd_ctx *ctx; - struct sighand_struct *sighand; struct file *file; struct inode *inode; - struct signalfd_lockctx lk; if (sizemask != sizeof(sigset_t) || copy_from_user(&sigmask, user_mask, sizeof(sigmask))) @@ -335,17 +218,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas if (!ctx) return -ENOMEM; - init_waitqueue_head(&ctx->wqh); ctx->sigmask = sigmask; - ctx->tsk = current->group_leader; - - sighand = current->sighand; - /* - * Add this fd to the list of signal listeners. - */ - spin_lock_irq(&sighand->siglock); - list_add_tail(&ctx->lnk, &sighand->signalfd_list); - spin_unlock_irq(&sighand->siglock); /* * When we call this, the initialization must be complete, since @@ -364,23 +237,18 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas fput(file); return -EINVAL; } - /* - * We need to be prepared of the fact that the sighand this fd - * is attached to, has been detched. In that case signalfd_lock() - * will return 0, and we'll just skip setting the new mask. 
- */ - if (signalfd_lock(ctx, &lk)) { - ctx->sigmask = sigmask; - signalfd_unlock(&lk); - } - wake_up(&ctx->wqh); + spin_lock_irq(&current->sighand->siglock); + ctx->sigmask = sigmask; + spin_unlock_irq(&current->sighand->siglock); + + wake_up(&current->sighand->signalfd_wqh); fput(file); } return ufd; err_fdalloc: - signalfd_cleanup(ctx); + kfree(ctx); return error; } diff --git a/include/linux/init_task.h b/include/linux/init_task.h index cab741c2d60..f8abfa349ef 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -86,7 +86,7 @@ extern struct nsproxy init_nsproxy; .count = ATOMIC_INIT(1), \ .action = { { { .sa_handler = NULL, } }, }, \ .siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \ - .signalfd_list = LIST_HEAD_INIT(sighand.signalfd_list), \ + .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(sighand.signalfd_wqh), \ } extern struct group_info init_groups; diff --git a/include/linux/sched.h b/include/linux/sched.h index 3de79016f2a..a01ac6dd5f5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -438,7 +438,7 @@ struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; - struct list_head signalfd_list; + wait_queue_head_t signalfd_wqh; }; struct pacct_struct { diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index 51042949569..4c9ff0910ae 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -45,49 +45,17 @@ struct signalfd_siginfo { #ifdef CONFIG_SIGNALFD /* - * Deliver the signal to listening signalfd. This must be called - * with the sighand lock held. Same are the following that end up - * calling signalfd_deliver(). - */ -void signalfd_deliver(struct task_struct *tsk, int sig); - -/* - * No need to fall inside signalfd_deliver() if no signal listeners - * are available. + * Deliver the signal to listening signalfd. */ static inline void signalfd_notify(struct task_struct *tsk, int sig) { - if (unlikely(!list_empty(&tsk->sighand->signalfd_list))) - signalfd_deliver(tsk, sig); -} - -/* - * The signal -1 is used to notify the signalfd that the sighand - * is on its way to be detached. - */ -static inline void signalfd_detach_locked(struct task_struct *tsk) -{ - if (unlikely(!list_empty(&tsk->sighand->signalfd_list))) - signalfd_deliver(tsk, -1); -} - -static inline void signalfd_detach(struct task_struct *tsk) -{ - struct sighand_struct *sighand = tsk->sighand; - - if (unlikely(!list_empty(&sighand->signalfd_list))) { - spin_lock_irq(&sighand->siglock); - signalfd_deliver(tsk, -1); - spin_unlock_irq(&sighand->siglock); - } + if (unlikely(waitqueue_active(&tsk->sighand->signalfd_wqh))) + wake_up(&tsk->sighand->signalfd_wqh); } #else /* CONFIG_SIGNALFD */ -#define signalfd_deliver(t, s) do { } while (0) -#define signalfd_notify(t, s) do { } while (0) -#define signalfd_detach_locked(t) do { } while (0) -#define signalfd_detach(t) do { } while (0) +static inline void signalfd_notify(struct task_struct *tsk, int sig) { } #endif /* CONFIG_SIGNALFD */ diff --git a/kernel/exit.c b/kernel/exit.c index 06b24b3aa37..993369ee94d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -86,14 +85,6 @@ static void __exit_signal(struct task_struct *tsk) sighand = rcu_dereference(tsk->sighand); spin_lock(&sighand->siglock); - /* - * Notify that this sighand has been detached. This must - * be called with the tsk->sighand lock held. Also, this - * access tsk->sighand internally, so it must be called - * before tsk->sighand is reset.
- */ - signalfd_detach_locked(tsk); - posix_cpu_timers_exit(tsk); if (atomic_dec_and_test(&sig->count)) posix_cpu_timers_exit_group(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 7332e236d36..33f12f48684 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1438,7 +1438,7 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep, struct sighand_struct *sighand = data; spin_lock_init(&sighand->siglock); - INIT_LIST_HEAD(&sighand->signalfd_list); + init_waitqueue_head(&sighand->signalfd_wqh); } void __init proc_caches_init(void) diff --git a/kernel/signal.c b/kernel/signal.c index 3169bed0b4d..9fb91a32edd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -378,8 +378,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ - if (likely(tsk == current)) - signr = __dequeue_signal(&tsk->pending, mask, info); + signr = __dequeue_signal(&tsk->pending, mask, info); if (!signr) { signr = __dequeue_signal(&tsk->signal->shared_pending, mask, info); @@ -407,8 +406,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) } } } - if (likely(tsk == current)) - recalc_sigpending(); + recalc_sigpending(); if (signr && unlikely(sig_kernel_stop(signr))) { /* * Set a marker that we have dequeued a stop signal. Our @@ -425,7 +423,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; } - if (signr && likely(tsk == current) && + if (signr && ((info->si_code & __SI_MASK) == __SI_TIMER) && info->si_sys_private){ /* -- cgit v1.2.3-70-g09d2 From 9dd776b6d7b0b85966b6ddd03e2b2aae59012ab1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 26 Sep 2007 22:04:26 -0700 Subject: [NET]: Add network namespace clone & unshare support. This patch allows you to create a new network namespace using sys_clone or sys_unshare. As the network namespace is still experimental and under development, clone and unshare support is only made available when CONFIG_NET_NS is selected at compile time. As this patch introduces network namespace support into code paths that exist even when CONFIG_NET is not selected, a few additions are made to net_namespace.h to allow a few more functions to be used when the networking stack is not compiled in. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/sched.h | 1 + include/net/net_namespace.h | 18 ++++++++++++++++++ kernel/fork.c | 3 ++- kernel/nsproxy.c | 15 +++++++++++++-- net/Kconfig | 8 ++++++++ net/core/net_namespace.c | 43 +++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 83 insertions(+), 5 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 313c6b6e774..a4a141055c4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -27,6 +27,7 @@ #define CLONE_NEWUTS 0x04000000 /* New utsname group?
*/ #define CLONE_NEWIPC 0x08000000 /* New ipcs */ #define CLONE_NEWUSER 0x10000000 /* New user namespace */ +#define CLONE_NEWNET 0x20000000 /* New network namespace */ /* * Scheduling policies diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index ac8f8304094..3ea4194613e 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -38,11 +38,23 @@ extern struct net init_net; extern struct list_head net_namespace_list; +#ifdef CONFIG_NET +extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns); +#else +static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns) +{ + /* There is nothing to copy so this is a noop */ + return net_ns; +} +#endif + extern void __put_net(struct net *net); static inline struct net *get_net(struct net *net) { +#ifdef CONFIG_NET atomic_inc(&net->count); +#endif return net; } @@ -60,19 +72,25 @@ static inline struct net *maybe_get_net(struct net *net) static inline void put_net(struct net *net) { +#ifdef CONFIG_NET if (atomic_dec_and_test(&net->count)) __put_net(net); +#endif } static inline struct net *hold_net(struct net *net) { +#ifdef CONFIG_NET atomic_inc(&net->use_count); +#endif return net; } static inline void release_net(struct net *net) { +#ifdef CONFIG_NET atomic_dec(&net->use_count); +#endif } extern void net_lock(void); diff --git a/kernel/fork.c b/kernel/fork.c index 33f12f48684..5e67f90a169 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1608,7 +1608,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| - CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER)) + CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER| + CLONE_NEWNET)) goto bad_unshare_out; if ((err = unshare_thread(unshare_flags))) diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index a4fb7d46971..f1decd21a53 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -20,6 +20,7 @@ #include #include #include +#include static struct kmem_cache *nsproxy_cachep; @@ -98,8 +99,17 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_user; } + new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); + if (IS_ERR(new_nsp->net_ns)) { + err = PTR_ERR(new_nsp->net_ns); + goto out_net; + } + return new_nsp; +out_net: + if (new_nsp->user_ns) + put_user_ns(new_nsp->user_ns); out_user: if (new_nsp->pid_ns) put_pid_ns(new_nsp->pid_ns); @@ -132,7 +142,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) get_nsproxy(old_ns); - if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER))) + if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET))) return 0; if (!capable(CAP_SYS_ADMIN)) { @@ -164,6 +174,7 @@ void free_nsproxy(struct nsproxy *ns) put_pid_ns(ns->pid_ns); if (ns->user_ns) put_user_ns(ns->user_ns); + put_net(ns->net_ns); kmem_cache_free(nsproxy_cachep, ns); } @@ -177,7 +188,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, int err = 0; if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWUSER))) + CLONE_NEWUSER | CLONE_NEWNET))) return 0; if (!capable(CAP_SYS_ADMIN)) diff --git a/net/Kconfig b/net/Kconfig index cdba08ca2ef..ab4e6da5012 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -27,6 +27,14 @@ if NET menu "Networking options" +config NET_NS + bool "Network namespace support" + default n + depends on EXPERIMENTAL && !SYSFS + help + Allow user space to create what appear to be multiple 
instances + of the network stack. + source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 0e6cb02d7b7..e478e353ea6 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -4,6 +4,7 @@ #include #include #include +#include #include /* @@ -32,12 +33,10 @@ void net_unlock(void) mutex_unlock(&net_list_mutex); } -#if 0 static struct net *net_alloc(void) { return kmem_cache_alloc(net_cachep, GFP_KERNEL); } -#endif static void net_free(struct net *net) { @@ -128,6 +127,46 @@ out_undo: goto out; } +struct net *copy_net_ns(unsigned long flags, struct net *old_net) +{ + struct net *new_net = NULL; + int err; + + get_net(old_net); + + if (!(flags & CLONE_NEWNET)) + return old_net; + +#ifndef CONFIG_NET_NS + return ERR_PTR(-EINVAL); +#endif + + err = -ENOMEM; + new_net = net_alloc(); + if (!new_net) + goto out; + + mutex_lock(&net_mutex); + err = setup_net(new_net); + if (err) + goto out_unlock; + + net_lock(); + list_add_tail(&new_net->list, &net_namespace_list); + net_unlock(); + + +out_unlock: + mutex_unlock(&net_mutex); +out: + put_net(old_net); + if (err) { + net_free(new_net); + new_net = ERR_PTR(err); + } + return new_net; +} + static int __init net_ns_init(void) { int err; -- cgit v1.2.3-70-g09d2
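A minimal userspace sketch of exercising the new flag (editorial, not part of the commit; it assumes a libc that exposes unshare(2), and the fallback define simply mirrors the CLONE_NEWNET value added to sched.h above):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

#ifndef CLONE_NEWNET
#define CLONE_NEWNET 0x20000000	/* mirrors the flag added in linux/sched.h */
#endif

int main(void)
{
	/* Requires CAP_SYS_ADMIN; on kernels built without CONFIG_NET_NS the
	 * call fails with EINVAL, matching the ERR_PTR(-EINVAL) path in
	 * copy_net_ns() above. */
	if (unshare(CLONE_NEWNET) == -1) {
		perror("unshare(CLONE_NEWNET)");
		return 1;
	}

	/* From here on the process sees what appears to be a separate
	 * instance of the network stack, distinct from init_net. */
	printf("pid %d now runs in a new network namespace\n", (int)getpid());
	return 0;
}

The same flag can equally be passed to sys_clone, in which case the child starts in the new namespace while the parent stays in the old one.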