From e74e396204bfcb67570ba4517b08f5918e69afea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 30 Mar 2009 19:07:44 +0900 Subject: percpu: use dynamic percpu allocator as the default percpu allocator This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use dynamic percpu allocator. The first chunk is allocated using embedding helper and 8k is reserved for modules. This ensures that the new allocator behaves almost identically to the original allocator as long as static percpu variables are concerned, so it shouldn't introduce much breakage. s390 and alpha use custom SHIFT_PERCPU_PTR() to work around addressing range limit the addressing model imposes. Unfortunately, this breaks if the address is specified using a variable, so for now, the two archs aren't converted. The following architectures are affected by this change. * sh * arm * cris * mips * sparc(32) * blackfin * avr32 * parisc (broken, under investigation) * m32r * powerpc(32) As this change makes the dynamic allocator the default one, CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its invert - CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to yet-to-be converted archs. These archs implement their own setup_per_cpu_areas() and the conversion is not trivial. * powerpc(64) * sparc(64) * ia64 * alpha * s390 Boot and batch alloc/free tests on x86_32 with debug code (x86_32 doesn't use default first chunk initialization). Compile tested on sparc(32), powerpc(32), arm and alpha. Kyle McMartin reported that this change breaks parisc. The problem is still under investigation and he is okay with pushing this patch forward and fixing parisc later. [ Impact: use dynamic allocator for most archs w/o custom percpu setup ] Signed-off-by: Tejun Heo Acked-by: Rusty Russell Acked-by: David S. Miller Acked-by: Benjamin Herrenschmidt Acked-by: Martin Schwidefsky Reviewed-by: Christoph Lameter Cc: Paul Mundt Cc: Russell King Cc: Mikael Starvik Cc: Ralf Baechle Cc: Bryan Wu Cc: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Cc: Hirokazu Takata Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Heiko Carstens Cc: Ingo Molnar --- kernel/module.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 38928fcaff2..f5934954fa9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -364,7 +364,7 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA static void *percpu_modalloc(unsigned long size, unsigned long align, const char *name) @@ -389,7 +389,7 @@ static void percpu_modfree(void *freeme) free_percpu(freeme); } -#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */ /* Number of blocks used and allocated. */ static unsigned int pcpu_num_used, pcpu_num_allocated; @@ -535,7 +535,7 @@ static int percpu_modinit(void) } __initcall(percpu_modinit); -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ static unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, -- cgit v1.2.3-70-g09d2 From ad361c9884e809340f6daca80d56a9e9c871690a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 6 Jul 2009 13:05:40 -0700 Subject: Remove multiple KERN_ prefixes from printk formats Commit 5fd29d6ccbc98884569d6f3105aeca70858b3e0f ("printk: clean up handling of log-levels and newlines") changed printk semantics. printk lines with multiple KERN_ prefixes are no longer emitted as before the patch. is now included in the output on each additional use. Remove all uses of multiple KERN_s in formats. Signed-off-by: Joe Perches Signed-off-by: Linus Torvalds --- arch/avr32/kernel/traps.c | 13 ++++---- arch/blackfin/kernel/setup.c | 41 ++++++++++++------------- arch/blackfin/kernel/traps.c | 34 ++++++++++----------- arch/m68knommu/kernel/process.c | 21 +++++++------ arch/m68knommu/kernel/traps.c | 6 ++-- arch/mn10300/kernel/traps.c | 21 +++++-------- arch/parisc/kernel/process.c | 2 +- arch/parisc/kernel/traps.c | 20 ++++++------- arch/um/kernel/sysrq.c | 4 +-- arch/x86/kernel/apic/io_apic.c | 1 - arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 12 ++++---- arch/x86/kernel/e820.c | 7 ++--- arch/x86/kernel/pci-gart_64.c | 2 +- arch/x86/mm/fault.c | 9 +++--- arch/xtensa/kernel/traps.c | 6 ++-- drivers/block/amiflop.c | 2 +- drivers/block/xsysace.c | 7 +++-- drivers/char/hw_random/intel-rng.c | 9 +++--- drivers/char/isicom.c | 16 +++++----- drivers/i2c/busses/i2c-ibm_iic.c | 9 +++--- drivers/md/md.c | 18 ++++++----- drivers/misc/sgi-xp/xpnet.c | 4 +-- drivers/net/a2065.c | 12 ++------ drivers/net/arcnet/arcnet.c | 26 +++++++--------- drivers/net/bmac.c | 7 +++-- drivers/net/bnx2x_main.c | 7 +++-- drivers/net/dl2k.c | 17 ++++++----- drivers/net/epic100.c | 5 ++-- drivers/net/fealnx.c | 15 ++++++---- drivers/net/hamachi.c | 23 +++++++------- drivers/net/hamradio/baycom_epp.c | 2 +- drivers/net/hamradio/baycom_par.c | 2 +- drivers/net/hamradio/baycom_ser_fdx.c | 2 +- drivers/net/hamradio/baycom_ser_hdx.c | 2 +- drivers/net/natsemi.c | 4 +-- drivers/net/ne.c | 2 +- drivers/net/pci-skeleton.c | 2 +- drivers/net/pcmcia/ibmtr_cs.c | 13 ++++---- drivers/net/pcmcia/nmclan_cs.c | 15 ++++------ drivers/net/pcnet32.c | 50 +++++++++++++++---------------- drivers/net/starfire.c | 2 +- drivers/net/sundance.c | 6 ++-- drivers/net/tsi108_eth.c | 8 +++-- drivers/net/tulip/de2104x.c | 7 +++-- drivers/net/tulip/tulip_core.c | 14 +++++---- drivers/net/tulip/winbond-840.c | 6 ++-- drivers/net/wan/hd64570.c | 3 +- drivers/net/wan/hd64572.c | 3 +- drivers/net/wan/sbni.c | 8 ++--- drivers/net/wireless/ray_cs.c | 9 +++--- drivers/net/wireless/wavelan_cs.c | 13 ++------ drivers/net/yellowfin.c | 23 +++++++------- drivers/parisc/eisa_enumerator.c | 14 ++++----- drivers/pcmcia/tcic.c | 3 +- drivers/scsi/atari_NCR5380.c | 3 +- drivers/scsi/mac53c94.c | 5 ++-- drivers/scsi/sg.c | 2 +- drivers/scsi/sun3_NCR5380.c | 3 +- drivers/serial/8250_pci.c | 11 +++---- drivers/ssb/pcmcia.c | 4 +-- drivers/usb/core/hcd.c | 2 +- drivers/video/amba-clcd.c | 7 +++-- drivers/video/matrox/matroxfb_DAC1064.c | 4 +-- drivers/video/matrox/matroxfb_Ti3026.c | 4 +-- drivers/video/stifb.c | 7 ++--- fs/jffs2/erase.c | 10 ++++--- kernel/module.c | 6 ++-- sound/pci/emu10k1/p16v.c | 2 +- sound/usb/usx2y/usbusx2yaudio.c | 7 +++-- 70 files changed, 330 insertions(+), 338 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c index 6e3d491184e..b91b2044af9 100644 --- a/arch/avr32/kernel/traps.c +++ b/arch/avr32/kernel/traps.c @@ -32,22 +32,25 @@ void NORET_TYPE die(const char *str, struct pt_regs *regs, long err) spin_lock_irq(&die_lock); bust_spinlocks(1); - printk(KERN_ALERT "Oops: %s, sig: %ld [#%d]\n" KERN_EMERG, + printk(KERN_ALERT "Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); + + printk(KERN_EMERG); + #ifdef CONFIG_PREEMPT - printk("PREEMPT "); + printk(KERN_CONT "PREEMPT "); #endif #ifdef CONFIG_FRAME_POINTER - printk("FRAME_POINTER "); + printk(KERN_CONT "FRAME_POINTER "); #endif if (current_cpu_data.features & AVR32_FEATURE_OCD) { unsigned long did = ocd_read(DID); - printk("chip: 0x%03lx:0x%04lx rev %lu\n", + printk(KERN_CONT "chip: 0x%03lx:0x%04lx rev %lu\n", (did >> 1) & 0x7ff, (did >> 12) & 0x7fff, (did >> 28) & 0xf); } else { - printk("cpu: arch %u r%u / core %u r%u\n", + printk(KERN_CONT "cpu: arch %u r%u / core %u r%u\n", current_cpu_data.arch_type, current_cpu_data.arch_revision, current_cpu_data.cpu_type, diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c index 298f023bcc0..6136c33e919 100644 --- a/arch/blackfin/kernel/setup.c +++ b/arch/blackfin/kernel/setup.c @@ -408,13 +408,14 @@ static void __init print_memory_map(char *who) bfin_memmap.map[i].addr + bfin_memmap.map[i].size); switch (bfin_memmap.map[i].type) { case BFIN_MEMMAP_RAM: - printk("(usable)\n"); - break; + printk(KERN_CONT "(usable)\n"); + break; case BFIN_MEMMAP_RESERVED: - printk("(reserved)\n"); - break; - default: printk("type %lu\n", bfin_memmap.map[i].type); - break; + printk(KERN_CONT "(reserved)\n"); + break; + default: + printk(KERN_CONT "type %lu\n", bfin_memmap.map[i].type); + break; } } } @@ -614,19 +615,19 @@ static __init void memory_setup(void) printk(KERN_INFO "Kernel Managed Memory: %ldMB\n", _ramend >> 20); printk(KERN_INFO "Memory map:\n" - KERN_INFO " fixedcode = 0x%p-0x%p\n" - KERN_INFO " text = 0x%p-0x%p\n" - KERN_INFO " rodata = 0x%p-0x%p\n" - KERN_INFO " bss = 0x%p-0x%p\n" - KERN_INFO " data = 0x%p-0x%p\n" - KERN_INFO " stack = 0x%p-0x%p\n" - KERN_INFO " init = 0x%p-0x%p\n" - KERN_INFO " available = 0x%p-0x%p\n" + " fixedcode = 0x%p-0x%p\n" + " text = 0x%p-0x%p\n" + " rodata = 0x%p-0x%p\n" + " bss = 0x%p-0x%p\n" + " data = 0x%p-0x%p\n" + " stack = 0x%p-0x%p\n" + " init = 0x%p-0x%p\n" + " available = 0x%p-0x%p\n" #ifdef CONFIG_MTD_UCLINUX - KERN_INFO " rootfs = 0x%p-0x%p\n" + " rootfs = 0x%p-0x%p\n" #endif #if DMA_UNCACHED_REGION > 0 - KERN_INFO " DMA Zone = 0x%p-0x%p\n" + " DMA Zone = 0x%p-0x%p\n" #endif , (void *)FIXED_CODE_START, (void *)FIXED_CODE_END, _stext, _etext, @@ -859,13 +860,13 @@ void __init setup_arch(char **cmdline_p) #endif printk(KERN_INFO "Hardware Trace "); if (bfin_read_TBUFCTL() & 0x1) - printk("Active "); + printk(KERN_CONT "Active "); else - printk("Off "); + printk(KERN_CONT "Off "); if (bfin_read_TBUFCTL() & 0x2) - printk("and Enabled\n"); + printk(KERN_CONT "and Enabled\n"); else - printk("and Disabled\n"); + printk(KERN_CONT "and Disabled\n"); #if defined(CONFIG_CHR_DEV_FLASH) || defined(CONFIG_BLK_DEV_FLASH) /* we need to initialize the Flashrom device here since we might diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 8eeb457ce5d..8a1caf2bb5b 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -212,7 +212,7 @@ asmlinkage void double_fault_c(struct pt_regs *fp) console_verbose(); oops_in_progress = 1; #ifdef CONFIG_DEBUG_VERBOSE - printk(KERN_EMERG "\n" KERN_EMERG "Double Fault\n"); + printk(KERN_EMERG "Double Fault\n"); #ifdef CONFIG_DEBUG_DOUBLEFAULT_PRINT if (((long)fp->seqstat & SEQSTAT_EXCAUSE) == VEC_UNCOV) { unsigned int cpu = smp_processor_id(); @@ -583,15 +583,14 @@ asmlinkage void trap_c(struct pt_regs *fp) #ifndef CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE if (trapnr == VEC_CPLB_I_M || trapnr == VEC_CPLB_M) verbose_printk(KERN_NOTICE "No trace since you do not have " - "CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE enabled\n" - KERN_NOTICE "\n"); + "CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE enabled\n\n"); else #endif dump_bfin_trace_buffer(); if (oops_in_progress) { /* Dump the current kernel stack */ - verbose_printk(KERN_NOTICE "\n" KERN_NOTICE "Kernel Stack\n"); + verbose_printk(KERN_NOTICE "Kernel Stack\n"); show_stack(current, NULL); print_modules(); #ifndef CONFIG_ACCESS_CHECK @@ -906,7 +905,7 @@ void show_stack(struct task_struct *task, unsigned long *stack) ret_addr = 0; if (!j && i % 8 == 0) - printk("\n" KERN_NOTICE "%p:",addr); + printk(KERN_NOTICE "%p:",addr); /* if it is an odd address, or zero, just skip it */ if (*addr & 0x1 || !*addr) @@ -996,9 +995,9 @@ void dump_bfin_process(struct pt_regs *fp) printk(KERN_NOTICE "CPU = %d\n", current_thread_info()->cpu); if (!((unsigned long)current->mm & 0x3) && (unsigned long)current->mm >= FIXED_CODE_START) - verbose_printk(KERN_NOTICE "TEXT = 0x%p-0x%p DATA = 0x%p-0x%p\n" - KERN_NOTICE " BSS = 0x%p-0x%p USER-STACK = 0x%p\n" - KERN_NOTICE "\n", + verbose_printk(KERN_NOTICE + "TEXT = 0x%p-0x%p DATA = 0x%p-0x%p\n" + " BSS = 0x%p-0x%p USER-STACK = 0x%p\n\n", (void *)current->mm->start_code, (void *)current->mm->end_code, (void *)current->mm->start_data, @@ -1009,8 +1008,8 @@ void dump_bfin_process(struct pt_regs *fp) else verbose_printk(KERN_NOTICE "invalid mm\n"); } else - verbose_printk(KERN_NOTICE "\n" KERN_NOTICE - "No Valid process in current context\n"); + verbose_printk(KERN_NOTICE + "No Valid process in current context\n"); #endif } @@ -1028,7 +1027,7 @@ void dump_bfin_mem(struct pt_regs *fp) addr < (unsigned short *)((unsigned long)erraddr & ~0xF) + 0x10; addr++) { if (!((unsigned long)addr & 0xF)) - verbose_printk("\n" KERN_NOTICE "0x%p: ", addr); + verbose_printk(KERN_NOTICE "0x%p: ", addr); if (!get_instruction(&val, addr)) { val = 0; @@ -1056,9 +1055,9 @@ void dump_bfin_mem(struct pt_regs *fp) oops_in_progress)){ verbose_printk(KERN_NOTICE "Looks like this was a deferred error - sorry\n"); #ifndef CONFIG_DEBUG_HWERR - verbose_printk(KERN_NOTICE "The remaining message may be meaningless\n" - KERN_NOTICE "You should enable CONFIG_DEBUG_HWERR to get a" - " better idea where it came from\n"); + verbose_printk(KERN_NOTICE +"The remaining message may be meaningless\n" +"You should enable CONFIG_DEBUG_HWERR to get a better idea where it came from\n"); #else /* If we are handling only one peripheral interrupt * and current mm and pid are valid, and the last error @@ -1114,9 +1113,10 @@ void show_regs(struct pt_regs *fp) verbose_printk(KERN_NOTICE "%s", linux_banner); - verbose_printk(KERN_NOTICE "\n" KERN_NOTICE "SEQUENCER STATUS:\t\t%s\n", print_tainted()); + verbose_printk(KERN_NOTICE "\nSEQUENCER STATUS:\t\t%s\n", + print_tainted()); verbose_printk(KERN_NOTICE " SEQSTAT: %08lx IPEND: %04lx SYSCFG: %04lx\n", - (long)fp->seqstat, fp->ipend, fp->syscfg); + (long)fp->seqstat, fp->ipend, fp->syscfg); if ((fp->seqstat & SEQSTAT_EXCAUSE) == VEC_HWERR) { verbose_printk(KERN_NOTICE " HWERRCAUSE: 0x%lx\n", (fp->seqstat & SEQSTAT_HWERRCAUSE) >> 14); @@ -1184,7 +1184,7 @@ unlock: verbose_printk(KERN_NOTICE "ICPLB_FAULT_ADDR: %s\n", buf); } - verbose_printk(KERN_NOTICE "\n" KERN_NOTICE "PROCESSOR STATE:\n"); + verbose_printk(KERN_NOTICE "PROCESSOR STATE:\n"); verbose_printk(KERN_NOTICE " R0 : %08lx R1 : %08lx R2 : %08lx R3 : %08lx\n", fp->r0, fp->r1, fp->r2, fp->r3); verbose_printk(KERN_NOTICE " R4 : %08lx R5 : %08lx R6 : %08lx R7 : %08lx\n", diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 1e96c6eb631..8f8f4abab2f 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -290,7 +290,7 @@ void dump(struct pt_regs *fp) unsigned char *tp; int i; - printk(KERN_EMERG "\n" KERN_EMERG "CURRENT PROCESS:\n" KERN_EMERG "\n"); + printk(KERN_EMERG "\nCURRENT PROCESS:\n\n"); printk(KERN_EMERG "COMM=%s PID=%d\n", current->comm, current->pid); if (current->mm) { @@ -301,8 +301,7 @@ void dump(struct pt_regs *fp) (int) current->mm->end_data, (int) current->mm->end_data, (int) current->mm->brk); - printk(KERN_EMERG "USER-STACK=%08x KERNEL-STACK=%08x\n" - KERN_EMERG "\n", + printk(KERN_EMERG "USER-STACK=%08x KERNEL-STACK=%08x\n\n", (int) current->mm->start_stack, (int)(((unsigned long) current) + THREAD_SIZE)); } @@ -313,35 +312,35 @@ void dump(struct pt_regs *fp) fp->d0, fp->d1, fp->d2, fp->d3); printk(KERN_EMERG "d4: %08lx d5: %08lx a0: %08lx a1: %08lx\n", fp->d4, fp->d5, fp->a0, fp->a1); - printk(KERN_EMERG "\n" KERN_EMERG "USP: %08x TRAPFRAME: %08x\n", + printk(KERN_EMERG "\nUSP: %08x TRAPFRAME: %08x\n", (unsigned int) rdusp(), (unsigned int) fp); - printk(KERN_EMERG "\n" KERN_EMERG "CODE:"); + printk(KERN_EMERG "\nCODE:"); tp = ((unsigned char *) fp->pc) - 0x20; for (sp = (unsigned long *) tp, i = 0; (i < 0x40); i += 4) { if ((i % 0x10) == 0) - printk("\n" KERN_EMERG "%08x: ", (int) (tp + i)); + printk(KERN_EMERG "%08x: ", (int) (tp + i)); printk("%08x ", (int) *sp++); } - printk("\n" KERN_EMERG "\n"); + printk(KERN_EMERG "\n"); printk(KERN_EMERG "KERNEL STACK:"); tp = ((unsigned char *) fp) - 0x40; for (sp = (unsigned long *) tp, i = 0; (i < 0xc0); i += 4) { if ((i % 0x10) == 0) - printk("\n" KERN_EMERG "%08x: ", (int) (tp + i)); + printk(KERN_EMERG "%08x: ", (int) (tp + i)); printk("%08x ", (int) *sp++); } - printk("\n" KERN_EMERG "\n"); + printk(KERN_EMERG "\n"); printk(KERN_EMERG "USER STACK:"); tp = (unsigned char *) (rdusp() - 0x10); for (sp = (unsigned long *) tp, i = 0; (i < 0x80); i += 4) { if ((i % 0x10) == 0) - printk("\n" KERN_EMERG "%08x: ", (int) (tp + i)); + printk(KERN_EMERG "%08x: ", (int) (tp + i)); printk("%08x ", (int) *sp++); } - printk("\n" KERN_EMERG "\n"); + printk(KERN_EMERG "\n"); } /* diff --git a/arch/m68knommu/kernel/traps.c b/arch/m68knommu/kernel/traps.c index 51d325343ab..3739c8f657d 100644 --- a/arch/m68knommu/kernel/traps.c +++ b/arch/m68knommu/kernel/traps.c @@ -111,7 +111,7 @@ static void print_this_address(unsigned long addr, int i) if (i % 5) printk(KERN_CONT " [%08lx] ", addr); else - printk(KERN_CONT "\n" KERN_EMERG " [%08lx] ", addr); + printk(KERN_EMERG " [%08lx] ", addr); i++; #endif } @@ -137,8 +137,8 @@ static void __show_stack(struct task_struct *task, unsigned long *stack) if (stack + 1 + i > endstack) break; if (i % 8 == 0) - printk("\n" KERN_EMERG " "); - printk(" %08lx", *(stack + i)); + printk(KERN_EMERG " "); + printk(KERN_CONT " %08lx", *(stack + i)); } printk("\n"); i = 0; diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c index 681ad8c9e4f..0dfdc500112 100644 --- a/arch/mn10300/kernel/traps.c +++ b/arch/mn10300/kernel/traps.c @@ -136,8 +136,7 @@ void show_trace(unsigned long *sp) unsigned long *stack, addr, module_start, module_end; int i; - printk(KERN_EMERG "\n" - KERN_EMERG "Call Trace:"); + printk(KERN_EMERG "\nCall Trace:"); stack = sp; i = 0; @@ -153,7 +152,7 @@ void show_trace(unsigned long *sp) printk("\n"); #else if ((i % 6) == 0) - printk("\n" KERN_EMERG " "); + printk(KERN_EMERG " "); printk("[<%08lx>] ", addr); i++; #endif @@ -180,7 +179,7 @@ void show_stack(struct task_struct *task, unsigned long *sp) if (((long) stack & (THREAD_SIZE - 1)) == 0) break; if ((i % 8) == 0) - printk("\n" KERN_EMERG " "); + printk(KERN_EMERG " "); printk("%08lx ", *stack++); } @@ -264,8 +263,7 @@ void show_registers(struct pt_regs *regs) show_stack(current, (unsigned long *) sp); #if 0 - printk(KERN_EMERG "\n" - KERN_EMERG "Code: "); + printk(KERN_EMERG "\nCode: "); if (regs->pc < PAGE_OFFSET) goto bad; @@ -311,16 +309,14 @@ void die(const char *str, struct pt_regs *regs, enum exception_code code) { console_verbose(); spin_lock_irq(&die_lock); - printk(KERN_EMERG "\n" - KERN_EMERG "%s: %04x\n", + printk(KERN_EMERG "\n%s: %04x\n", str, code & 0xffff); show_registers(regs); if (regs->pc >= 0x02000000 && regs->pc < 0x04000000 && (regs->epsw & (EPSW_IM | EPSW_IE)) != (EPSW_IM | EPSW_IE)) { printk(KERN_EMERG "Exception in usermode interrupt handler\n"); - printk(KERN_EMERG "\n" - KERN_EMERG " Please connect to kernel debugger !!\n"); + printk(KERN_EMERG "\nPlease connect to kernel debugger !!\n"); asm volatile ("0: bra 0b"); } @@ -429,9 +425,8 @@ asmlinkage void io_bus_error(u32 bcberr, u32 bcbear, struct pt_regs *regs) { console_verbose(); - printk(KERN_EMERG "\n" - KERN_EMERG "Asynchronous I/O Bus Error\n" - KERN_EMERG "==========================\n"); + printk(KERN_EMERG "Asynchronous I/O Bus Error\n"); + printk(KERN_EMERG "==========================\n"); if (bcberr & BCBERR_BEME) printk(KERN_EMERG "- Multiple recorded errors\n"); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 61c07078c07..1f3aa8db020 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -156,7 +156,7 @@ void machine_power_off(void) * software. The user has to press the button himself. */ printk(KERN_EMERG "System shut down completed.\n" - KERN_EMERG "Please power this system off now."); + "Please power this system off now."); } void (*pm_power_off)(void) = machine_power_off; diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index c32f5d6d778..528f0ff9b27 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -250,15 +250,14 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) oops_enter(); /* Amuse the user in a SPARC fashion */ - if (err) printk( -KERN_CRIT " _______________________________ \n" -KERN_CRIT " < Your System ate a SPARC! Gah! >\n" -KERN_CRIT " ------------------------------- \n" -KERN_CRIT " \\ ^__^\n" -KERN_CRIT " \\ (xx)\\_______\n" -KERN_CRIT " (__)\\ )\\/\\\n" -KERN_CRIT " U ||----w |\n" -KERN_CRIT " || ||\n"); + if (err) printk(KERN_CRIT + " _______________________________ \n" + " < Your System ate a SPARC! Gah! >\n" + " ------------------------------- \n" + " \\ ^__^\n" + " (__)\\ )\\/\\\n" + " U ||----w |\n" + " || ||\n"); /* unlock the pdc lock if necessary */ pdc_emergency_unlock(); @@ -797,7 +796,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) else printk(KERN_DEBUG "User Fault (long pointer) (fault %d) ", code); - printk("pid=%d command='%s'\n", task_pid_nr(current), current->comm); + printk(KERN_CONT "pid=%d command='%s'\n", + task_pid_nr(current), current->comm); show_regs(regs); #endif si.si_signo = SIGSEGV; diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 56d43d0a396..0960de54495 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -70,8 +70,8 @@ void show_stack(struct task_struct *task, unsigned long *esp) if (kstack_end(stack)) break; if (i && ((i % 8) == 0)) - printk("\n" KERN_INFO " "); - printk("%08lx ", *stack++); + printk(KERN_INFO " "); + printk(KERN_CONT "%08lx ", *stack++); } show_trace(task, esp); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8fd1efb5a0b..90b5e6efa93 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1739,7 +1739,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy) if (apic_verbosity == APIC_QUIET) return; - printk(KERN_DEBUG "\n"); printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index ae068f59603..2a50ef89100 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1259,7 +1259,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { static const char ACPI_PSS_BIOS_BUG_MSG[] = KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" - KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; + FW_BUG PFX "Try again with latest BIOS.\n"; struct powernow_k8_data *data; struct init_on_cpu init_on_cpu; int rc; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index af425b83202..484c1e5f658 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -194,14 +194,14 @@ static void print_mce(struct mce *m) m->cs, m->ip); if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); - printk("\n"); + printk(KERN_CONT "\n"); } printk(KERN_EMERG "TSC %llx ", m->tsc); if (m->addr) - printk("ADDR %llx ", m->addr); + printk(KERN_CONT "ADDR %llx ", m->addr); if (m->misc) - printk("MISC %llx ", m->misc); - printk("\n"); + printk(KERN_CONT "MISC %llx ", m->misc); + printk(KERN_CONT "\n"); printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); @@ -209,13 +209,13 @@ static void print_mce(struct mce *m) static void print_mce_head(void) { - printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n"); + printk(KERN_EMERG "\nHARDWARE ERROR\n"); } static void print_mce_tail(void) { printk(KERN_EMERG "This is not a software problem!\n" - KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n"); + "Run through mcelog --ascii to decode and contact your hardware vendor\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index c4ca89d9aaf..5cb5725b2ba 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -627,10 +627,9 @@ __init void e820_setup_gap(void) #ifdef CONFIG_X86_64 if (!found) { gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " - "address range\n" - KERN_ERR "PCI: Unassigned devices with 32bit resource " - "registers may break!\n"); + printk(KERN_ERR + "PCI: Warning: Cannot find a gap in the 32bit address range\n" + "PCI: Unassigned devices with 32bit resource registers may break!\n"); } #endif diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index cfd9f906389..d2e56b8f48e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -675,7 +675,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) nommu: /* Should not happen anymore */ printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" - KERN_WARNING "falling back to iommu=soft.\n"); + "falling back to iommu=soft.\n"); return -1; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 78a5fff857b..85307cc6e45 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -426,10 +426,11 @@ static noinline int vmalloc_fault(unsigned long address) } static const char errata93_warning[] = -KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" -KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" -KERN_ERR "******* Please consider a BIOS update.\n" -KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; +KERN_ERR +"******* Your BIOS seems to not contain a fix for K8 errata #93\n" +"******* Working around it, but it may cause SEGVs or burn power.\n" +"******* Please consider a BIOS update.\n" +"******* Disabling USB legacy in the BIOS may also help.\n"; /* * No vm86 mode in 64-bit mode: diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index ba9ab934978..e64efac3b9d 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -354,10 +354,10 @@ void show_regs(struct pt_regs * regs) for (i = 0; i < 16; i++) { if ((i % 8) == 0) - printk ("\n" KERN_INFO "a%02d: ", i); - printk("%08lx ", regs->areg[i]); + printk(KERN_INFO "a%02d:", i); + printk(KERN_CONT " %08lx", regs->areg[i]); } - printk("\n"); + printk(KERN_CONT "\n"); printk("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n", regs->pc, regs->ps, regs->depc, regs->excvaddr); diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 9c6e5b0fe89..2f07b7c99a9 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1645,7 +1645,7 @@ static int __init fd_probe_drives(void) { int drive,drives,nomem; - printk(KERN_INFO "FD: probing units\n" KERN_INFO "found "); + printk(KERN_INFO "FD: probing units\nfound "); drives=0; nomem=0; for(drive=0;drivedev, " ctrl: %.8x seccnt/cmd: %.4x ver:%.4x\n" - KERN_INFO " status:%.8x mpu_lba:%.8x busmode:%4x\n" - KERN_INFO " error: %.8x cfg_lba:%.8x fatstat:%.4x\n", + dev_info(ace->dev, + " ctrl: %.8x seccnt/cmd: %.4x ver:%.4x\n" + " status:%.8x mpu_lba:%.8x busmode:%4x\n" + " error: %.8x cfg_lba:%.8x fatstat:%.4x\n", ace_in32(ace, ACE_CTRL), ace_in(ace, ACE_SECCNTCMD), ace_in(ace, ACE_VERSION), diff --git a/drivers/char/hw_random/intel-rng.c b/drivers/char/hw_random/intel-rng.c index 5dcbe603eca..91b53eb1c05 100644 --- a/drivers/char/hw_random/intel-rng.c +++ b/drivers/char/hw_random/intel-rng.c @@ -305,10 +305,11 @@ static int __init intel_init_hw_struct(struct intel_rng_hw *intel_rng_hw, (BIOS_CNTL_LOCK_ENABLE_MASK|BIOS_CNTL_WRITE_ENABLE_MASK)) == BIOS_CNTL_LOCK_ENABLE_MASK) { static __initdata /*const*/ char warning[] = - KERN_WARNING PFX "Firmware space is locked read-only. If you can't or\n" - KERN_WARNING PFX "don't want to disable this in firmware setup, and if\n" - KERN_WARNING PFX "you are certain that your system has a functional\n" - KERN_WARNING PFX "RNG, try using the 'no_fwh_detect' option.\n"; + KERN_WARNING +PFX "Firmware space is locked read-only. If you can't or\n" +PFX "don't want to disable this in firmware setup, and if\n" +PFX "you are certain that your system has a functional\n" +PFX "RNG, try using the 'no_fwh_detect' option.\n"; if (no_fwh_detect) return -ENODEV; diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 4159292e35c..621d1184673 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -1478,10 +1478,10 @@ static int __devinit load_firmware(struct pci_dev *pdev, status = inw(base + 0x4); if (status != 0) { dev_warn(&pdev->dev, "Card%d rejected load header:\n" - KERN_WARNING "Address:0x%x\n" - KERN_WARNING "Count:0x%x\n" - KERN_WARNING "Status:0x%x\n", - index + 1, frame->addr, frame->count, status); + "Address:0x%x\n" + "Count:0x%x\n" + "Status:0x%x\n", + index + 1, frame->addr, frame->count, status); goto errrelfw; } outsw(base, frame->data, word_count); @@ -1526,10 +1526,10 @@ static int __devinit load_firmware(struct pci_dev *pdev, status = inw(base + 0x4); if (status != 0) { dev_warn(&pdev->dev, "Card%d rejected verify header:\n" - KERN_WARNING "Address:0x%x\n" - KERN_WARNING "Count:0x%x\n" - KERN_WARNING "Status: 0x%x\n", - index + 1, frame->addr, frame->count, status); + "Address:0x%x\n" + "Count:0x%x\n" + "Status: 0x%x\n", + index + 1, frame->addr, frame->count, status); goto errrelfw; } diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index e4476743f20..b1bc6e277d2 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -85,10 +85,11 @@ static void dump_iic_regs(const char* header, struct ibm_iic_private* dev) { volatile struct iic_regs __iomem *iic = dev->vaddr; printk(KERN_DEBUG "ibm-iic%d: %s\n", dev->idx, header); - printk(KERN_DEBUG " cntl = 0x%02x, mdcntl = 0x%02x\n" - KERN_DEBUG " sts = 0x%02x, extsts = 0x%02x\n" - KERN_DEBUG " clkdiv = 0x%02x, xfrcnt = 0x%02x\n" - KERN_DEBUG " xtcntlss = 0x%02x, directcntl = 0x%02x\n", + printk(KERN_DEBUG + " cntl = 0x%02x, mdcntl = 0x%02x\n" + " sts = 0x%02x, extsts = 0x%02x\n" + " clkdiv = 0x%02x, xfrcnt = 0x%02x\n" + " xtcntlss = 0x%02x, directcntl = 0x%02x\n", in_8(&iic->cntl), in_8(&iic->mdcntl), in_8(&iic->sts), in_8(&iic->extsts), in_8(&iic->clkdiv), in_8(&iic->xfrcnt), in_8(&iic->xtcntlss), in_8(&iic->directcntl)); diff --git a/drivers/md/md.c b/drivers/md/md.c index 0f4a70c43ff..d4351ff0849 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1756,9 +1756,10 @@ static void print_sb_1(struct mdp_superblock_1 *sb) __u8 *uuid; uuid = sb->set_uuid; - printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" - ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n" - KERN_INFO "md: Name: \"%s\" CT:%llu\n", + printk(KERN_INFO + "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" + ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n" + "md: Name: \"%s\" CT:%llu\n", le32_to_cpu(sb->major_version), le32_to_cpu(sb->feature_map), uuid[0], uuid[1], uuid[2], uuid[3], @@ -1770,12 +1771,13 @@ static void print_sb_1(struct mdp_superblock_1 *sb) & MD_SUPERBLOCK_1_TIME_SEC_MASK); uuid = sb->device_uuid; - printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" + printk(KERN_INFO + "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" " RO:%llu\n" - KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" - ":%02x%02x%02x%02x%02x%02x\n" - KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" - KERN_INFO "md: (MaxDev:%u) \n", + "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" + ":%02x%02x%02x%02x%02x%02x\n" + "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" + "md: (MaxDev:%u) \n", le32_to_cpu(sb->level), (unsigned long long)le64_to_cpu(sb->size), le32_to_cpu(sb->raid_disks), diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index 8d1c60a3f0d..5d778ec8cdb 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -235,7 +235,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg) skb->ip_summed = CHECKSUM_UNNECESSARY; dev_dbg(xpnet, "passing skb to network layer\n" - KERN_DEBUG "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p " "skb->end=0x%p skb->len=%d\n", (void *)skb->head, (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), skb->len); @@ -399,7 +399,7 @@ xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg, msg->buf_pa = xp_pa((void *)start_addr); dev_dbg(xpnet, "sending XPC message to %d:%d\n" - KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, " + "msg->buf_pa=0x%lx, msg->size=%u, " "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n", dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size, msg->leadin_ignore, msg->tailout_ignore); diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c index 85a18175730..08787f5a22a 100644 --- a/drivers/net/a2065.c +++ b/drivers/net/a2065.c @@ -569,16 +569,8 @@ static int lance_start_xmit (struct sk_buff *skb, struct net_device *dev) #ifdef DEBUG_DRIVER /* dump the packet */ - { - int i; - - for (i = 0; i < 64; i++) { - if ((i % 16) == 0) - printk("\n" KERN_DEBUG); - printk ("%2.2x ", skb->data [i]); - } - printk("\n"); - } + print_hex_dump(KERN_DEBUG, "skb->data: ", DUMP_PREFIX_NONE, + 16, 1, skb->data, 64, true); #endif entry = lp->tx_new & lp->tx_ring_mod_mask; ib->btx_ring [entry].length = (-skblen) | 0xf000; diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index d6d4ab3b430..7d227cdab9f 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -158,15 +158,12 @@ module_exit(arcnet_exit); void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) { - int i; + char hdr[32]; - printk(KERN_DEBUG "%6s: skb dump (%s) follows:", dev->name, desc); - for (i = 0; i < skb->len; i++) { - if (i % 16 == 0) - printk("\n" KERN_DEBUG "[%04X] ", i); - printk("%02X ", ((u_char *) skb->data)[i]); - } - printk("\n"); + /* dump the packet */ + snprintf(hdr, sizeof(hdr), "%6s:%s skb->data:", dev->name, desc); + print_hex_dump(KERN_DEBUG, hdr, DUMP_PREFIX_OFFSET, + 16, 1, skb->data, skb->len, true); } EXPORT_SYMBOL(arcnet_dump_skb); @@ -184,6 +181,7 @@ static void arcnet_dump_packet(struct net_device *dev, int bufnum, int i, length; unsigned long flags = 0; static uint8_t buf[512]; + char hdr[32]; /* hw.copy_from_card expects IRQ context so take the IRQ lock to keep it single threaded */ @@ -197,14 +195,10 @@ static void arcnet_dump_packet(struct net_device *dev, int bufnum, /* if the offset[0] byte is nonzero, this is a 256-byte packet */ length = (buf[2] ? 256 : 512); - printk(KERN_DEBUG "%6s: packet dump (%s) follows:", dev->name, desc); - for (i = 0; i < length; i++) { - if (i % 16 == 0) - printk("\n" KERN_DEBUG "[%04X] ", i); - printk("%02X ", buf[i]); - } - printk("\n"); - + /* dump the packet */ + snprintf(hdr, sizeof(hdr), "%6s:%s packet dump:", dev->name, desc); + print_hex_dump(KERN_DEBUG, hdr, DUMP_PREFIX_OFFSET, + 16, 1, buf, length, true); } #else diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c index 9578a3dfac0..a76315dc776 100644 --- a/drivers/net/bmac.c +++ b/drivers/net/bmac.c @@ -428,10 +428,11 @@ bmac_init_phy(struct net_device *dev) printk(KERN_DEBUG "phy registers:"); for (addr = 0; addr < 32; ++addr) { if ((addr & 7) == 0) - printk("\n" KERN_DEBUG); - printk(" %.4x", bmac_mif_read(dev, addr)); + printk(KERN_DEBUG); + printk(KERN_CONT " %.4x", bmac_mif_read(dev, addr)); } - printk("\n"); + print(KERN_CONT "\n"); + if (bp->is_bmac_plus) { unsigned int capable, ctrl; diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 6c67be67976..c36a5f33739 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -484,8 +484,9 @@ static void bnx2x_fw_dump(struct bnx2x *bp) mark = REG_RD(bp, MCP_REG_MCPR_SCRATCH + 0xf104); mark = ((mark + 0x3) & ~0x3); - printk(KERN_ERR PFX "begin fw dump (mark 0x%x)\n" KERN_ERR, mark); + printk(KERN_ERR PFX "begin fw dump (mark 0x%x)\n", mark); + printk(KERN_ERR PFX); for (offset = mark - 0x08000000; offset <= 0xF900; offset += 0x8*4) { for (word = 0; word < 8; word++) data[word] = htonl(REG_RD(bp, MCP_REG_MCPR_SCRATCH + @@ -500,7 +501,7 @@ static void bnx2x_fw_dump(struct bnx2x *bp) data[8] = 0x0; printk(KERN_CONT "%s", (char *)data); } - printk("\n" KERN_ERR PFX "end of fw dump\n"); + printk(KERN_ERR PFX "end of fw dump\n"); } static void bnx2x_panic_dump(struct bnx2x *bp) @@ -7354,7 +7355,7 @@ static void bnx2x_reset_task(struct work_struct *work) #ifdef BNX2X_STOP_ON_ERROR BNX2X_ERR("reset task called but STOP_ON_ERROR defined" " so reset not done to allow debug dump,\n" - KERN_ERR " you will need to reboot when done\n"); + " you will need to reboot when done\n"); return; #endif diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c index 895d72143ee..4b6a219fece 100644 --- a/drivers/net/dl2k.c +++ b/drivers/net/dl2k.c @@ -268,8 +268,9 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) printk(KERN_INFO "tx_coalesce:\t%d packets\n", tx_coalesce); if (np->coalesce) - printk(KERN_INFO "rx_coalesce:\t%d packets\n" - KERN_INFO "rx_timeout: \t%d ns\n", + printk(KERN_INFO + "rx_coalesce:\t%d packets\n" + "rx_timeout: \t%d ns\n", np->rx_coalesce, np->rx_timeout*640); if (np->vlan) printk(KERN_INFO "vlan(id):\t%d\n", np->vlan); @@ -1522,9 +1523,9 @@ mii_get_media (struct net_device *dev) printk (KERN_INFO "Operating at 10 Mbps, "); } if (bmcr & MII_BMCR_DUPLEX_MODE) { - printk ("Full duplex\n"); + printk (KERN_CONT "Full duplex\n"); } else { - printk ("Half duplex\n"); + printk (KERN_CONT "Half duplex\n"); } } if (np->tx_flow) @@ -1614,9 +1615,9 @@ mii_set_media (struct net_device *dev) } if (np->full_duplex) { bmcr |= MII_BMCR_DUPLEX_MODE; - printk ("Full duplex\n"); + printk (KERN_CONT "Full duplex\n"); } else { - printk ("Half duplex\n"); + printk (KERN_CONT "Half duplex\n"); } #if 0 /* Set 1000BaseT Master/Slave setting */ @@ -1669,9 +1670,9 @@ mii_get_media_pcs (struct net_device *dev) __u16 bmcr = mii_read (dev, phy_addr, PCS_BMCR); printk (KERN_INFO "Operating at 1000 Mbps, "); if (bmcr & MII_BMCR_DUPLEX_MODE) { - printk ("Full duplex\n"); + printk (KERN_CONT "Full duplex\n"); } else { - printk ("Half duplex\n"); + printk (KERN_CONT "Half duplex\n"); } } if (np->tx_flow) diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c index b60e27dfcfa..88d7ebf3122 100644 --- a/drivers/net/epic100.c +++ b/drivers/net/epic100.c @@ -338,8 +338,7 @@ static int __devinit epic_init_one (struct pci_dev *pdev, #ifndef MODULE static int printed_version; if (!printed_version++) - printk (KERN_INFO "%s" KERN_INFO "%s", - version, version2); + printk(KERN_INFO "%s%s", version, version2); #endif card_idx++; @@ -1600,7 +1599,7 @@ static int __init epic_init (void) { /* when a module, this is printed whether or not devices are found in probe */ #ifdef MODULE - printk (KERN_INFO "%s" KERN_INFO "%s", + printk (KERN_INFO "%s%s", version, version2); #endif diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c index 891be28a7d4..053fb49820b 100644 --- a/drivers/net/fealnx.c +++ b/drivers/net/fealnx.c @@ -1209,17 +1209,20 @@ static void fealnx_tx_timeout(struct net_device *dev) unsigned long flags; int i; - printk(KERN_WARNING "%s: Transmit timed out, status %8.8x," - " resetting...\n", dev->name, ioread32(ioaddr + ISR)); + printk(KERN_WARNING + "%s: Transmit timed out, status %8.8x, resetting...\n", + dev->name, ioread32(ioaddr + ISR)); { printk(KERN_DEBUG " Rx ring %p: ", np->rx_ring); for (i = 0; i < RX_RING_SIZE; i++) - printk(" %8.8x", (unsigned int) np->rx_ring[i].status); - printk("\n" KERN_DEBUG " Tx ring %p: ", np->tx_ring); + printk(PR_CONT " %8.8x", + (unsigned int) np->rx_ring[i].status); + printk(KERN_CONT "\n"); + printk(KERN_DEBUG " Tx ring %p: ", np->tx_ring); for (i = 0; i < TX_RING_SIZE; i++) - printk(" %4.4x", np->tx_ring[i].status); - printk("\n"); + printk(PR_CONT " %4.4x", np->tx_ring[i].status); + printk(PR_CONT "\n"); } spin_lock_irqsave(&np->lock, flags); diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index 9d5b62cb30f..d62378cbc14 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -173,8 +173,8 @@ static int tx_params[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; static const char version[] __devinitconst = KERN_INFO DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " Written by Donald Becker\n" -KERN_INFO " Some modifications by Eric kasten \n" -KERN_INFO " Further modifications by Keith Underwood \n"; +" Some modifications by Eric kasten \n" +" Further modifications by Keith Underwood \n"; /* IP_MF appears to be only defined in , however, @@ -1080,11 +1080,14 @@ static void hamachi_tx_timeout(struct net_device *dev) { printk(KERN_DEBUG " Rx ring %p: ", hmp->rx_ring); for (i = 0; i < RX_RING_SIZE; i++) - printk(" %8.8x", le32_to_cpu(hmp->rx_ring[i].status_n_length)); - printk("\n"KERN_DEBUG" Tx ring %p: ", hmp->tx_ring); + printk(KERN_CONT " %8.8x", + le32_to_cpu(hmp->rx_ring[i].status_n_length)); + printk(KERN_CONT "\n"); + printk(KERN_DEBUG" Tx ring %p: ", hmp->tx_ring); for (i = 0; i < TX_RING_SIZE; i++) - printk(" %4.4x", le32_to_cpu(hmp->tx_ring[i].status_n_length)); - printk("\n"); + printk(KERN_CONT " %4.4x", + le32_to_cpu(hmp->tx_ring[i].status_n_length)); + printk(KERN_CONT "\n"); } /* Reinit the hardware and make sure the Rx and Tx processes @@ -1753,13 +1756,13 @@ static int hamachi_close(struct net_device *dev) #ifdef __i386__ if (hamachi_debug > 2) { - printk("\n"KERN_DEBUG" Tx ring at %8.8x:\n", + printk(KERN_DEBUG " Tx ring at %8.8x:\n", (int)hmp->tx_ring_dma); for (i = 0; i < TX_RING_SIZE; i++) - printk(" %c #%d desc. %8.8x %8.8x.\n", + printk(KERN_DEBUG " %c #%d desc. %8.8x %8.8x.\n", readl(ioaddr + TxCurPtr) == (long)&hmp->tx_ring[i] ? '>' : ' ', i, hmp->tx_ring[i].status_n_length, hmp->tx_ring[i].addr); - printk("\n"KERN_DEBUG " Rx ring %8.8x:\n", + printk(KERN_DEBUG " Rx ring %8.8x:\n", (int)hmp->rx_ring_dma); for (i = 0; i < RX_RING_SIZE; i++) { printk(KERN_DEBUG " %c #%d desc. %4.4x %8.8x\n", @@ -1770,7 +1773,7 @@ static int hamachi_close(struct net_device *dev) u16 *addr = (u16 *) hmp->rx_skbuff[i]->data; int j; - + printk(KERN_DEBUG "Addr: "); for (j = 0; j < 0x50; j++) printk(" %4.4x", addr[j]); printk("\n"); diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 5e4b7afd068..352703255bb 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -68,7 +68,7 @@ static const char paranoia_str[] = KERN_ERR static const char bc_drvname[] = "baycom_epp"; static const char bc_drvinfo[] = KERN_INFO "baycom_epp: (C) 1998-2000 Thomas Sailer, HB9JNX/AE4WA\n" -KERN_INFO "baycom_epp: version 0.7 compiled " __TIME__ " " __DATE__ "\n"; +"baycom_epp: version 0.7 compiled " __TIME__ " " __DATE__ "\n"; /* --------------------------------------------------------------------- */ diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c index 2e6fc4dc74b..5f5af9a606f 100644 --- a/drivers/net/hamradio/baycom_par.c +++ b/drivers/net/hamradio/baycom_par.c @@ -102,7 +102,7 @@ static const char bc_drvname[] = "baycom_par"; static const char bc_drvinfo[] = KERN_INFO "baycom_par: (C) 1996-2000 Thomas Sailer, HB9JNX/AE4WA\n" -KERN_INFO "baycom_par: version 0.9 compiled " __TIME__ " " __DATE__ "\n"; +"baycom_par: version 0.9 compiled " __TIME__ " " __DATE__ "\n"; /* --------------------------------------------------------------------- */ diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index b6a816e60c0..aa4488e871b 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -91,7 +91,7 @@ static const char bc_drvname[] = "baycom_ser_fdx"; static const char bc_drvinfo[] = KERN_INFO "baycom_ser_fdx: (C) 1996-2000 Thomas Sailer, HB9JNX/AE4WA\n" -KERN_INFO "baycom_ser_fdx: version 0.10 compiled " __TIME__ " " __DATE__ "\n"; +"baycom_ser_fdx: version 0.10 compiled " __TIME__ " " __DATE__ "\n"; /* --------------------------------------------------------------------- */ diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c index 3bcc57acbe6..88c59359602 100644 --- a/drivers/net/hamradio/baycom_ser_hdx.c +++ b/drivers/net/hamradio/baycom_ser_hdx.c @@ -79,7 +79,7 @@ static const char bc_drvname[] = "baycom_ser_hdx"; static const char bc_drvinfo[] = KERN_INFO "baycom_ser_hdx: (C) 1996-2000 Thomas Sailer, HB9JNX/AE4WA\n" -KERN_INFO "baycom_ser_hdx: version 0.10 compiled " __TIME__ " " __DATE__ "\n"; +"baycom_ser_hdx: version 0.10 compiled " __TIME__ " " __DATE__ "\n"; /* --------------------------------------------------------------------- */ diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c index c9bfe4eea18..78c088331f5 100644 --- a/drivers/net/natsemi.c +++ b/drivers/net/natsemi.c @@ -130,8 +130,8 @@ static int full_duplex[MAX_UNITS]; static const char version[] __devinitconst = KERN_INFO DRV_NAME " dp8381x driver, version " DRV_VERSION ", " DRV_RELDATE "\n" - KERN_INFO " originally by Donald Becker \n" - KERN_INFO " 2.4.x kernel port by Jeff Garzik, Tjeerd Mulder\n"; + " originally by Donald Becker \n" + " 2.4.x kernel port by Jeff Garzik, Tjeerd Mulder\n"; MODULE_AUTHOR("Donald Becker "); MODULE_DESCRIPTION("National Semiconductor DP8381x series PCI Ethernet driver"); diff --git a/drivers/net/ne.c b/drivers/net/ne.c index 5c3e242428f..992dbfffdb0 100644 --- a/drivers/net/ne.c +++ b/drivers/net/ne.c @@ -321,7 +321,7 @@ static int __init ne_probe1(struct net_device *dev, unsigned long ioaddr) } if (ei_debug && version_printed++ == 0) - printk(KERN_INFO "%s" KERN_INFO "%s", version1, version2); + printk(KERN_INFO "%s%s", version1, version2); printk(KERN_INFO "NE*000 ethercard probe at %#3lx:", ioaddr); diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c index 8c1f6988f39..89f7b2ad523 100644 --- a/drivers/net/pci-skeleton.c +++ b/drivers/net/pci-skeleton.c @@ -105,7 +105,7 @@ IVc. Errata static char version[] __devinitdata = KERN_INFO NETDRV_DRIVER_LOAD_MSG "\n" -KERN_INFO " Support available from http://foo.com/bar/baz.html\n"; +" Support available from http://foo.com/bar/baz.html\n"; /* define to 1 to enable PIO instead of MMIO */ #undef USE_IO_OPS diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index f51944b28cf..06618af1a46 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -298,14 +298,11 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) strcpy(info->node.dev_name, dev->name); - printk(KERN_INFO "%s: port %#3lx, irq %d,", - dev->name, dev->base_addr, dev->irq); - printk (" mmio %#5lx,", (u_long)ti->mmio); - printk (" sram %#5lx,", (u_long)ti->sram_base << 12); - printk ("\n" KERN_INFO " hwaddr="); - for (i = 0; i < TR_ALEN; i++) - printk("%02X", dev->dev_addr[i]); - printk("\n"); + printk(KERN_INFO + "%s: port %#3lx, irq %d, mmio %#5lx, sram %#5lx, hwaddr=%pM\n", + dev->name, dev->base_addr, dev->irq, + (u_long)ti->mmio, (u_long)(ti->sram_base << 12), + dev->dev_addr); return 0; cs_failed: diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index 02ef63ed1f9..36de91baf23 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -1425,15 +1425,12 @@ static void BuildLAF(int *ladrf, int *adr) ladrf[byte] |= (1 << (hashcode & 7)); #ifdef PCMCIA_DEBUG - if (pc_debug > 2) { - printk(KERN_DEBUG " adr ="); - for (i = 0; i < 6; i++) - printk(" %02X", adr[i]); - printk("\n" KERN_DEBUG " hashcode = %d(decimal), ladrf[0:63]" - " =", hashcode); - for (i = 0; i < 8; i++) - printk(" %02X", ladrf[i]); - printk("\n"); + if (pc_debug > 2) + printk(KERN_DEBUG " adr =%pM\n", adr); + printk(KERN_DEBUG " hashcode = %d(decimal), ladrf[0:63] =", hashcode); + for (i = 0; i < 8; i++) + printk(KERN_CONT " %02X", ladrf[i]); + printk(KERN_CONT "\n"); } #endif } /* BuildLAF */ diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 1c35e1d637a..28368157dac 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -485,7 +485,7 @@ static void pcnet32_realloc_tx_ring(struct net_device *dev, &new_ring_dma_addr); if (new_tx_ring == NULL) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR + printk(KERN_ERR "%s: Consistent memory allocation failed.\n", dev->name); return; @@ -496,7 +496,7 @@ static void pcnet32_realloc_tx_ring(struct net_device *dev, GFP_ATOMIC); if (!new_dma_addr_list) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR + printk(KERN_ERR "%s: Memory allocation failed.\n", dev->name); goto free_new_tx_ring; } @@ -505,7 +505,7 @@ static void pcnet32_realloc_tx_ring(struct net_device *dev, GFP_ATOMIC); if (!new_skb_list) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR + printk(KERN_ERR "%s: Memory allocation failed.\n", dev->name); goto free_new_lists; } @@ -563,7 +563,7 @@ static void pcnet32_realloc_rx_ring(struct net_device *dev, &new_ring_dma_addr); if (new_rx_ring == NULL) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR + printk(KERN_ERR "%s: Consistent memory allocation failed.\n", dev->name); return; @@ -574,7 +574,7 @@ static void pcnet32_realloc_rx_ring(struct net_device *dev, GFP_ATOMIC); if (!new_dma_addr_list) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR + printk(KERN_ERR "%s: Memory allocation failed.\n", dev->name); goto free_new_rx_ring; } @@ -583,7 +583,7 @@ static void pcnet32_realloc_rx_ring(struct net_device *dev, GFP_ATOMIC); if (!new_skb_list) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR + printk(KERN_ERR "%s: Memory allocation failed.\n", dev->name); goto free_new_lists; } @@ -1766,38 +1766,38 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) /* Version 0x2623 and 0x2624 */ if (((chip_version + 1) & 0xfffe) == 0x2624) { i = a->read_csr(ioaddr, 80) & 0x0C00; /* Check tx_start_pt */ - printk("\n" KERN_INFO " tx_start_pt(0x%04x):", i); + printk(KERN_INFO " tx_start_pt(0x%04x):", i); switch (i >> 10) { case 0: - printk(" 20 bytes,"); + printk(KERN_CONT " 20 bytes,"); break; case 1: - printk(" 64 bytes,"); + printk(KERN_CONT " 64 bytes,"); break; case 2: - printk(" 128 bytes,"); + printk(KERN_CONT " 128 bytes,"); break; case 3: - printk("~220 bytes,"); + printk(KERN_CONT "~220 bytes,"); break; } i = a->read_bcr(ioaddr, 18); /* Check Burst/Bus control */ - printk(" BCR18(%x):", i & 0xffff); + printk(KERN_CONT " BCR18(%x):", i & 0xffff); if (i & (1 << 5)) - printk("BurstWrEn "); + printk(KERN_CONT "BurstWrEn "); if (i & (1 << 6)) - printk("BurstRdEn "); + printk(KERN_CONT "BurstRdEn "); if (i & (1 << 7)) - printk("DWordIO "); + printk(KERN_CONT "DWordIO "); if (i & (1 << 11)) - printk("NoUFlow "); + printk(KERN_CONT "NoUFlow "); i = a->read_bcr(ioaddr, 25); - printk("\n" KERN_INFO " SRAMSIZE=0x%04x,", i << 8); + printk(KERN_INFO " SRAMSIZE=0x%04x,", i << 8); i = a->read_bcr(ioaddr, 26); - printk(" SRAM_BND=0x%04x,", i << 8); + printk(KERN_CONT " SRAM_BND=0x%04x,", i << 8); i = a->read_bcr(ioaddr, 27); if (i & (1 << 14)) - printk("LowLatRx"); + printk(KERN_CONT "LowLatRx"); } } @@ -1996,7 +1996,7 @@ static int pcnet32_alloc_ring(struct net_device *dev, const char *name) &lp->tx_ring_dma_addr); if (lp->tx_ring == NULL) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR PFX + printk(KERN_ERR PFX "%s: Consistent memory allocation failed.\n", name); return -ENOMEM; @@ -2008,7 +2008,7 @@ static int pcnet32_alloc_ring(struct net_device *dev, const char *name) &lp->rx_ring_dma_addr); if (lp->rx_ring == NULL) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR PFX + printk(KERN_ERR PFX "%s: Consistent memory allocation failed.\n", name); return -ENOMEM; @@ -2018,7 +2018,7 @@ static int pcnet32_alloc_ring(struct net_device *dev, const char *name) GFP_ATOMIC); if (!lp->tx_dma_addr) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR PFX + printk(KERN_ERR PFX "%s: Memory allocation failed.\n", name); return -ENOMEM; } @@ -2027,7 +2027,7 @@ static int pcnet32_alloc_ring(struct net_device *dev, const char *name) GFP_ATOMIC); if (!lp->rx_dma_addr) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR PFX + printk(KERN_ERR PFX "%s: Memory allocation failed.\n", name); return -ENOMEM; } @@ -2036,7 +2036,7 @@ static int pcnet32_alloc_ring(struct net_device *dev, const char *name) GFP_ATOMIC); if (!lp->tx_skbuff) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR PFX + printk(KERN_ERR PFX "%s: Memory allocation failed.\n", name); return -ENOMEM; } @@ -2045,7 +2045,7 @@ static int pcnet32_alloc_ring(struct net_device *dev, const char *name) GFP_ATOMIC); if (!lp->rx_skbuff) { if (netif_msg_drv(lp)) - printk("\n" KERN_ERR PFX + printk(KERN_ERR PFX "%s: Memory allocation failed.\n", name); return -ENOMEM; } diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c index 838cce8b8ff..669253c7bd4 100644 --- a/drivers/net/starfire.c +++ b/drivers/net/starfire.c @@ -180,7 +180,7 @@ static int full_duplex[MAX_UNITS] = {0, }; /* These identify the driver base version and may not be removed. */ static const char version[] __devinitconst = KERN_INFO "starfire.c:v1.03 7/26/2000 Written by Donald Becker \n" -KERN_INFO " (unofficial 2.2/2.4 kernel port, version " DRV_VERSION ", " DRV_RELDATE ")\n"; +" (unofficial 2.2/2.4 kernel port, version " DRV_VERSION ", " DRV_RELDATE ")\n"; MODULE_AUTHOR("Donald Becker "); MODULE_DESCRIPTION("Adaptec Starfire Ethernet driver"); diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c index 545f81b34ad..d1521c3875b 100644 --- a/drivers/net/sundance.c +++ b/drivers/net/sundance.c @@ -1698,13 +1698,13 @@ static int netdev_close(struct net_device *dev) #ifdef __i386__ if (netif_msg_hw(np)) { - printk("\n"KERN_DEBUG" Tx ring at %8.8x:\n", + printk(KERN_DEBUG " Tx ring at %8.8x:\n", (int)(np->tx_ring_dma)); for (i = 0; i < TX_RING_SIZE; i++) - printk(" #%d desc. %4.4x %8.8x %8.8x.\n", + printk(KERN_DEBUG " #%d desc. %4.4x %8.8x %8.8x.\n", i, np->tx_ring[i].status, np->tx_ring[i].frag[0].addr, np->tx_ring[i].frag[0].length); - printk("\n"KERN_DEBUG " Rx ring %8.8x:\n", + printk(KERN_DEBUG " Rx ring %8.8x:\n", (int)(np->rx_ring_dma)); for (i = 0; i < /*RX_RING_SIZE*/4 ; i++) { printk(KERN_DEBUG " #%d desc. %4.4x %4.4x %8.8x\n", diff --git a/drivers/net/tsi108_eth.c b/drivers/net/tsi108_eth.c index 0f78f99f9b2..7030bd5e984 100644 --- a/drivers/net/tsi108_eth.c +++ b/drivers/net/tsi108_eth.c @@ -1132,7 +1132,9 @@ static int tsi108_get_mac(struct net_device *dev) } if (!is_valid_ether_addr(dev->dev_addr)) { - printk("KERN_ERR: word1: %08x, word2: %08x\n", word1, word2); + printk(KERN_ERR + "%s: Invalid MAC address. word1: %08x, word2: %08x\n", + dev->name, word1, word2); return -EINVAL; } @@ -1201,8 +1203,8 @@ static void tsi108_set_rx_mode(struct net_device *dev) __set_bit(hash, &data->mc_hash[0]); } else { printk(KERN_ERR - "%s: got multicast address of length %d " - "instead of 6.\n", dev->name, + "%s: got multicast address of length %d instead of 6.\n", + dev->name, mc->dmi_addrlen); } diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c index 81f054dbb88..ef49744a508 100644 --- a/drivers/net/tulip/de2104x.c +++ b/drivers/net/tulip/de2104x.c @@ -944,9 +944,10 @@ static void de_set_media (struct de_private *de) macmode &= ~FullDuplex; if (netif_msg_link(de)) { - printk(KERN_INFO "%s: set link %s\n" - KERN_INFO "%s: mode 0x%x, sia 0x%x,0x%x,0x%x,0x%x\n" - KERN_INFO "%s: set mode 0x%x, set sia 0x%x,0x%x,0x%x\n", + printk(KERN_INFO + "%s: set link %s\n" + "%s: mode 0x%x, sia 0x%x,0x%x,0x%x,0x%x\n" + "%s: set mode 0x%x, set sia 0x%x,0x%x,0x%x\n", de->dev->name, media_name[media], de->dev->name, dr32(MacMode), dr32(SIAStatus), dr32(CSR13), dr32(CSR14), dr32(CSR15), diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index 2abb5d3becc..99a63649f4f 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -570,16 +570,18 @@ static void tulip_tx_timeout(struct net_device *dev) (unsigned int)tp->rx_ring[i].buffer2, buf[0], buf[1], buf[2]); for (j = 0; buf[j] != 0xee && j < 1600; j++) - if (j < 100) printk(" %2.2x", buf[j]); - printk(" j=%d.\n", j); + if (j < 100) + printk(KERN_CONT " %2.2x", buf[j]); + printk(KERN_CONT " j=%d.\n", j); } printk(KERN_DEBUG " Rx ring %8.8x: ", (int)tp->rx_ring); for (i = 0; i < RX_RING_SIZE; i++) - printk(" %8.8x", (unsigned int)tp->rx_ring[i].status); - printk("\n" KERN_DEBUG " Tx ring %8.8x: ", (int)tp->tx_ring); + printk(KERN_CONT " %8.8x", + (unsigned int)tp->rx_ring[i].status); + printk(KERN_DEBUG " Tx ring %8.8x: ", (int)tp->tx_ring); for (i = 0; i < TX_RING_SIZE; i++) - printk(" %8.8x", (unsigned int)tp->tx_ring[i].status); - printk("\n"); + printk(KERN_CONT " %8.8x", (unsigned int)tp->tx_ring[i].status); + printk(KERN_CONT "\n"); } #endif diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c index 842b1a2c40d..0f15773dae5 100644 --- a/drivers/net/tulip/winbond-840.c +++ b/drivers/net/tulip/winbond-840.c @@ -142,7 +142,7 @@ static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; static const char version[] __initconst = KERN_INFO DRV_NAME ".c:v" DRV_VERSION " (2.4 port) " DRV_RELDATE " Donald Becker \n" - KERN_INFO " http://www.scyld.com/network/drivers.html\n"; + " http://www.scyld.com/network/drivers.html\n"; MODULE_AUTHOR("Donald Becker "); MODULE_DESCRIPTION("Winbond W89c840 Ethernet driver"); @@ -939,7 +939,7 @@ static void tx_timeout(struct net_device *dev) printk(KERN_DEBUG " Rx ring %p: ", np->rx_ring); for (i = 0; i < RX_RING_SIZE; i++) printk(" %8.8x", (unsigned int)np->rx_ring[i].status); - printk("\n"KERN_DEBUG" Tx ring %p: ", np->tx_ring); + printk(KERN_DEBUG" Tx ring %p: ", np->tx_ring); for (i = 0; i < TX_RING_SIZE; i++) printk(" %8.8x", np->tx_ring[i].status); printk("\n"); @@ -1520,7 +1520,7 @@ static int netdev_close(struct net_device *dev) printk(KERN_DEBUG " #%d desc. %4.4x %4.4x %8.8x.\n", i, np->tx_ring[i].length, np->tx_ring[i].status, np->tx_ring[i].buffer1); - printk("\n"KERN_DEBUG " Rx ring %8.8x:\n", + printk(KERN_DEBUG " Rx ring %8.8x:\n", (int)np->rx_ring); for (i = 0; i < RX_RING_SIZE; i++) { printk(KERN_DEBUG " #%d desc. %4.4x %4.4x %8.8x\n", diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c index 223238de475..1ea1ef6c3b9 100644 --- a/drivers/net/wan/hd64570.c +++ b/drivers/net/wan/hd64570.c @@ -584,8 +584,9 @@ static void sca_dump_rings(struct net_device *dev) sca_in(DSR_RX(phy_node(port)), card) & DSR_DE ? "" : "in"); for (cnt = 0; cnt < port_to_card(port)->rx_ring_buffers; cnt++) printk(" %02X", readb(&(desc_address(port, cnt, 0)->stat))); + printk(KERN_CONT "\n"); - printk("\n" KERN_DEBUG "TX ring: CDA=%u EDA=%u DSR=%02X in=%u " + printk(KERN_DEBUG "TX ring: CDA=%u EDA=%u DSR=%02X in=%u " "last=%u %sactive", sca_inw(get_dmac_tx(port) + CDAL, card), sca_inw(get_dmac_tx(port) + EDAL, card), diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c index 497b003d723..f099c34a3ae 100644 --- a/drivers/net/wan/hd64572.c +++ b/drivers/net/wan/hd64572.c @@ -529,8 +529,9 @@ static void sca_dump_rings(struct net_device *dev) sca_in(DSR_RX(port->chan), card) & DSR_DE ? "" : "in"); for (cnt = 0; cnt < port->card->rx_ring_buffers; cnt++) printk(" %02X", readb(&(desc_address(port, cnt, 0)->stat))); + printk(KERN_CONT "\n"); - printk("\n" KERN_DEBUG "TX ring: CDA=%u EDA=%u DSR=%02X in=%u " + printk(KERN_DEBUG "TX ring: CDA=%u EDA=%u DSR=%02X in=%u " "last=%u %sactive", sca_inl(get_dmac_tx(port) + CDAL, card), sca_inl(get_dmac_tx(port) + EDAL, card), diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index 3fb9dbc88a1..d14e95a08d6 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -326,11 +326,9 @@ sbni_pci_probe( struct net_device *dev ) } if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs) - printk( KERN_WARNING " WARNING: The PCI BIOS assigned " - "this PCI card to IRQ %d, which is unlikely " - "to work!.\n" - KERN_WARNING " You should use the PCI BIOS " - "setup to assign a valid IRQ line.\n", + printk( KERN_WARNING + " WARNING: The PCI BIOS assigned this PCI card to IRQ %d, which is unlikely to work!.\n" + " You should use the PCI BIOS setup to assign a valid IRQ line.\n", pci_irq_line ); /* avoiding re-enable dual adapters */ diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index b10b0383dfa..698b11b1cad 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -2427,11 +2427,10 @@ static void untranslate(ray_dev_t *local, struct sk_buff *skb, int len) #ifdef PCMCIA_DEBUG if (pc_debug > 3) { - int i; - printk(KERN_DEBUG "skb->data before untranslate"); - for (i = 0; i < 64; i++) - printk("%02x ", skb->data[i]); - printk("\n" KERN_DEBUG + print_hex_dump(KERN_DEBUG, "skb->data before untranslate: ", + DUMP_PREFIX_NONE, 16, 1, + skb->data, 64, true); + printk(KERN_DEBUG "type = %08x, xsap = %02x%02x%02x, org = %02x02x02x\n", ntohs(type), psnap->dsap, psnap->ssap, psnap->ctrl, psnap->org[0], psnap->org[1], psnap->org[2]); diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index 6af706408ac..c6d300666ad 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -3556,17 +3556,8 @@ wv_82593_config(struct net_device * dev) cfblk.rcvstop = TRUE; /* Enable Receive Stop Register */ #ifdef DEBUG_I82593_SHOW - { - u_char *c = (u_char *) &cfblk; - int i; - printk(KERN_DEBUG "wavelan_cs: config block:"); - for(i = 0; i < sizeof(struct i82593_conf_block); i++,c++) - { - if((i % 16) == 0) printk("\n" KERN_DEBUG); - printk("%02x ", *c); - } - printk("\n"); - } + print_hex_dump(KERN_DEBUG, "wavelan_cs: config block: ", DUMP_PREFIX_NONE, + 16, 1, &cfblk, sizeof(struct i82593_conf_block), false); #endif /* Copy the config block to the i82593 */ diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c index 3c7a5053f1d..a07580138e8 100644 --- a/drivers/net/yellowfin.c +++ b/drivers/net/yellowfin.c @@ -109,7 +109,7 @@ static int gx_fix; /* These identify the driver base version and may not be removed. */ static const char version[] __devinitconst = KERN_INFO DRV_NAME ".c:v1.05 1/09/2001 Written by Donald Becker \n" - KERN_INFO " (unofficial 2.4.x port, " DRV_VERSION ", " DRV_RELDATE ")\n"; + " (unofficial 2.4.x port, " DRV_VERSION ", " DRV_RELDATE ")\n"; MODULE_AUTHOR("Donald Becker "); MODULE_DESCRIPTION("Packet Engines Yellowfin G-NIC Gigabit Ethernet driver"); @@ -700,12 +700,15 @@ static void yellowfin_tx_timeout(struct net_device *dev) int i; printk(KERN_WARNING " Rx ring %p: ", yp->rx_ring); for (i = 0; i < RX_RING_SIZE; i++) - printk(" %8.8x", yp->rx_ring[i].result_status); - printk("\n"KERN_WARNING" Tx ring %p: ", yp->tx_ring); + printk(KERN_CONT " %8.8x", + yp->rx_ring[i].result_status); + printk(KERN_CONT "\n"); + printk(KERN_WARNING" Tx ring %p: ", yp->tx_ring); for (i = 0; i < TX_RING_SIZE; i++) - printk(" %4.4x /%8.8x", yp->tx_status[i].tx_errs, - yp->tx_ring[i].result_status); - printk("\n"); + printk(KERN_CONT " %4.4x /%8.8x", + yp->tx_status[i].tx_errs, + yp->tx_ring[i].result_status); + printk(KERN_CONT "\n"); } /* If the hardware is found to hang regularly, we will update the code @@ -1216,20 +1219,20 @@ static int yellowfin_close(struct net_device *dev) #if defined(__i386__) if (yellowfin_debug > 2) { - printk("\n"KERN_DEBUG" Tx ring at %8.8llx:\n", + printk(KERN_DEBUG" Tx ring at %8.8llx:\n", (unsigned long long)yp->tx_ring_dma); for (i = 0; i < TX_RING_SIZE*2; i++) - printk(" %c #%d desc. %8.8x %8.8x %8.8x %8.8x.\n", + printk(KERN_DEBUG " %c #%d desc. %8.8x %8.8x %8.8x %8.8x.\n", ioread32(ioaddr + TxPtr) == (long)&yp->tx_ring[i] ? '>' : ' ', i, yp->tx_ring[i].dbdma_cmd, yp->tx_ring[i].addr, yp->tx_ring[i].branch_addr, yp->tx_ring[i].result_status); printk(KERN_DEBUG " Tx status %p:\n", yp->tx_status); for (i = 0; i < TX_RING_SIZE; i++) - printk(" #%d status %4.4x %4.4x %4.4x %4.4x.\n", + printk(KERN_DEBUG " #%d status %4.4x %4.4x %4.4x %4.4x.\n", i, yp->tx_status[i].tx_cnt, yp->tx_status[i].tx_errs, yp->tx_status[i].total_tx_cnt, yp->tx_status[i].paused); - printk("\n"KERN_DEBUG " Rx ring %8.8llx:\n", + printk(KERN_DEBUG " Rx ring %8.8llx:\n", (unsigned long long)yp->rx_ring_dma); for (i = 0; i < RX_RING_SIZE; i++) { printk(KERN_DEBUG " %c #%d desc. %8.8x %8.8x %8.8x\n", diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c index c709ecc2b7f..0be1d50645a 100644 --- a/drivers/parisc/eisa_enumerator.c +++ b/drivers/parisc/eisa_enumerator.c @@ -101,7 +101,7 @@ static int configure_memory(const unsigned char *buf, printk("memory %lx-%lx ", (unsigned long)res->start, (unsigned long)res->end); result = request_resource(mem_parent, res); if (result < 0) { - printk("\n" KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n"); + printk(KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n"); return result; } } @@ -191,7 +191,7 @@ static int configure_port(const unsigned char *buf, struct resource *io_parent, printk("ioports %lx-%lx ", (unsigned long)res->start, (unsigned long)res->end); result = request_resource(io_parent, res); if (result < 0) { - printk("\n" KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n"); + printk(KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n"); return result; } } @@ -224,7 +224,7 @@ static int configure_port_init(const unsigned char *buf) case HPEE_PORT_INIT_WIDTH_BYTE: s=1; if (c & HPEE_PORT_INIT_MASK) { - printk("\n" KERN_WARNING "port_init: unverified mask attribute\n"); + printk(KERN_WARNING "port_init: unverified mask attribute\n"); outb((inb(get_16(buf+len+1) & get_8(buf+len+3)) | get_8(buf+len+4)), get_16(buf+len+1)); @@ -249,7 +249,7 @@ static int configure_port_init(const unsigned char *buf) case HPEE_PORT_INIT_WIDTH_DWORD: s=4; if (c & HPEE_PORT_INIT_MASK) { - printk("\n" KERN_WARNING "port_init: unverified mask attribute\n"); + printk(KERN_WARNING "port_init: unverified mask attribute\n"); outl((inl(get_16(buf+len+1) & get_32(buf+len+3)) | get_32(buf+len+7)), get_16(buf+len+1)); @@ -259,7 +259,7 @@ static int configure_port_init(const unsigned char *buf) break; default: - printk("\n" KERN_ERR "Invalid port init word %02x\n", c); + printk(KERN_ERR "Invalid port init word %02x\n", c); return 0; } @@ -297,7 +297,7 @@ static int configure_type_string(const unsigned char *buf) /* just skip past the type field */ len = get_8(buf); if (len > 80) { - printk("\n" KERN_ERR "eisa_enumerator: type info field too long (%d, max is 80)\n", len); + printk(KERN_ERR "eisa_enumerator: type info field too long (%d, max is 80)\n", len); } return 1+len; @@ -398,7 +398,7 @@ static int parse_slot_config(int slot, } if (p0 + function_len < pos) { - printk("\n" KERN_ERR "eisa_enumerator: function %d length mis-match " + printk(KERN_ERR "eisa_enumerator: function %d length mis-match " "got %d, expected %d\n", num_func, pos-p0, function_len); res=-1; diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c index 9ad97ea836e..8eb04230fec 100644 --- a/drivers/pcmcia/tcic.c +++ b/drivers/pcmcia/tcic.c @@ -472,7 +472,8 @@ static int __init init_tcic(void) init_timer(&poll_timer); /* Build interrupt mask */ - printk(", %d sockets\n" KERN_INFO " irq list (", sockets); + printk(KERN_CONT ", %d sockets\n", sockets); + printk(KERN_INFO " irq list ("); if (irq_list_count == 0) mask = irq_mask; else diff --git a/drivers/scsi/atari_NCR5380.c b/drivers/scsi/atari_NCR5380.c index 0471f880048..4240b05aef6 100644 --- a/drivers/scsi/atari_NCR5380.c +++ b/drivers/scsi/atari_NCR5380.c @@ -2826,8 +2826,7 @@ int NCR5380_abort(Scsi_Cmnd *cmd) */ local_irq_restore(flags); - printk(KERN_INFO "scsi%d: warning : SCSI command probably completed successfully\n" - KERN_INFO " before abortion\n", HOSTNO); + printk(KERN_INFO "scsi%d: warning : SCSI command probably completed successfully before abortion\n", HOSTNO); /* Maybe it is sufficient just to release the ST-DMA lock... (if * possible at all) At least, we should check if the lock could be diff --git a/drivers/scsi/mac53c94.c b/drivers/scsi/mac53c94.c index b12ad7c7c67..18735b39b3d 100644 --- a/drivers/scsi/mac53c94.c +++ b/drivers/scsi/mac53c94.c @@ -75,8 +75,9 @@ static int mac53c94_queue(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd * int i; printk(KERN_DEBUG "mac53c94_queue %p: command is", cmd); for (i = 0; i < cmd->cmd_len; ++i) - printk(" %.2x", cmd->cmnd[i]); - printk("\n" KERN_DEBUG "use_sg=%d request_bufflen=%d request_buffer=%p\n", + printk(KERN_CONT " %.2x", cmd->cmnd[i]); + printk(KERN_CONT "\n"); + printk(KERN_DEBUG "use_sg=%d request_bufflen=%d request_buffer=%p\n", scsi_sg_count(cmd), scsi_bufflen(cmd), scsi_sglist(cmd)); } #endif diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index ef142fd47a8..4d6f2fe1cfe 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -619,7 +619,7 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) if (strcmp(current->comm, cmd) && printk_ratelimit()) { printk(KERN_WARNING "sg_write: data in/out %d/%d bytes for SCSI command 0x%x--" - "guessing data in;\n" KERN_WARNING " " + "guessing data in;\n " "program %s not setting count and/or reply_len properly\n", old_hdr.reply_len - (int)SZ_SG_HEADER, input_size, (unsigned int) cmnd[0], diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c index bcaba86060a..75da6e58ce5 100644 --- a/drivers/scsi/sun3_NCR5380.c +++ b/drivers/scsi/sun3_NCR5380.c @@ -2860,8 +2860,7 @@ static int NCR5380_abort(struct scsi_cmnd *cmd) */ local_irq_restore(flags); - printk(KERN_INFO "scsi%d: warning : SCSI command probably completed successfully\n" - KERN_INFO " before abortion\n", HOSTNO); + printk(KERN_INFO "scsi%d: warning : SCSI command probably completed successfully before abortion\n", HOSTNO); return SCSI_ABORT_NOT_RUNNING; } diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 6160e03f410..e7108e75653 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -60,11 +60,12 @@ struct serial_private { static void moan_device(const char *str, struct pci_dev *dev) { - printk(KERN_WARNING "%s: %s\n" - KERN_WARNING "Please send the output of lspci -vv, this\n" - KERN_WARNING "message (0x%04x,0x%04x,0x%04x,0x%04x), the\n" - KERN_WARNING "manufacturer and name of serial board or\n" - KERN_WARNING "modem board to rmk+serial@arm.linux.org.uk.\n", + printk(KERN_WARNING + "%s: %s\n" + "Please send the output of lspci -vv, this\n" + "message (0x%04x,0x%04x,0x%04x,0x%04x), the\n" + "manufacturer and name of serial board or\n" + "modem board to rmk+serial@arm.linux.org.uk.\n", pci_name(dev), str, dev->vendor, dev->device, dev->subsystem_vendor, dev->subsystem_device); } diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c index fbfadbac67e..131030f693c 100644 --- a/drivers/ssb/pcmcia.c +++ b/drivers/ssb/pcmcia.c @@ -583,7 +583,7 @@ static int ssb_pcmcia_sprom_write_all(struct ssb_bus *bus, const u16 *sprom) ssb_printk("."); err = ssb_pcmcia_sprom_write(bus, i, sprom[i]); if (err) { - ssb_printk("\n" KERN_NOTICE PFX + ssb_printk(KERN_NOTICE PFX "Failed to write to SPROM.\n"); failed = 1; break; @@ -591,7 +591,7 @@ static int ssb_pcmcia_sprom_write_all(struct ssb_bus *bus, const u16 *sprom) } err = ssb_pcmcia_sprom_command(bus, SSB_PCMCIA_SPROMCTL_WRITEDIS); if (err) { - ssb_printk("\n" KERN_NOTICE PFX + ssb_printk(KERN_NOTICE PFX "Could not disable SPROM write access.\n"); failed = 1; } diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index ce3f453f02e..95ccfa0b9fc 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -648,7 +648,7 @@ void usb_hcd_poll_rh_status(struct usb_hcd *hcd) struct urb *urb; int length; unsigned long flags; - char buffer[4]; /* Any root hubs with > 31 ports? */ + char buffer[6]; /* Any root hubs with > 31 ports? */ if (unlikely(!hcd->rh_registered)) return; diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index fb8163d181a..a21efcd10b7 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -226,9 +226,10 @@ static int clcdfb_set_par(struct fb_info *info) clcdfb_enable(fb, regs.cntl); #ifdef DEBUG - printk(KERN_INFO "CLCD: Registers set to\n" - KERN_INFO " %08x %08x %08x %08x\n" - KERN_INFO " %08x %08x %08x %08x\n", + printk(KERN_INFO + "CLCD: Registers set to\n" + " %08x %08x %08x %08x\n" + " %08x %08x %08x %08x\n", readl(fb->regs + CLCD_TIM0), readl(fb->regs + CLCD_TIM1), readl(fb->regs + CLCD_TIM2), readl(fb->regs + CLCD_TIM3), readl(fb->regs + CLCD_UBAS), readl(fb->regs + CLCD_LBAS), diff --git a/drivers/video/matrox/matroxfb_DAC1064.c b/drivers/video/matrox/matroxfb_DAC1064.c index 0ce3b0a8979..a74e5da17aa 100644 --- a/drivers/video/matrox/matroxfb_DAC1064.c +++ b/drivers/video/matrox/matroxfb_DAC1064.c @@ -454,9 +454,9 @@ static void DAC1064_restore_2(WPMINFO2) { dprintk(KERN_DEBUG "DAC1064regs "); for (i = 0; i < sizeof(MGA1064_DAC_regs); i++) { dprintk("R%02X=%02X ", MGA1064_DAC_regs[i], ACCESS_FBINFO(hw).DACreg[i]); - if ((i & 0x7) == 0x7) dprintk("\n" KERN_DEBUG "continuing... "); + if ((i & 0x7) == 0x7) dprintk(KERN_DEBUG "continuing... "); } - dprintk("\n" KERN_DEBUG "DAC1064clk "); + dprintk(KERN_DEBUG "DAC1064clk "); for (i = 0; i < 6; i++) dprintk("C%02X=%02X ", i, ACCESS_FBINFO(hw).DACclk[i]); dprintk("\n"); diff --git a/drivers/video/matrox/matroxfb_Ti3026.c b/drivers/video/matrox/matroxfb_Ti3026.c index 13524821e24..4e825112a60 100644 --- a/drivers/video/matrox/matroxfb_Ti3026.c +++ b/drivers/video/matrox/matroxfb_Ti3026.c @@ -651,9 +651,9 @@ static void Ti3026_restore(WPMINFO2) { dprintk(KERN_DEBUG "3026DACregs "); for (i = 0; i < 21; i++) { dprintk("R%02X=%02X ", DACseq[i], hw->DACreg[i]); - if ((i & 0x7) == 0x7) dprintk("\n" KERN_DEBUG "continuing... "); + if ((i & 0x7) == 0x7) dprintk(KERN_DEBUG "continuing... "); } - dprintk("\n" KERN_DEBUG "DACclk "); + dprintk(KERN_DEBUG "DACclk "); for (i = 0; i < 6; i++) dprintk("C%02X=%02X ", i, hw->DACclk[i]); dprintk("\n"); diff --git a/drivers/video/stifb.c b/drivers/video/stifb.c index eec9dcb7f59..6120f0c526f 100644 --- a/drivers/video/stifb.c +++ b/drivers/video/stifb.c @@ -1115,10 +1115,9 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) if the device name contains the string "DX" and tell the user how to reconfigure the card. */ if (strstr(sti->outptr.dev_name, "DX")) { - printk(KERN_WARNING "WARNING: stifb framebuffer driver does not " - "support '%s' in double-buffer mode.\n" - KERN_WARNING "WARNING: Please disable the double-buffer mode " - "in IPL menu (the PARISC-BIOS).\n", + printk(KERN_WARNING +"WARNING: stifb framebuffer driver does not support '%s' in double-buffer mode.\n" +"WARNING: Please disable the double-buffer mode in IPL menu (the PARISC-BIOS).\n", sti->outptr.dev_name); goto out_err0; } diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index a0244740b75..b47679be118 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -270,19 +270,21 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c, D2({ int i=0; struct jffs2_raw_node_ref *this; - printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n" KERN_DEBUG); + printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n"); this = ic->nodes; + printk(KERN_DEBUG); while(this) { - printk( "0x%08x(%d)->", ref_offset(this), ref_flags(this)); + printk(KERN_CONT "0x%08x(%d)->", + ref_offset(this), ref_flags(this)); if (++i == 5) { - printk("\n" KERN_DEBUG); + printk(KERN_DEBUG); i=0; } this = this->next_in_ino; } - printk("\n"); + printk(KERN_CONT "\n"); }); switch (ic->class) { diff --git a/kernel/module.c b/kernel/module.c index 38928fcaff2..0a049837008 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2451,9 +2451,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, return ret; } if (ret > 0) { - printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, " - "it should follow 0/-E convention\n" - KERN_WARNING "%s: loading module anyway...\n", + printk(KERN_WARNING +"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n" +"%s: loading module anyway...\n", __func__, mod->name, ret, __func__); dump_stack(); diff --git a/sound/pci/emu10k1/p16v.c b/sound/pci/emu10k1/p16v.c index e617acaf10e..61b8ab39800 100644 --- a/sound/pci/emu10k1/p16v.c +++ b/sound/pci/emu10k1/p16v.c @@ -644,7 +644,7 @@ int __devinit snd_p16v_pcm(struct snd_emu10k1 *emu, int device, struct snd_pcm * int err; int capture=1; - /* snd_printk("KERN_DEBUG snd_p16v_pcm called. device=%d\n", device); */ + /* snd_printk(KERN_DEBUG "snd_p16v_pcm called. device=%d\n", device); */ emu->p16v_device_offset = device; if (rpcm) *rpcm = NULL; diff --git a/sound/usb/usx2y/usbusx2yaudio.c b/sound/usb/usx2y/usbusx2yaudio.c index dd1ab617784..9efd27f6b52 100644 --- a/sound/usb/usx2y/usbusx2yaudio.c +++ b/sound/usb/usx2y/usbusx2yaudio.c @@ -296,9 +296,10 @@ static void usX2Y_error_urb_status(struct usX2Ydev *usX2Y, static void usX2Y_error_sequence(struct usX2Ydev *usX2Y, struct snd_usX2Y_substream *subs, struct urb *urb) { - snd_printk(KERN_ERR "Sequence Error!(hcd_frame=%i ep=%i%s;wait=%i,frame=%i).\n" - KERN_ERR "Most propably some urb of usb-frame %i is still missing.\n" - KERN_ERR "Cause could be too long delays in usb-hcd interrupt handling.\n", + snd_printk(KERN_ERR +"Sequence Error!(hcd_frame=%i ep=%i%s;wait=%i,frame=%i).\n" +"Most propably some urb of usb-frame %i is still missing.\n" +"Cause could be too long delays in usb-hcd interrupt handling.\n", usb_get_current_frame_number(usX2Y->chip.dev), subs->endpoint, usb_pipein(urb->pipe) ? "in" : "out", usX2Y->wait_iso_frame, urb->start_frame, usX2Y->wait_iso_frame); -- cgit v1.2.3-70-g09d2 From 6560dc160f3a96b8f1f43e2c6b51aa6eb9898b90 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 23 Jul 2009 23:42:08 +0930 Subject: module: use MODULE_SYMBOL_PREFIX with module_layout The check_modstruct_version() needs to look up the symbol "module_layout" in the kernel, but it does so literally and not by a C identifier. The trouble is that it does not include a symbol prefix for those ports that need it (like the Blackfin and H8300 port). So make sure we tack on the MODULE_SYMBOL_PREFIX define to the front of it. Signed-off-by: Mike Frysinger Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- kernel/module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 0a049837008..fd141140355 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1068,7 +1068,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, { const unsigned long *crc; - if (!find_symbol("module_layout", NULL, &crc, true, false)) + if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, + &crc, true, false)) BUG(); return check_version(sechdrs, versindex, "module_layout", mod, crc); } -- cgit v1.2.3-70-g09d2 From 7ead8b8313d92b3a69a1a61b0dcbc4cd66c960dc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 17 Aug 2009 16:56:28 +0800 Subject: tracing/events: Add module tracepoints Add trace points to trace module_load, module_free, module_get, module_put and module_request, and use trace_event facility to get the trace output. Here's the sample output: TASK-PID CPU# TIMESTAMP FUNCTION | | | | | <...>-42 [000] 1.758380: module_request: fb0 wait=1 call_site=fb_open ... <...>-60 [000] 3.269403: module_load: scsi_wait_scan <...>-60 [000] 3.269432: module_put: scsi_wait_scan call_site=sys_init_module refcnt=0 <...>-61 [001] 3.273168: module_free: scsi_wait_scan ... <...>-1021 [000] 13.836081: module_load: sunrpc <...>-1021 [000] 13.840589: module_put: sunrpc call_site=sys_init_module refcnt=-1 <...>-1027 [000] 13.848098: module_get: sunrpc call_site=try_module_get refcnt=0 <...>-1027 [000] 13.848308: module_get: sunrpc call_site=get_filesystem refcnt=1 <...>-1027 [000] 13.848692: module_put: sunrpc call_site=put_filesystem refcnt=0 ... modprobe-2587 [001] 1088.437213: module_load: trace_events_sample F modprobe-2587 [001] 1088.437786: module_put: trace_events_sample call_site=sys_init_module refcnt=0 Note: - the taints flag can be 'F', 'C' and/or 'P' if mod->taints != 0 - the module refcnt is percpu, so it can be negative in a specific cpu Signed-off-by: Li Zefan Acked-by: Rusty Russell Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Rusty Russell LKML-Reference: <4A891B3C.5030608@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/module.h | 14 ++++- include/trace/events/module.h | 126 ++++++++++++++++++++++++++++++++++++++++++ kernel/kmod.c | 4 ++ kernel/module.c | 11 ++++ 4 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 include/trace/events/module.h (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 098bdb7bfac..f8f92d015ef 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,10 +17,12 @@ #include #include #include -#include +#include #include +#include + /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) @@ -462,7 +464,10 @@ static inline local_t *__module_ref_addr(struct module *mod, int cpu) static inline void __module_get(struct module *module) { if (module) { - local_inc(__module_ref_addr(module, get_cpu())); + unsigned int cpu = get_cpu(); + local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); put_cpu(); } } @@ -473,8 +478,11 @@ static inline int try_module_get(struct module *module) if (module) { unsigned int cpu = get_cpu(); - if (likely(module_is_live(module))) + if (likely(module_is_live(module))) { local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); + } else ret = 0; put_cpu(); diff --git a/include/trace/events/module.h b/include/trace/events/module.h new file mode 100644 index 00000000000..84160fb1847 --- /dev/null +++ b/include/trace/events/module.h @@ -0,0 +1,126 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM module + +#if !defined(_TRACE_MODULE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MODULE_H + +#include + +#ifdef CONFIG_MODULES + +struct module; + +#define show_module_flags(flags) __print_flags(flags, "", \ + { (1UL << TAINT_PROPRIETARY_MODULE), "P" }, \ + { (1UL << TAINT_FORCED_MODULE), "F" }, \ + { (1UL << TAINT_CRAP), "C" }) + +TRACE_EVENT(module_load, + + TP_PROTO(struct module *mod), + + TP_ARGS(mod), + + TP_STRUCT__entry( + __field( unsigned int, taints ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->taints = mod->taints; + __assign_str(name, mod->name); + ), + + TP_printk("%s %s", __get_str(name), show_module_flags(__entry->taints)) +); + +TRACE_EVENT(module_free, + + TP_PROTO(struct module *mod), + + TP_ARGS(mod), + + TP_STRUCT__entry( + __string( name, mod->name ) + ), + + TP_fast_assign( + __assign_str(name, mod->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(module_get, + + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + + TP_ARGS(mod, ip, refcnt), + + TP_STRUCT__entry( + __field( unsigned long, ip ) + __field( int, refcnt ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->refcnt = refcnt; + __assign_str(name, mod->name); + ), + + TP_printk("%s call_site=%pf refcnt=%d", + __get_str(name), (void *)__entry->ip, __entry->refcnt) +); + +TRACE_EVENT(module_put, + + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + + TP_ARGS(mod, ip, refcnt), + + TP_STRUCT__entry( + __field( unsigned long, ip ) + __field( int, refcnt ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->refcnt = refcnt; + __assign_str(name, mod->name); + ), + + TP_printk("%s call_site=%pf refcnt=%d", + __get_str(name), (void *)__entry->ip, __entry->refcnt) +); + +TRACE_EVENT(module_request, + + TP_PROTO(char *name, bool wait, unsigned long ip), + + TP_ARGS(name, wait, ip), + + TP_STRUCT__entry( + __field( bool, wait ) + __field( unsigned long, ip ) + __string( name, name ) + ), + + TP_fast_assign( + __entry->wait = wait; + __entry->ip = ip; + __assign_str(name, name); + ), + + TP_printk("%s wait=%d call_site=%pf", + __get_str(name), (int)__entry->wait, (void *)__entry->ip) +); + +#endif /* CONFIG_MODULES */ + +#endif /* _TRACE_MODULE_H */ + +/* This part must be outside protection */ +#include + diff --git a/kernel/kmod.c b/kernel/kmod.c index 385c31a1bdb..a92280870e3 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -37,6 +37,8 @@ #include #include +#include + extern int max_threads; static struct workqueue_struct *khelper_wq; @@ -108,6 +110,8 @@ int __request_module(bool wait, const char *fmt, ...) return -ENOMEM; } + trace_module_request(module_name, wait, _RET_IP_); + ret = call_usermodehelper(modprobe_path, argv, envp, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC); atomic_dec(&kmod_concurrent); diff --git a/kernel/module.c b/kernel/module.c index fd141140355..b1821438694 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -55,6 +55,11 @@ #include #include +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL(module_get); + #if 0 #define DEBUGP printk #else @@ -940,6 +945,8 @@ void module_put(struct module *module) if (module) { unsigned int cpu = get_cpu(); local_dec(__module_ref_addr(module, cpu)); + trace_module_put(module, _RET_IP_, + local_read(__module_ref_addr(module, cpu))); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); @@ -1491,6 +1498,8 @@ static int __unlink_module(void *_mod) /* Free a module, remove from lists, etc (must hold module_mutex). */ static void free_module(struct module *mod) { + trace_module_free(mod); + /* Delete from various lists */ stop_machine(__unlink_module, mod, NULL); remove_notes_attrs(mod); @@ -2358,6 +2367,8 @@ static noinline struct module *load_module(void __user *umod, /* Get rid of temporary copy */ vfree(hdr); + trace_module_load(mod); + /* Done! */ return mod; -- cgit v1.2.3-70-g09d2 From 7d1d16e416e61aeef8655d542f8e4a4fc6e808e4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 26 Aug 2009 22:02:54 +0930 Subject: module: fix BUG_ON() for powerpc (and other function descriptor archs) The rarely-used symbol_put_addr() needs to use dereference_function_descriptor on powerpc. Reported-by: Paul Mackerras Signed-off-by: Rusty Russell --- kernel/module.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index fd141140355..07c80e68a6c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -909,16 +909,18 @@ void __symbol_put(const char *symbol) } EXPORT_SYMBOL(__symbol_put); +/* Note this assumes addr is a function, which it currently always is. */ void symbol_put_addr(void *addr) { struct module *modaddr; + unsigned long a = (unsigned long)dereference_function_descriptor(addr); - if (core_kernel_text((unsigned long)addr)) + if (core_kernel_text(a)) return; /* module_text_address is safe here: we're supposed to have reference * to module from symbol_get, so it can't go away. */ - modaddr = __module_text_address((unsigned long)addr); + modaddr = __module_text_address(a); BUG_ON(!modaddr); module_put(modaddr); } -- cgit v1.2.3-70-g09d2 From 1b364bf438cf337a3818aee77d68c0713f3e1fc4 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 26 Aug 2009 22:04:12 +0930 Subject: module: workaround duplicate section names The root cause is a duplicate section name (.text); is this legal? [ Amerigo Wang: "AFAIK, yes." ] However, there's a problem with commit 6d76013381ed28979cd122eb4b249a88b5e384fa in that if you fail to allocate a mod->sect_attrs (in this case it's null because of the duplication), it still gets used without checking in add_notes_attrs() This should fix it [ This patch leaves other problems, particularly the sections directory, but recent parisc toolchains seem to produce these modules and this prevents a crash and is a minimal change -- RR ] Signed-off-by: James Bottomley Signed-off-by: Rusty Russell Tested-by: Helge Deller Signed-off-by: Linus Torvalds --- kernel/module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 07c80e68a6c..eccb561dd8a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2355,7 +2355,8 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto unlink; add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); - add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); + if (mod->sect_attrs) + add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); /* Get rid of temporary copy */ vfree(hdr); -- cgit v1.2.3-70-g09d2 From ea6bff368548d79529421a9dc0710fc5330eb504 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 28 Aug 2009 10:44:56 +0200 Subject: modules: Fix build error in the !CONFIG_KALLSYMS case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit > James Bottomley (1): > module: workaround duplicate section names -tip testing found that this patch breaks the build on x86 if CONFIG_KALLSYMS is disabled: kernel/module.c: In function ‘load_module’: kernel/module.c:2367: error: ‘struct module’ has no member named ‘sect_attrs’ distcc[8269] ERROR: compile kernel/module.c on ph/32 failed make[1]: *** [kernel/module.o] Error 1 make: *** [kernel] Error 2 make: *** Waiting for unfinished jobs.... Commit 1b364bf misses the fact that section attributes are only built and dealt with if kallsyms is enabled. The patch below fixes this. ( note, technically speaking this should depend on CONFIG_SYSFS as well but this patch is correct too and keeps the #ifdef less intrusive - in the KALLSYMS && !SYSFS case the code is a NOP. ) Signed-off-by: Ingo Molnar [ Replaced patch with a slightly cleaner variation by James Bottomley ] Signed-off-by: Linus Torvalds --- kernel/module.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index eccb561dd8a..2d537186191 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1274,6 +1274,10 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, struct module_notes_attrs *notes_attrs; struct bin_attribute *nattr; + /* failed to create section attributes, so can't create notes */ + if (!mod->sect_attrs) + return; + /* Count notes sections and allocate structures. */ notes = 0; for (i = 0; i < nsect; i++) @@ -2355,8 +2359,7 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto unlink; add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); - if (mod->sect_attrs) - add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); + add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); /* Get rid of temporary copy */ vfree(hdr); -- cgit v1.2.3-70-g09d2 From fc5377668c3d808e1d53c4aee152c836f55c3490 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Sep 2009 19:35:28 +0200 Subject: tracing: Remove markers Now that the last users of markers have migrated to the event tracer we can kill off the (now orphan) support code. Signed-off-by: Christoph Hellwig Acked-by: Mathieu Desnoyers Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20090917173527.GA1699@lst.de> Signed-off-by: Ingo Molnar --- Documentation/markers.txt | 104 ---- arch/powerpc/platforms/cell/spufs/file.c | 1 - arch/powerpc/platforms/cell/spufs/sched.c | 1 - include/linux/kvm_host.h | 1 - include/linux/marker.h | 221 ------- include/linux/module.h | 11 - init/Kconfig | 7 - kernel/Makefile | 1 - kernel/marker.c | 930 ------------------------------ kernel/module.c | 18 - kernel/trace/trace_printk.c | 1 - samples/Kconfig | 6 - samples/Makefile | 2 +- samples/markers/Makefile | 4 - samples/markers/marker-example.c | 53 -- samples/markers/probe-example.c | 92 --- scripts/Makefile.modpost | 12 - 17 files changed, 1 insertion(+), 1464 deletions(-) delete mode 100644 Documentation/markers.txt delete mode 100644 include/linux/marker.h delete mode 100644 kernel/marker.c delete mode 100644 samples/markers/Makefile delete mode 100644 samples/markers/marker-example.c delete mode 100644 samples/markers/probe-example.c (limited to 'kernel/module.c') diff --git a/Documentation/markers.txt b/Documentation/markers.txt deleted file mode 100644 index d2b3d0e91b2..00000000000 --- a/Documentation/markers.txt +++ /dev/null @@ -1,104 +0,0 @@ - Using the Linux Kernel Markers - - Mathieu Desnoyers - - -This document introduces Linux Kernel Markers and their use. It provides -examples of how to insert markers in the kernel and connect probe functions to -them and provides some examples of probe functions. - - -* Purpose of markers - -A marker placed in code provides a hook to call a function (probe) that you can -provide at runtime. A marker can be "on" (a probe is connected to it) or "off" -(no probe is attached). When a marker is "off" it has no effect, except for -adding a tiny time penalty (checking a condition for a branch) and space -penalty (adding a few bytes for the function call at the end of the -instrumented function and adds a data structure in a separate section). When a -marker is "on", the function you provide is called each time the marker is -executed, in the execution context of the caller. When the function provided -ends its execution, it returns to the caller (continuing from the marker site). - -You can put markers at important locations in the code. Markers are -lightweight hooks that can pass an arbitrary number of parameters, -described in a printk-like format string, to the attached probe function. - -They can be used for tracing and performance accounting. - - -* Usage - -In order to use the macro trace_mark, you should include linux/marker.h. - -#include - -And, - -trace_mark(subsystem_event, "myint %d mystring %s", someint, somestring); -Where : -- subsystem_event is an identifier unique to your event - - subsystem is the name of your subsystem. - - event is the name of the event to mark. -- "myint %d mystring %s" is the formatted string for the serializer. "myint" and - "mystring" are repectively the field names associated with the first and - second parameter. -- someint is an integer. -- somestring is a char pointer. - -Connecting a function (probe) to a marker is done by providing a probe (function -to call) for the specific marker through marker_probe_register() and can be -activated by calling marker_arm(). Marker deactivation can be done by calling -marker_disarm() as many times as marker_arm() has been called. Removing a probe -is done through marker_probe_unregister(); it will disarm the probe. - -marker_synchronize_unregister() must be called between probe unregistration and -the first occurrence of -- the end of module exit function, - to make sure there is no caller left using the probe; -- the free of any resource used by the probes, - to make sure the probes wont be accessing invalid data. -This, and the fact that preemption is disabled around the probe call, make sure -that probe removal and module unload are safe. See the "Probe example" section -below for a sample probe module. - -The marker mechanism supports inserting multiple instances of the same marker. -Markers can be put in inline functions, inlined static functions, and -unrolled loops as well as regular functions. - -The naming scheme "subsystem_event" is suggested here as a convention intended -to limit collisions. Marker names are global to the kernel: they are considered -as being the same whether they are in the core kernel image or in modules. -Conflicting format strings for markers with the same name will cause the markers -to be detected to have a different format string not to be armed and will output -a printk warning which identifies the inconsistency: - -"Format mismatch for probe probe_name (format), marker (format)" - -Another way to use markers is to simply define the marker without generating any -function call to actually call into the marker. This is useful in combination -with tracepoint probes in a scheme like this : - -void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk); - -DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name, - "arg1 %u pid %d"); - -notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk) -{ - struct marker *marker = &GET_MARKER(kernel_irq_entry); - /* write data to trace buffers ... */ -} - -* Probe / marker example - -See the example provided in samples/markers/src - -Compile them with your kernel. - -Run, as root : -modprobe marker-example (insmod order is not important) -modprobe probe-example -cat /proc/marker-example (returns an expected error) -rmmod marker-example probe-example -dmesg diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index ab8aef9bb8e..8f079b865ad 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index bb5b77c66d0..4678078fede 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4af56036a6b..b7bbb5ddd7a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/marker.h b/include/linux/marker.h deleted file mode 100644 index b85e74ca782..00000000000 --- a/include/linux/marker.h +++ /dev/null @@ -1,221 +0,0 @@ -#ifndef _LINUX_MARKER_H -#define _LINUX_MARKER_H - -/* - * Code markup for dynamic and static tracing. - * - * See Documentation/marker.txt. - * - * (C) Copyright 2006 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include - -struct module; -struct marker; - -/** - * marker_probe_func - Type of a marker probe function - * @probe_private: probe private data - * @call_private: call site private data - * @fmt: format string - * @args: variable argument list pointer. Use a pointer to overcome C's - * inability to pass this around as a pointer in a portable manner in - * the callee otherwise. - * - * Type of marker probe functions. They receive the mdata and need to parse the - * format string to recover the variable argument list. - */ -typedef void marker_probe_func(void *probe_private, void *call_private, - const char *fmt, va_list *args); - -struct marker_probe_closure { - marker_probe_func *func; /* Callback */ - void *probe_private; /* Private probe data */ -}; - -struct marker { - const char *name; /* Marker name */ - const char *format; /* Marker format string, describing the - * variable argument list. - */ - char state; /* Marker state. */ - char ptype; /* probe type : 0 : single, 1 : multi */ - /* Probe wrapper */ - void (*call)(const struct marker *mdata, void *call_private, ...); - struct marker_probe_closure single; - struct marker_probe_closure *multi; - const char *tp_name; /* Optional tracepoint name */ - void *tp_cb; /* Optional tracepoint callback */ -} __attribute__((aligned(8))); - -#ifdef CONFIG_MARKERS - -#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ - NULL, tp_name_str, tp_cb } - -#define DEFINE_MARKER(name, format) \ - _DEFINE_MARKER(name, NULL, NULL, format) - -#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ - _DEFINE_MARKER(name, #tp_name, tp_cb, format) - -/* - * Note : the empty asm volatile with read constraint is used here instead of a - * "used" attribute to fix a gcc 4.1.x bug. - * Make sure the alignment of the structure in the __markers section will - * not add unwanted padding between the beginning of the section and the - * structure. Force alignment to the same alignment as the section start. - * - * The "generic" argument controls which marker enabling mechanism must be used. - * If generic is true, a variable read is used. - * If generic is false, immediate values are used. - */ -#define __trace_mark(generic, name, call_private, format, args...) \ - do { \ - DEFINE_MARKER(name, format); \ - __mark_check_format(format, ## args); \ - if (unlikely(__mark_##name.state)) { \ - (*__mark_##name.call) \ - (&__mark_##name, call_private, ## args);\ - } \ - } while (0) - -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ - __mark_check_format(format, ## args); \ - (*__mark_##name.call)(&__mark_##name, call_private, \ - ## args); \ - } while (0) - -extern void marker_update_probe_range(struct marker *begin, - struct marker *end); - -#define GET_MARKER(name) (__mark_##name) - -#else /* !CONFIG_MARKERS */ -#define DEFINE_MARKER(name, tp_name, tp_cb, format) -#define __trace_mark(generic, name, call_private, format, args...) \ - __mark_check_format(format, ## args) -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - __mark_check_format(format, ## args); \ - } while (0) -static inline void marker_update_probe_range(struct marker *begin, - struct marker *end) -{ } -#define GET_MARKER(name) -#endif /* CONFIG_MARKERS */ - -/** - * trace_mark - Marker using code patching - * @name: marker name, not quoted. - * @format: format string - * @args...: variable argument list - * - * Places a marker using optimized code patching technique (imv_read()) - * to be enabled when immediate values are present. - */ -#define trace_mark(name, format, args...) \ - __trace_mark(0, name, NULL, format, ## args) - -/** - * _trace_mark - Marker using variable read - * @name: marker name, not quoted. - * @format: format string - * @args...: variable argument list - * - * Places a marker using a standard memory read (_imv_read()) to be - * enabled. Should be used for markers in code paths where instruction - * modification based enabling is not welcome. (__init and __exit functions, - * lockdep, some traps, printk). - */ -#define _trace_mark(name, format, args...) \ - __trace_mark(1, name, NULL, format, ## args) - -/** - * trace_mark_tp - Marker in a tracepoint callback - * @name: marker name, not quoted. - * @tp_name: tracepoint name, not quoted. - * @tp_cb: tracepoint callback. Should have an associated global symbol so it - * is not optimized away by the compiler (should not be static). - * @format: format string - * @args...: variable argument list - * - * Places a marker in a tracepoint callback. - */ -#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \ - __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args) - -/** - * MARK_NOARGS - Format string for a marker with no argument. - */ -#define MARK_NOARGS " " - -/* To be used for string format validity checking with gcc */ -static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) -{ -} - -#define __mark_check_format(format, args...) \ - do { \ - if (0) \ - ___mark_check_format(format, ## args); \ - } while (0) - -extern marker_probe_func __mark_empty_function; - -extern void marker_probe_cb(const struct marker *mdata, - void *call_private, ...); - -/* - * Connect a probe to a marker. - * private data pointer must be a valid allocated memory address, or NULL. - */ -extern int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *probe_private); - -/* - * Returns the private data given to marker_probe_register. - */ -extern int marker_probe_unregister(const char *name, - marker_probe_func *probe, void *probe_private); -/* - * Unregister a marker by providing the registered private data. - */ -extern int marker_probe_unregister_private_data(marker_probe_func *probe, - void *probe_private); - -extern void *marker_get_private_data(const char *name, marker_probe_func *probe, - int num); - -/* - * marker_synchronize_unregister must be called between the last marker probe - * unregistration and the first one of - * - the end of module exit function - * - the free of any resource used by the probes - * to ensure the code and data are valid for any possibly running probes. - */ -#define marker_synchronize_unregister() synchronize_sched() - -#endif diff --git a/include/linux/module.h b/include/linux/module.h index f8f92d015ef..1c755b2f937 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -327,10 +326,6 @@ struct module /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; -#ifdef CONFIG_MARKERS - struct marker *markers; - unsigned int num_markers; -#endif #ifdef CONFIG_TRACEPOINTS struct tracepoint *tracepoints; unsigned int num_tracepoints; @@ -535,8 +530,6 @@ int unregister_module_notifier(struct notifier_block * nb); extern void print_modules(void); -extern void module_update_markers(void); - extern void module_update_tracepoints(void); extern int module_get_iter_tracepoints(struct tracepoint_iter *iter); @@ -651,10 +644,6 @@ static inline void print_modules(void) { } -static inline void module_update_markers(void) -{ -} - static inline void module_update_tracepoints(void) { } diff --git a/init/Kconfig b/init/Kconfig index 8e8b76d8a27..4cc0fa13d5e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1054,13 +1054,6 @@ config PROFILING config TRACEPOINTS bool -config MARKERS - bool "Activate markers" - select TRACEPOINTS - help - Place an empty function call at each marker site. Can be - dynamically changed for a probe function. - source "arch/Kconfig" config SLOW_WORK diff --git a/kernel/Makefile b/kernel/Makefile index 3d9c7e27e3f..7c9b0a58550 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -87,7 +87,6 @@ obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o -obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ diff --git a/kernel/marker.c b/kernel/marker.c deleted file mode 100644 index ea54f264786..00000000000 --- a/kernel/marker.c +++ /dev/null @@ -1,930 +0,0 @@ -/* - * Copyright (C) 2007 Mathieu Desnoyers - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct marker __start___markers[]; -extern struct marker __stop___markers[]; - -/* Set to 1 to enable marker debug output */ -static const int marker_debug; - -/* - * markers_mutex nests inside module_mutex. Markers mutex protects the builtin - * and module markers and the hash table. - */ -static DEFINE_MUTEX(markers_mutex); - -/* - * Marker hash table, containing the active markers. - * Protected by module_mutex. - */ -#define MARKER_HASH_BITS 6 -#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) -static struct hlist_head marker_table[MARKER_TABLE_SIZE]; - -/* - * Note about RCU : - * It is used to make sure every handler has finished using its private data - * between two consecutive operation (add or remove) on a given marker. It is - * also used to delay the free of multiple probes array until a quiescent state - * is reached. - * marker entries modifications are protected by the markers_mutex. - */ -struct marker_entry { - struct hlist_node hlist; - char *format; - /* Probe wrapper */ - void (*call)(const struct marker *mdata, void *call_private, ...); - struct marker_probe_closure single; - struct marker_probe_closure *multi; - int refcount; /* Number of times armed. 0 if disarmed. */ - struct rcu_head rcu; - void *oldptr; - int rcu_pending; - unsigned char ptype:1; - unsigned char format_allocated:1; - char name[0]; /* Contains name'\0'format'\0' */ -}; - -/** - * __mark_empty_function - Empty probe callback - * @probe_private: probe private data - * @call_private: call site private data - * @fmt: format string - * @...: variable argument list - * - * Empty callback provided as a probe to the markers. By providing this to a - * disabled marker, we make sure the execution flow is always valid even - * though the function pointer change and the marker enabling are two distinct - * operations that modifies the execution flow of preemptible code. - */ -notrace void __mark_empty_function(void *probe_private, void *call_private, - const char *fmt, va_list *args) -{ -} -EXPORT_SYMBOL_GPL(__mark_empty_function); - -/* - * marker_probe_cb Callback that prepares the variable argument list for probes. - * @mdata: pointer of type struct marker - * @call_private: caller site private data - * @...: Variable argument list. - * - * Since we do not use "typical" pointer based RCU in the 1 argument case, we - * need to put a full smp_rmb() in this branch. This is why we do not use - * rcu_dereference() for the pointer read. - */ -notrace void marker_probe_cb(const struct marker *mdata, - void *call_private, ...) -{ - va_list args; - char ptype; - - /* - * rcu_read_lock_sched does two things : disabling preemption to make - * sure the teardown of the callbacks can be done correctly when they - * are in modules and they insure RCU read coherency. - */ - rcu_read_lock_sched_notrace(); - ptype = mdata->ptype; - if (likely(!ptype)) { - marker_probe_func *func; - /* Must read the ptype before ptr. They are not data dependant, - * so we put an explicit smp_rmb() here. */ - smp_rmb(); - func = mdata->single.func; - /* Must read the ptr before private data. They are not data - * dependant, so we put an explicit smp_rmb() here. */ - smp_rmb(); - va_start(args, call_private); - func(mdata->single.probe_private, call_private, mdata->format, - &args); - va_end(args); - } else { - struct marker_probe_closure *multi; - int i; - /* - * Read mdata->ptype before mdata->multi. - */ - smp_rmb(); - multi = mdata->multi; - /* - * multi points to an array, therefore accessing the array - * depends on reading multi. However, even in this case, - * we must insure that the pointer is read _before_ the array - * data. Same as rcu_dereference, but we need a full smp_rmb() - * in the fast path, so put the explicit barrier here. - */ - smp_read_barrier_depends(); - for (i = 0; multi[i].func; i++) { - va_start(args, call_private); - multi[i].func(multi[i].probe_private, call_private, - mdata->format, &args); - va_end(args); - } - } - rcu_read_unlock_sched_notrace(); -} -EXPORT_SYMBOL_GPL(marker_probe_cb); - -/* - * marker_probe_cb Callback that does not prepare the variable argument list. - * @mdata: pointer of type struct marker - * @call_private: caller site private data - * @...: Variable argument list. - * - * Should be connected to markers "MARK_NOARGS". - */ -static notrace void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, ...) -{ - va_list args; /* not initialized */ - char ptype; - - rcu_read_lock_sched_notrace(); - ptype = mdata->ptype; - if (likely(!ptype)) { - marker_probe_func *func; - /* Must read the ptype before ptr. They are not data dependant, - * so we put an explicit smp_rmb() here. */ - smp_rmb(); - func = mdata->single.func; - /* Must read the ptr before private data. They are not data - * dependant, so we put an explicit smp_rmb() here. */ - smp_rmb(); - func(mdata->single.probe_private, call_private, mdata->format, - &args); - } else { - struct marker_probe_closure *multi; - int i; - /* - * Read mdata->ptype before mdata->multi. - */ - smp_rmb(); - multi = mdata->multi; - /* - * multi points to an array, therefore accessing the array - * depends on reading multi. However, even in this case, - * we must insure that the pointer is read _before_ the array - * data. Same as rcu_dereference, but we need a full smp_rmb() - * in the fast path, so put the explicit barrier here. - */ - smp_read_barrier_depends(); - for (i = 0; multi[i].func; i++) - multi[i].func(multi[i].probe_private, call_private, - mdata->format, &args); - } - rcu_read_unlock_sched_notrace(); -} - -static void free_old_closure(struct rcu_head *head) -{ - struct marker_entry *entry = container_of(head, - struct marker_entry, rcu); - kfree(entry->oldptr); - /* Make sure we free the data before setting the pending flag to 0 */ - smp_wmb(); - entry->rcu_pending = 0; -} - -static void debug_print_probes(struct marker_entry *entry) -{ - int i; - - if (!marker_debug) - return; - - if (!entry->ptype) { - printk(KERN_DEBUG "Single probe : %p %p\n", - entry->single.func, - entry->single.probe_private); - } else { - for (i = 0; entry->multi[i].func; i++) - printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, - entry->multi[i].func, - entry->multi[i].probe_private); - } -} - -static struct marker_probe_closure * -marker_entry_add_probe(struct marker_entry *entry, - marker_probe_func *probe, void *probe_private) -{ - int nr_probes = 0; - struct marker_probe_closure *old, *new; - - WARN_ON(!probe); - - debug_print_probes(entry); - old = entry->multi; - if (!entry->ptype) { - if (entry->single.func == probe && - entry->single.probe_private == probe_private) - return ERR_PTR(-EBUSY); - if (entry->single.func == __mark_empty_function) { - /* 0 -> 1 probes */ - entry->single.func = probe; - entry->single.probe_private = probe_private; - entry->refcount = 1; - entry->ptype = 0; - debug_print_probes(entry); - return NULL; - } else { - /* 1 -> 2 probes */ - nr_probes = 1; - old = NULL; - } - } else { - /* (N -> N+1), (N != 0, 1) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) - if (old[nr_probes].func == probe - && old[nr_probes].probe_private - == probe_private) - return ERR_PTR(-EBUSY); - } - /* + 2 : one for new probe, one for NULL func */ - new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), - GFP_KERNEL); - if (new == NULL) - return ERR_PTR(-ENOMEM); - if (!old) - new[0] = entry->single; - else - memcpy(new, old, - nr_probes * sizeof(struct marker_probe_closure)); - new[nr_probes].func = probe; - new[nr_probes].probe_private = probe_private; - entry->refcount = nr_probes + 1; - entry->multi = new; - entry->ptype = 1; - debug_print_probes(entry); - return old; -} - -static struct marker_probe_closure * -marker_entry_remove_probe(struct marker_entry *entry, - marker_probe_func *probe, void *probe_private) -{ - int nr_probes = 0, nr_del = 0, i; - struct marker_probe_closure *old, *new; - - old = entry->multi; - - debug_print_probes(entry); - if (!entry->ptype) { - /* 0 -> N is an error */ - WARN_ON(entry->single.func == __mark_empty_function); - /* 1 -> 0 probes */ - WARN_ON(probe && entry->single.func != probe); - WARN_ON(entry->single.probe_private != probe_private); - entry->single.func = __mark_empty_function; - entry->refcount = 0; - entry->ptype = 0; - debug_print_probes(entry); - return NULL; - } else { - /* (N -> M), (N > 1, M >= 0) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if ((!probe || old[nr_probes].func == probe) - && old[nr_probes].probe_private - == probe_private) - nr_del++; - } - } - - if (nr_probes - nr_del == 0) { - /* N -> 0, (N > 1) */ - entry->single.func = __mark_empty_function; - entry->refcount = 0; - entry->ptype = 0; - } else if (nr_probes - nr_del == 1) { - /* N -> 1, (N > 1) */ - for (i = 0; old[i].func; i++) - if ((probe && old[i].func != probe) || - old[i].probe_private != probe_private) - entry->single = old[i]; - entry->refcount = 1; - entry->ptype = 0; - } else { - int j = 0; - /* N -> M, (N > 1, M > 1) */ - /* + 1 for NULL */ - new = kzalloc((nr_probes - nr_del + 1) - * sizeof(struct marker_probe_closure), GFP_KERNEL); - if (new == NULL) - return ERR_PTR(-ENOMEM); - for (i = 0; old[i].func; i++) - if ((probe && old[i].func != probe) || - old[i].probe_private != probe_private) - new[j++] = old[i]; - entry->refcount = nr_probes - nr_del; - entry->ptype = 1; - entry->multi = new; - } - debug_print_probes(entry); - return old; -} - -/* - * Get marker if the marker is present in the marker hash table. - * Must be called with markers_mutex held. - * Returns NULL if not present. - */ -static struct marker_entry *get_marker(const char *name) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - u32 hash = jhash(name, strlen(name), 0); - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) - return e; - } - return NULL; -} - -/* - * Add the marker to the marker hash table. Must be called with markers_mutex - * held. - */ -static struct marker_entry *add_marker(const char *name, const char *format) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - size_t name_len = strlen(name) + 1; - size_t format_len = 0; - u32 hash = jhash(name, name_len-1, 0); - - if (format) - format_len = strlen(format) + 1; - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - printk(KERN_NOTICE - "Marker %s busy\n", name); - return ERR_PTR(-EBUSY); /* Already there */ - } - } - /* - * Using kmalloc here to allocate a variable length element. Could - * cause some memory fragmentation if overused. - */ - e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, - GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - memcpy(&e->name[0], name, name_len); - if (format) { - e->format = &e->name[name_len]; - memcpy(e->format, format, format_len); - if (strcmp(e->format, MARK_NOARGS) == 0) - e->call = marker_probe_cb_noarg; - else - e->call = marker_probe_cb; - trace_mark(core_marker_format, "name %s format %s", - e->name, e->format); - } else { - e->format = NULL; - e->call = marker_probe_cb; - } - e->single.func = __mark_empty_function; - e->single.probe_private = NULL; - e->multi = NULL; - e->ptype = 0; - e->format_allocated = 0; - e->refcount = 0; - e->rcu_pending = 0; - hlist_add_head(&e->hlist, head); - return e; -} - -/* - * Remove the marker from the marker hash table. Must be called with mutex_lock - * held. - */ -static int remove_marker(const char *name) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - int found = 0; - size_t len = strlen(name) + 1; - u32 hash = jhash(name, len-1, 0); - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - found = 1; - break; - } - } - if (!found) - return -ENOENT; - if (e->single.func != __mark_empty_function) - return -EBUSY; - hlist_del(&e->hlist); - if (e->format_allocated) - kfree(e->format); - /* Make sure the call_rcu has been executed */ - if (e->rcu_pending) - rcu_barrier_sched(); - kfree(e); - return 0; -} - -/* - * Set the mark_entry format to the format found in the element. - */ -static int marker_set_format(struct marker_entry *entry, const char *format) -{ - entry->format = kstrdup(format, GFP_KERNEL); - if (!entry->format) - return -ENOMEM; - entry->format_allocated = 1; - - trace_mark(core_marker_format, "name %s format %s", - entry->name, entry->format); - return 0; -} - -/* - * Sets the probe callback corresponding to one marker. - */ -static int set_marker(struct marker_entry *entry, struct marker *elem, - int active) -{ - int ret = 0; - WARN_ON(strcmp(entry->name, elem->name) != 0); - - if (entry->format) { - if (strcmp(entry->format, elem->format) != 0) { - printk(KERN_NOTICE - "Format mismatch for probe %s " - "(%s), marker (%s)\n", - entry->name, - entry->format, - elem->format); - return -EPERM; - } - } else { - ret = marker_set_format(entry, elem->format); - if (ret) - return ret; - } - - /* - * probe_cb setup (statically known) is done here. It is - * asynchronous with the rest of execution, therefore we only - * pass from a "safe" callback (with argument) to an "unsafe" - * callback (does not set arguments). - */ - elem->call = entry->call; - /* - * Sanity check : - * We only update the single probe private data when the ptr is - * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) - */ - WARN_ON(elem->single.func != __mark_empty_function - && elem->single.probe_private != entry->single.probe_private - && !elem->ptype); - elem->single.probe_private = entry->single.probe_private; - /* - * Make sure the private data is valid when we update the - * single probe ptr. - */ - smp_wmb(); - elem->single.func = entry->single.func; - /* - * We also make sure that the new probe callbacks array is consistent - * before setting a pointer to it. - */ - rcu_assign_pointer(elem->multi, entry->multi); - /* - * Update the function or multi probe array pointer before setting the - * ptype. - */ - smp_wmb(); - elem->ptype = entry->ptype; - - if (elem->tp_name && (active ^ elem->state)) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - - if (active) { - /* - * try_module_get should always succeed because we hold - * lock_module() to get the tp_cb address. - */ - ret = try_module_get(__module_text_address( - (unsigned long)elem->tp_cb)); - BUG_ON(!ret); - ret = tracepoint_probe_register_noupdate( - elem->tp_name, - elem->tp_cb); - } else { - ret = tracepoint_probe_unregister_noupdate( - elem->tp_name, - elem->tp_cb); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address( - (unsigned long)elem->tp_cb)); - } - } - elem->state = active; - - return ret; -} - -/* - * Disable a marker and its probe callback. - * Note: only waiting an RCU period after setting elem->call to the empty - * function insures that the original callback is not used anymore. This insured - * by rcu_read_lock_sched around the call site. - */ -static void disable_marker(struct marker *elem) -{ - int ret; - - /* leave "call" as is. It is known statically. */ - if (elem->tp_name && elem->state) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - ret = tracepoint_probe_unregister_noupdate(elem->tp_name, - elem->tp_cb); - WARN_ON(ret); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address((unsigned long)elem->tp_cb)); - } - elem->state = 0; - elem->single.func = __mark_empty_function; - /* Update the function before setting the ptype */ - smp_wmb(); - elem->ptype = 0; /* single probe */ - /* - * Leave the private data and id there, because removal is racy and - * should be done only after an RCU period. These are never used until - * the next initialization anyway. - */ -} - -/** - * marker_update_probe_range - Update a probe range - * @begin: beginning of the range - * @end: end of the range - * - * Updates the probe callback corresponding to a range of markers. - */ -void marker_update_probe_range(struct marker *begin, - struct marker *end) -{ - struct marker *iter; - struct marker_entry *mark_entry; - - mutex_lock(&markers_mutex); - for (iter = begin; iter < end; iter++) { - mark_entry = get_marker(iter->name); - if (mark_entry) { - set_marker(mark_entry, iter, !!mark_entry->refcount); - /* - * ignore error, continue - */ - } else { - disable_marker(iter); - } - } - mutex_unlock(&markers_mutex); -} - -/* - * Update probes, removing the faulty probes. - * - * Internal callback only changed before the first probe is connected to it. - * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 - * transitions. All other transitions will leave the old private data valid. - * This makes the non-atomicity of the callback/private data updates valid. - * - * "special case" updates : - * 0 -> 1 callback - * 1 -> 0 callback - * 1 -> 2 callbacks - * 2 -> 1 callbacks - * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. - * Site effect : marker_set_format may delete the marker entry (creating a - * replacement). - */ -static void marker_update_probes(void) -{ - /* Core kernel markers */ - marker_update_probe_range(__start___markers, __stop___markers); - /* Markers in modules. */ - module_update_markers(); - tracepoint_probe_update_all(); -} - -/** - * marker_probe_register - Connect a probe to a marker - * @name: marker name - * @format: format string - * @probe: probe handler - * @probe_private: probe private data - * - * private data must be a valid allocated memory address, or NULL. - * Returns 0 if ok, error value on error. - * The probe address must at least be aligned on the architecture pointer size. - */ -int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - int ret = 0; - struct marker_probe_closure *old; - - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) { - entry = add_marker(name, format); - if (IS_ERR(entry)) - ret = PTR_ERR(entry); - } else if (format) { - if (!entry->format) - ret = marker_set_format(entry, format); - else if (strcmp(entry->format, format)) - ret = -EPERM; - } - if (ret) - goto end; - - /* - * If we detect that a call_rcu is pending for this marker, - * make sure it's executed now. - */ - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_add_probe(entry, probe, probe_private); - if (IS_ERR(old)) { - ret = PTR_ERR(old); - goto end; - } - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_register); - -/** - * marker_probe_unregister - Disconnect a probe from a marker - * @name: marker name - * @probe: probe function pointer - * @probe_private: probe private data - * - * Returns the private data given to marker_probe_register, or an ERR_PTR(). - * We do not need to call a synchronize_sched to make sure the probes have - * finished running before doing a module unload, because the module unload - * itself uses stop_machine(), which insures that every preempt disabled section - * have finished. - */ -int marker_probe_unregister(const char *name, - marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - struct marker_probe_closure *old; - int ret = -ENOENT; - - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_remove_probe(entry, probe, probe_private); - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); - remove_marker(name); /* Ignore busy error message */ - ret = 0; -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_unregister); - -static struct marker_entry * -get_marker_from_private_data(marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - unsigned int i; - struct hlist_head *head; - struct hlist_node *node; - - for (i = 0; i < MARKER_TABLE_SIZE; i++) { - head = &marker_table[i]; - hlist_for_each_entry(entry, node, head, hlist) { - if (!entry->ptype) { - if (entry->single.func == probe - && entry->single.probe_private - == probe_private) - return entry; - } else { - struct marker_probe_closure *closure; - closure = entry->multi; - for (i = 0; closure[i].func; i++) { - if (closure[i].func == probe && - closure[i].probe_private - == probe_private) - return entry; - } - } - } - } - return NULL; -} - -/** - * marker_probe_unregister_private_data - Disconnect a probe from a marker - * @probe: probe function - * @probe_private: probe private data - * - * Unregister a probe by providing the registered private data. - * Only removes the first marker found in hash table. - * Return 0 on success or error value. - * We do not need to call a synchronize_sched to make sure the probes have - * finished running before doing a module unload, because the module unload - * itself uses stop_machine(), which insures that every preempt disabled section - * have finished. - */ -int marker_probe_unregister_private_data(marker_probe_func *probe, - void *probe_private) -{ - struct marker_entry *entry; - int ret = 0; - struct marker_probe_closure *old; - - mutex_lock(&markers_mutex); - entry = get_marker_from_private_data(probe, probe_private); - if (!entry) { - ret = -ENOENT; - goto end; - } - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_remove_probe(entry, NULL, probe_private); - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker_from_private_data(probe, probe_private); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); - remove_marker(entry->name); /* Ignore busy error message */ -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); - -/** - * marker_get_private_data - Get a marker's probe private data - * @name: marker name - * @probe: probe to match - * @num: get the nth matching probe's private data - * - * Returns the nth private data pointer (starting from 0) matching, or an - * ERR_PTR. - * Returns the private data pointer, or an ERR_PTR. - * The private data pointer should _only_ be dereferenced if the caller is the - * owner of the data, or its content could vanish. This is mostly used to - * confirm that a caller is the owner of a registered probe. - */ -void *marker_get_private_data(const char *name, marker_probe_func *probe, - int num) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - size_t name_len = strlen(name) + 1; - u32 hash = jhash(name, name_len-1, 0); - int i; - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - if (!e->ptype) { - if (num == 0 && e->single.func == probe) - return e->single.probe_private; - } else { - struct marker_probe_closure *closure; - int match = 0; - closure = e->multi; - for (i = 0; closure[i].func; i++) { - if (closure[i].func != probe) - continue; - if (match++ == num) - return closure[i].probe_private; - } - } - break; - } - } - return ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL_GPL(marker_get_private_data); - -#ifdef CONFIG_MODULES - -int marker_module_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct module *mod = data; - - switch (val) { - case MODULE_STATE_COMING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - case MODULE_STATE_GOING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - } - return 0; -} - -struct notifier_block marker_module_nb = { - .notifier_call = marker_module_notify, - .priority = 0, -}; - -static int init_markers(void) -{ - return register_module_notifier(&marker_module_nb); -} -__initcall(init_markers); - -#endif /* CONFIG_MODULES */ diff --git a/kernel/module.c b/kernel/module.c index 05ce49ced8f..b6ee424245d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2237,10 +2237,6 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->ctors), &mod->num_ctors); #endif -#ifdef CONFIG_MARKERS - mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", - sizeof(*mod->markers), &mod->num_markers); -#endif #ifdef CONFIG_TRACEPOINTS mod->tracepoints = section_objs(hdr, sechdrs, secstrings, "__tracepoints", @@ -2958,20 +2954,6 @@ void module_layout(struct module *mod, EXPORT_SYMBOL(module_layout); #endif -#ifdef CONFIG_MARKERS -void module_update_markers(void) -{ - struct module *mod; - - mutex_lock(&module_mutex); - list_for_each_entry(mod, &modules, list) - if (!mod->taints) - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - mutex_unlock(&module_mutex); -} -#endif - #ifdef CONFIG_TRACEPOINTS void module_update_tracepoints(void) { diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 687699d365a..2547d8813cf 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/samples/Kconfig b/samples/Kconfig index 428b065ba69..b92bde3c6a8 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -7,12 +7,6 @@ menuconfig SAMPLES if SAMPLES -config SAMPLE_MARKERS - tristate "Build markers examples -- loadable modules only" - depends on MARKERS && m - help - This build markers example modules. - config SAMPLE_TRACEPOINTS tristate "Build tracepoints examples -- loadable modules only" depends on TRACEPOINTS && m diff --git a/samples/Makefile b/samples/Makefile index 13e4b470b53..43343a03b1f 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,3 +1,3 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/ +obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ diff --git a/samples/markers/Makefile b/samples/markers/Makefile deleted file mode 100644 index 6d7231265f0..00000000000 --- a/samples/markers/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# builds the kprobes example kernel modules; -# then to use one (as root): insmod - -obj-$(CONFIG_SAMPLE_MARKERS) += probe-example.o marker-example.o diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c deleted file mode 100644 index e9cd9c0bc84..00000000000 --- a/samples/markers/marker-example.c +++ /dev/null @@ -1,53 +0,0 @@ -/* marker-example.c - * - * Executes a marker when /proc/marker-example is opened. - * - * (C) Copyright 2007 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include -#include -#include - -struct proc_dir_entry *pentry_example; - -static int my_open(struct inode *inode, struct file *file) -{ - int i; - - trace_mark(subsystem_event, "integer %d string %s", 123, - "example string"); - for (i = 0; i < 10; i++) - trace_mark(subsystem_eventb, MARK_NOARGS); - return -EPERM; -} - -static struct file_operations mark_ops = { - .open = my_open, -}; - -static int __init example_init(void) -{ - printk(KERN_ALERT "example init\n"); - pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops); - if (!pentry_example) - return -EPERM; - return 0; -} - -static void __exit example_exit(void) -{ - printk(KERN_ALERT "example exit\n"); - remove_proc_entry("marker-example", NULL); -} - -module_init(example_init) -module_exit(example_exit) - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("Marker example"); diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c deleted file mode 100644 index 2dfb3b32937..00000000000 --- a/samples/markers/probe-example.c +++ /dev/null @@ -1,92 +0,0 @@ -/* probe-example.c - * - * Connects two functions to marker call sites. - * - * (C) Copyright 2007 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include -#include -#include -#include - -struct probe_data { - const char *name; - const char *format; - marker_probe_func *probe_func; -}; - -void probe_subsystem_event(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - /* Declare args */ - unsigned int value; - const char *mystr; - - /* Assign args */ - value = va_arg(*args, typeof(value)); - mystr = va_arg(*args, typeof(mystr)); - - /* Call printk */ - printk(KERN_INFO "Value %u, string %s\n", value, mystr); - - /* or count, check rights, serialize data in a buffer */ -} - -atomic_t eventb_count = ATOMIC_INIT(0); - -void probe_subsystem_eventb(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - /* Increment counter */ - atomic_inc(&eventb_count); -} - -static struct probe_data probe_array[] = -{ - { .name = "subsystem_event", - .format = "integer %d string %s", - .probe_func = probe_subsystem_event }, - { .name = "subsystem_eventb", - .format = MARK_NOARGS, - .probe_func = probe_subsystem_eventb }, -}; - -static int __init probe_init(void) -{ - int result; - int i; - - for (i = 0; i < ARRAY_SIZE(probe_array); i++) { - result = marker_probe_register(probe_array[i].name, - probe_array[i].format, - probe_array[i].probe_func, &probe_array[i]); - if (result) - printk(KERN_INFO "Unable to register probe %s\n", - probe_array[i].name); - } - return 0; -} - -static void __exit probe_fini(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(probe_array); i++) - marker_probe_unregister(probe_array[i].name, - probe_array[i].probe_func, &probe_array[i]); - printk(KERN_INFO "Number of event b : %u\n", - atomic_read(&eventb_count)); - marker_synchronize_unregister(); -} - -module_init(probe_init); -module_exit(probe_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("SUBSYSTEM Probe"); diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index f4053dc7b5d..8f14c81abbc 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -13,7 +13,6 @@ # 2) modpost is then used to # 3) create one .mod.c file pr. module # 4) create one Module.symvers file with CRC for all exported symbols -# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers # 5) compile all .mod.c files # 6) final link of the module to a file @@ -59,10 +58,6 @@ include scripts/Makefile.lib kernelsymfile := $(objtree)/Module.symvers modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers -kernelmarkersfile := $(objtree)/Module.markers -modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers - -markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) # Step 1), find all modules listed in $(MODVERDIR)/ __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) @@ -85,8 +80,6 @@ modpost = scripts/mod/modpost \ $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ - $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ - $(if $(CONFIG_MARKERS),-M $(markersfile)) \ $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \ $(if $(cross_build),-c) @@ -101,17 +94,12 @@ quiet_cmd_kernel-mod = MODPOST $@ cmd_kernel-mod = $(modpost) $@ vmlinux.o: FORCE - @rm -fr $(kernelmarkersfile) $(call cmd,kernel-mod) # Declare generated files as targets for modpost $(symverfile): __modpost ; $(modules:.ko=.mod.c): __modpost ; -ifdef CONFIG_MARKERS -$(markersfile): __modpost ; -endif - # Step 5), compile all *.mod.c files -- cgit v1.2.3-70-g09d2 From eb8cdec4a984fde123a91250dcc9e0bddf5eafdc Mon Sep 17 00:00:00 2001 From: Bernd Schmidt Date: Mon, 21 Sep 2009 17:03:57 -0700 Subject: nommu: add support for Memory Protection Units (MPU) Some architectures (like the Blackfin arch) implement some of the "simpler" features that one would expect out of a MMU such as memory protection. In our case, we actually get read/write/exec protection down to the page boundary so processes can't stomp on each other let alone the kernel. There is a performance decrease (which depends greatly on the workload) however as the hardware/software interaction was not optimized at design time. Signed-off-by: Bernd Schmidt Signed-off-by: Bryan Wu Signed-off-by: Mike Frysinger Acked-by: David Howells Acked-by: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 5 +++++ mm/nommu.c | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index b6ee424245d..e6bc4b28aa6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1535,6 +1536,10 @@ static void free_module(struct module *mod) /* Finally, free the core (containing the module structure) */ module_free(mod, mod->module_core); + +#ifdef CONFIG_MPU + update_protections(current->mm); +#endif } void *__symbol_get(const char *symbol) diff --git a/mm/nommu.c b/mm/nommu.c index 2d02ca17ce1..1a4473faac4 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "internal.h" static inline __attribute__((format(printf, 1, 2))) @@ -622,6 +623,22 @@ static void put_nommu_region(struct vm_region *region) __put_nommu_region(region); } +/* + * update protection on a vma + */ +static void protect_vma(struct vm_area_struct *vma, unsigned long flags) +{ +#ifdef CONFIG_MPU + struct mm_struct *mm = vma->vm_mm; + long start = vma->vm_start & PAGE_MASK; + while (start < vma->vm_end) { + protect_page(mm, start, flags); + start += PAGE_SIZE; + } + update_protections(mm); +#endif +} + /* * add a VMA into a process's mm_struct in the appropriate place in the list * and tree and add to the address space's page tree also if not an anonymous @@ -641,6 +658,8 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) mm->map_count++; vma->vm_mm = mm; + protect_vma(vma, vma->vm_flags); + /* add the VMA to the mapping */ if (vma->vm_file) { mapping = vma->vm_file->f_mapping; @@ -703,6 +722,8 @@ static void delete_vma_from_mm(struct vm_area_struct *vma) kenter("%p", vma); + protect_vma(vma, 0); + mm->map_count--; if (mm->mmap_cache == vma) mm->mmap_cache = NULL; -- cgit v1.2.3-70-g09d2 From 115e8a288252ef748f34f8b7c1115c563d702eda Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Sep 2009 18:22:11 +0200 Subject: modules, tracing: Remove stale struct marker signature from module_layout() Linus reported this new build warning: kernel/module.c:2951: warning: ?struct marker? declared inside parameter list kernel/module.c:2951: warning: its scope is only this definition or declaration, which is probably not what you want Caused by: fc53776: tracing: Remove markers module_layout() is an artificial symbol with 'significant' symbols listed in its argument list so that it gets a proper argument types signature that modversions can pick up to decide whether a module is version-compatible or not. If these dont match then we wont even look at a module. Remove the stale marker symbol. Reported-by: Linus Torvalds LKML-Reference: Cc: Christoph Hellwig Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Rusty Russell Signed-off-by: Ingo Molnar --- kernel/module.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index b6ee424245d..392eb3defbc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2947,7 +2947,6 @@ void module_layout(struct module *mod, struct modversion_info *ver, struct kernel_param *kp, struct kernel_symbol *ks, - struct marker *marker, struct tracepoint *tp) { } -- cgit v1.2.3-70-g09d2 From 4a4962263f07d14660849ec134ee42b63e95ea9a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:50:42 +0100 Subject: module: reduce symbol table for loaded modules (v2) Discard all symbols not interesting for kallsyms use: absolute, section, and in the common case (!KALLSYMS_ALL) data ones. Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- include/linux/module.h | 10 ++++-- kernel/module.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 7 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 1c755b2f937..1d3ccb173fd 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -308,9 +308,13 @@ struct module #endif #ifdef CONFIG_KALLSYMS - /* We keep the symbol and string tables for kallsyms. */ - Elf_Sym *symtab; - unsigned int num_symtab; + /* + * We keep the symbol and string tables for kallsyms. + * The core_* fields below are temporary, loader-only (they + * could really be discarded after module init). + */ + Elf_Sym *symtab, *core_symtab; + unsigned int num_symtab, core_num_syms; char *strtab; /* Section attributes */ diff --git a/kernel/module.c b/kernel/module.c index e6bc4b28aa6..97f4d5e1553 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1862,13 +1862,68 @@ static char elf_type(const Elf_Sym *sym, return '?'; } +static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, + unsigned int shnum) +{ + const Elf_Shdr *sec; + + if (src->st_shndx == SHN_UNDEF + || src->st_shndx >= shnum + || !src->st_name) + return false; + + sec = sechdrs + src->st_shndx; + if (!(sec->sh_flags & SHF_ALLOC) +#ifndef CONFIG_KALLSYMS_ALL + || !(sec->sh_flags & SHF_EXECINSTR) +#endif + || (sec->sh_entsize & INIT_OFFSET_MASK)) + return false; + + return true; +} + +static unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + const Elf_Ehdr *hdr, + const char *secstrings) +{ + unsigned long symoffs; + Elf_Shdr *symsect = sechdrs + symindex; + const Elf_Sym *src; + unsigned int i, nsrc, ndst; + + /* Put symbol section at end of init part of module. */ + symsect->sh_flags |= SHF_ALLOC; + symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, + symindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + symsect->sh_name); + + src = (void *)hdr + symsect->sh_offset; + nsrc = symsect->sh_size / sizeof(*src); + for (ndst = i = 1; i < nsrc; ++i, ++src) + if (is_core_symbol(src, sechdrs, hdr->e_shnum)) + ++ndst; + + /* Append room for core symbols at end of core part. */ + symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); + mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + + return symoffs; +} + static void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, + unsigned long symoffs, const char *secstrings) { - unsigned int i; + unsigned int i, ndst; + const Elf_Sym *src; + Elf_Sym *dst; mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); @@ -1878,12 +1933,32 @@ static void add_kallsyms(struct module *mod, for (i = 0; i < mod->num_symtab; i++) mod->symtab[i].st_info = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); + + mod->core_symtab = dst = mod->module_core + symoffs; + src = mod->symtab; + *dst = *src; + for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { + if (!is_core_symbol(src, sechdrs, shnum)) + continue; + dst[ndst] = *src; + ++ndst; + } + mod->core_num_syms = ndst; } #else +static inline unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + const Elf_Hdr *hdr, + const char *secstrings) +{ +} static inline void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, + unsigned long symoffs, const char *secstrings) { } @@ -1959,6 +2034,9 @@ static noinline struct module *load_module(void __user *umod, struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ +#ifdef CONFIG_KALLSYMS + unsigned long symoffs; +#endif mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -2041,8 +2119,7 @@ static noinline struct module *load_module(void __user *umod, sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; #ifdef CONFIG_KALLSYMS - /* Keep symbol and string tables for decoding later. */ - sechdrs[symindex].sh_flags |= SHF_ALLOC; + /* Keep string table for decoding later. */ sechdrs[strindex].sh_flags |= SHF_ALLOC; #endif @@ -2109,6 +2186,7 @@ static noinline struct module *load_module(void __user *umod, this is done generically; there doesn't appear to be any special cases for the architectures. */ layout_sections(mod, hdr, sechdrs, secstrings); + symoffs = layout_symtab(mod, sechdrs, symindex, hdr, secstrings); /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); @@ -2313,7 +2391,8 @@ static noinline struct module *load_module(void __user *umod, percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, + symoffs, secstrings); if (!mod->taints) { struct _ddebug *debug; @@ -2491,6 +2570,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, /* Drop initial reference. */ module_put(mod); trim_init_extable(mod); +#ifdef CONFIG_KALLSYMS + mod->num_symtab = mod->core_num_syms; + mod->symtab = mod->core_symtab; +#endif module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3-70-g09d2 From 554bdfe5acf3715e87c8d5e25a4f9a896ac9f014 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:51:44 +0100 Subject: module: reduce string table for loaded modules (v2) Also remove all parts of the string table (referenced by the symbol table) that are not needed for kallsyms use (i.e. which were only referenced by symbols discarded by the previous patch, or not referenced at all for whatever reason). Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- kernel/module.c | 68 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 13 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 1d3ccb173fd..aca98036595 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -315,7 +315,7 @@ struct module */ Elf_Sym *symtab, *core_symtab; unsigned int num_symtab, core_num_syms; - char *strtab; + char *strtab, *core_strtab; /* Section attributes */ struct module_sect_attrs *sect_attrs; diff --git a/kernel/module.c b/kernel/module.c index 97f4d5e1553..39827c3d948 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1886,12 +1886,17 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, static unsigned long layout_symtab(struct module *mod, Elf_Shdr *sechdrs, unsigned int symindex, + unsigned int strindex, const Elf_Ehdr *hdr, - const char *secstrings) + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) { unsigned long symoffs; Elf_Shdr *symsect = sechdrs + symindex; + Elf_Shdr *strsect = sechdrs + strindex; const Elf_Sym *src; + const char *strtab; unsigned int i, nsrc, ndst; /* Put symbol section at end of init part of module. */ @@ -1902,14 +1907,31 @@ static unsigned long layout_symtab(struct module *mod, src = (void *)hdr + symsect->sh_offset; nsrc = symsect->sh_size / sizeof(*src); + strtab = (void *)hdr + strsect->sh_offset; for (ndst = i = 1; i < nsrc; ++i, ++src) - if (is_core_symbol(src, sechdrs, hdr->e_shnum)) + if (is_core_symbol(src, sechdrs, hdr->e_shnum)) { + unsigned int j = src->st_name; + + while(!__test_and_set_bit(j, strmap) && strtab[j]) + ++j; ++ndst; + } /* Append room for core symbols at end of core part. */ symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + /* Put string table section at end of init part of module. */ + strsect->sh_flags |= SHF_ALLOC; + strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, + strindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + strsect->sh_name); + + /* Append room for core symbols' strings at end of core part. */ + *pstroffs = mod->core_size; + __set_bit(0, strmap); + mod->core_size += bitmap_weight(strmap, strsect->sh_size); + return symoffs; } @@ -1919,11 +1941,14 @@ static void add_kallsyms(struct module *mod, unsigned int symindex, unsigned int strindex, unsigned long symoffs, - const char *secstrings) + unsigned long stroffs, + const char *secstrings, + unsigned long *strmap) { unsigned int i, ndst; const Elf_Sym *src; Elf_Sym *dst; + char *s; mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); @@ -1941,16 +1966,25 @@ static void add_kallsyms(struct module *mod, if (!is_core_symbol(src, sechdrs, shnum)) continue; dst[ndst] = *src; + dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name); ++ndst; } mod->core_num_syms = ndst; + + mod->core_strtab = s = mod->module_core + stroffs; + for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i) + if (test_bit(i, strmap)) + *++s = mod->strtab[i]; } #else static inline unsigned long layout_symtab(struct module *mod, Elf_Shdr *sechdrs, unsigned int symindex, + unsigned int strindex, const Elf_Hdr *hdr, - const char *secstrings) + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) { } static inline void add_kallsyms(struct module *mod, @@ -1959,7 +1993,9 @@ static inline void add_kallsyms(struct module *mod, unsigned int symindex, unsigned int strindex, unsigned long symoffs, - const char *secstrings) + unsigned long stroffs, + const char *secstrings, + const unsigned long *strmap) { } #endif /* CONFIG_KALLSYMS */ @@ -2035,7 +2071,7 @@ static noinline struct module *load_module(void __user *umod, long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ #ifdef CONFIG_KALLSYMS - unsigned long symoffs; + unsigned long symoffs, stroffs, *strmap; #endif mm_segment_t old_fs; @@ -2118,10 +2154,6 @@ static noinline struct module *load_module(void __user *umod, /* Don't keep modinfo and version sections. */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; -#ifdef CONFIG_KALLSYMS - /* Keep string table for decoding later. */ - sechdrs[strindex].sh_flags |= SHF_ALLOC; -#endif /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(sechdrs, versindex, mod)) { @@ -2157,6 +2189,13 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } + strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size) + * sizeof(long), GFP_KERNEL); + if (!strmap) { + err = -ENOMEM; + goto free_mod; + } + if (find_module(mod->name)) { err = -EEXIST; goto free_mod; @@ -2186,7 +2225,8 @@ static noinline struct module *load_module(void __user *umod, this is done generically; there doesn't appear to be any special cases for the architectures. */ layout_sections(mod, hdr, sechdrs, secstrings); - symoffs = layout_symtab(mod, sechdrs, symindex, hdr, secstrings); + symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr, + secstrings, &stroffs, strmap); /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); @@ -2392,7 +2432,9 @@ static noinline struct module *load_module(void __user *umod, sechdrs[pcpuindex].sh_size); add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, - symoffs, secstrings); + symoffs, stroffs, secstrings, strmap); + kfree(strmap); + strmap = NULL; if (!mod->taints) { struct _ddebug *debug; @@ -2481,6 +2523,7 @@ static noinline struct module *load_module(void __user *umod, percpu_modfree(percpu); free_mod: kfree(args); + kfree(strmap); free_hdr: vfree(hdr); return ERR_PTR(err); @@ -2573,6 +2616,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, #ifdef CONFIG_KALLSYMS mod->num_symtab = mod->core_num_syms; mod->symtab = mod->core_symtab; + mod->strtab = mod->core_strtab; #endif module_free(mod, mod->module_init); mod->module_init = NULL; -- cgit v1.2.3-70-g09d2 From a263f7763c364015f92e7c097fa46c6673f6fcb0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Sep 2009 00:32:58 -0600 Subject: module: fix memory leak when load fails after srcversion/version allocated Normally the twisty paths of sysfs will free the attributes, but not if we fail before we hook it into sysfs (which is the last thing we do in load_module). (This sysfs code is a turd, no doubt there are other issues lurking too). Reported-by: Tetsuo Handa Signed-off-by: Rusty Russell Cc: Catalin Marinas Tested-by: Tetsuo Handa --- kernel/module.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 39827c3d948..c54f10d90e1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1797,6 +1797,17 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, } } +static void free_modinfo(struct module *mod) +{ + struct module_attribute *attr; + int i; + + for (i = 0; (attr = modinfo_attrs[i]); i++) { + if (attr->free) + attr->free(mod); + } +} + #ifdef CONFIG_KALLSYMS /* lookup symbol in given range of kernel_symbols */ @@ -2506,6 +2517,7 @@ static noinline struct module *load_module(void __user *umod, synchronize_sched(); module_arch_cleanup(mod); cleanup: + free_modinfo(mod); kobject_del(&mod->mkobj.kobj); kobject_put(&mod->mkobj.kobj); free_unload: -- cgit v1.2.3-70-g09d2 From ffa9f12a41ec117207e8d953f90b9c179546c8d7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Sep 2009 00:32:59 -0600 Subject: module: don't call percpu_modfree on NULL pointer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The general one handles NULL, the static obsolescent (CONFIG_HAVE_LEGACY_PER_CPU_AREA) one in module.c doesn't; Eric's commit 720eba31 assumed it did, and various frobbings since then kept that assumption. All other callers in module.c all protect it with an if; this effectively does the same as free_init is only goto if we fail percpu_modalloc(). Reported-by: Kamalesh Babulal Signed-off-by: Rusty Russell Cc: Eric Dumazet Cc: Masami Hiramatsu Cc: Américo Wang Tested-by: Kamalesh Babulal --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index c54f10d90e1..5a29397ca4b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2523,8 +2523,8 @@ static noinline struct module *load_module(void __user *umod, free_unload: module_unload_free(mod); #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - free_init: percpu_modfree(mod->refptr); + free_init: #endif module_free(mod, mod->module_init); free_core: -- cgit v1.2.3-70-g09d2 From 3ae91c21dd29e413f4111978152c14061f0984b0 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 1 Oct 2009 15:43:54 -0700 Subject: module: fix up CONFIG_KALLSYMS=n build. Starting from commit 4a4962263f07d14660849ec134ee42b63e95ea9a "reduce symbol table for loaded modules (v2)", the kernel/module.c build is broken with CONFIG_KALLSYMS disabled. CC kernel/module.o kernel/module.c:1995: warning: type defaults to 'int' in declaration of 'Elf_Hdr' kernel/module.c:1995: error: expected ';', ',' or ')' before '*' token kernel/module.c: In function 'load_module': kernel/module.c:2203: error: 'strmap' undeclared (first use in this function) kernel/module.c:2203: error: (Each undeclared identifier is reported only once kernel/module.c:2203: error: for each function it appears in.) kernel/module.c:2239: error: 'symoffs' undeclared (first use in this function) kernel/module.c:2239: error: implicit declaration of function 'layout_symtab' kernel/module.c:2240: error: 'stroffs' undeclared (first use in this function) make[1]: *** [kernel/module.o] Error 1 make: *** [kernel/module.o] Error 2 There are three different issues: - layout_symtab() takes a const Elf_Ehdr - layout_symtab() needs to return a value - symoffs/stroffs/strmap are referenced by the load_module() code despite being ifdefed out, which seems unnecessary given the noop behaviour of layout_symtab()/add_kallsyms() in the case of CONFIG_KALLSYMS=n. Signed-off-by: Paul Mundt Acked-by: Jan Beulich Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index fe748a86d45..8b7d8805819 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1992,12 +1992,14 @@ static inline unsigned long layout_symtab(struct module *mod, Elf_Shdr *sechdrs, unsigned int symindex, unsigned int strindex, - const Elf_Hdr *hdr, + const Elf_Ehdr *hdr, const char *secstrings, unsigned long *pstroffs, unsigned long *strmap) { + return 0; } + static inline void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, unsigned int shnum, @@ -2081,9 +2083,8 @@ static noinline struct module *load_module(void __user *umod, struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ -#ifdef CONFIG_KALLSYMS unsigned long symoffs, stroffs, *strmap; -#endif + mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", -- cgit v1.2.3-70-g09d2 From 23fb064bb96f001ecb8682129f7ee1bc1ca691bc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Jul 2009 21:18:35 +0900 Subject: percpu: kill legacy percpu allocator With ia64 converted, there's no arch left which still uses legacy percpu allocator. Kill it. Signed-off-by: Tejun Heo Delightedly-acked-by: Rusty Russell Cc: Ingo Molnar Cc: Christoph Lameter --- include/linux/percpu.h | 24 ------- kernel/module.c | 150 ----------------------------------------- mm/Makefile | 4 -- mm/allocpercpu.c | 177 ------------------------------------------------- mm/percpu.c | 2 - 5 files changed, 357 deletions(-) delete mode 100644 mm/allocpercpu.c (limited to 'kernel/module.c') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 878836ca999..5baf5b8788f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -34,8 +34,6 @@ #ifdef CONFIG_SMP -#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA - /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) @@ -130,28 +128,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) extern void *__alloc_reserved_percpu(size_t size, size_t align); - -#else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ - -struct percpu_data { - void *ptrs[1]; -}; - -/* pointer disguising messes up the kmemleak objects tracking */ -#ifndef CONFIG_DEBUG_KMEMLEAK -#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) -#else -#define __percpu_disguise(pdata) (struct percpu_data *)(pdata) -#endif - -#define per_cpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = __percpu_disguise(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ -}) - -#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ - extern void *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void *__pdata); diff --git a/kernel/module.c b/kernel/module.c index 8b7d8805819..64787cddeb5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -370,8 +370,6 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA - static void *percpu_modalloc(unsigned long size, unsigned long align, const char *name) { @@ -395,154 +393,6 @@ static void percpu_modfree(void *freeme) free_percpu(freeme); } -#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */ - -/* Number of blocks used and allocated. */ -static unsigned int pcpu_num_used, pcpu_num_allocated; -/* Size of each block. -ve means used. */ -static int *pcpu_size; - -static int split_block(unsigned int i, unsigned short size) -{ - /* Reallocation required? */ - if (pcpu_num_used + 1 > pcpu_num_allocated) { - int *new; - - new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2, - GFP_KERNEL); - if (!new) - return 0; - - pcpu_num_allocated *= 2; - pcpu_size = new; - } - - /* Insert a new subblock */ - memmove(&pcpu_size[i+1], &pcpu_size[i], - sizeof(pcpu_size[0]) * (pcpu_num_used - i)); - pcpu_num_used++; - - pcpu_size[i+1] -= size; - pcpu_size[i] = size; - return 1; -} - -static inline unsigned int block_size(int val) -{ - if (val < 0) - return -val; - return val; -} - -static void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) -{ - unsigned long extra; - unsigned int i; - void *ptr; - int cpu; - - if (align > PAGE_SIZE) { - printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", - name, align, PAGE_SIZE); - align = PAGE_SIZE; - } - - ptr = __per_cpu_start; - for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { - /* Extra for alignment requirement. */ - extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; - BUG_ON(i == 0 && extra != 0); - - if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size) - continue; - - /* Transfer extra to previous block. */ - if (pcpu_size[i-1] < 0) - pcpu_size[i-1] -= extra; - else - pcpu_size[i-1] += extra; - pcpu_size[i] -= extra; - ptr += extra; - - /* Split block if warranted */ - if (pcpu_size[i] - size > sizeof(unsigned long)) - if (!split_block(i, size)) - return NULL; - - /* add the per-cpu scanning areas */ - for_each_possible_cpu(cpu) - kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0, - GFP_KERNEL); - - /* Mark allocated */ - pcpu_size[i] = -pcpu_size[i]; - return ptr; - } - - printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", - size); - return NULL; -} - -static void percpu_modfree(void *freeme) -{ - unsigned int i; - void *ptr = __per_cpu_start + block_size(pcpu_size[0]); - int cpu; - - /* First entry is core kernel percpu data. */ - for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { - if (ptr == freeme) { - pcpu_size[i] = -pcpu_size[i]; - goto free; - } - } - BUG(); - - free: - /* remove the per-cpu scanning areas */ - for_each_possible_cpu(cpu) - kmemleak_free(freeme + per_cpu_offset(cpu)); - - /* Merge with previous? */ - if (pcpu_size[i-1] >= 0) { - pcpu_size[i-1] += pcpu_size[i]; - pcpu_num_used--; - memmove(&pcpu_size[i], &pcpu_size[i+1], - (pcpu_num_used - i) * sizeof(pcpu_size[0])); - i--; - } - /* Merge with next? */ - if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) { - pcpu_size[i] += pcpu_size[i+1]; - pcpu_num_used--; - memmove(&pcpu_size[i+1], &pcpu_size[i+2], - (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0])); - } -} - -static int percpu_modinit(void) -{ - pcpu_num_used = 2; - pcpu_num_allocated = 2; - pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, - GFP_KERNEL); - /* Static in-kernel percpu data (used). */ - pcpu_size[0] = -(__per_cpu_end-__per_cpu_start); - /* Free room. */ - pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; - if (pcpu_size[1] < 0) { - printk(KERN_ERR "No per-cpu room for modules.\n"); - pcpu_num_used = 1; - } - - return 0; -} -__initcall(percpu_modinit); - -#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ - static unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, const char *secstrings) diff --git a/mm/Makefile b/mm/Makefile index ebf849042ed..82131d0f8d8 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -34,11 +34,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o -ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA obj-$(CONFIG_SMP) += percpu.o -else -obj-$(CONFIG_SMP) += allocpercpu.o -endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c deleted file mode 100644 index df34ceae0c6..00000000000 --- a/mm/allocpercpu.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * linux/mm/allocpercpu.c - * - * Separated from slab.c August 11, 2006 Christoph Lameter - */ -#include -#include -#include -#include - -#ifndef cache_line_size -#define cache_line_size() L1_CACHE_BYTES -#endif - -/** - * percpu_depopulate - depopulate per-cpu data for given cpu - * @__pdata: per-cpu data to depopulate - * @cpu: depopulate per-cpu data for this cpu - * - * Depopulating per-cpu data for a cpu going offline would be a typical - * use case. You need to register a cpu hotplug handler for that purpose. - */ -static void percpu_depopulate(void *__pdata, int cpu) -{ - struct percpu_data *pdata = __percpu_disguise(__pdata); - - kfree(pdata->ptrs[cpu]); - pdata->ptrs[cpu] = NULL; -} - -/** - * percpu_depopulate_mask - depopulate per-cpu data for some cpu's - * @__pdata: per-cpu data to depopulate - * @mask: depopulate per-cpu data for cpu's selected through mask bits - */ -static void __percpu_depopulate_mask(void *__pdata, const cpumask_t *mask) -{ - int cpu; - for_each_cpu_mask_nr(cpu, *mask) - percpu_depopulate(__pdata, cpu); -} - -#define percpu_depopulate_mask(__pdata, mask) \ - __percpu_depopulate_mask((__pdata), &(mask)) - -/** - * percpu_populate - populate per-cpu data for given cpu - * @__pdata: per-cpu data to populate further - * @size: size of per-cpu object - * @gfp: may sleep or not etc. - * @cpu: populate per-data for this cpu - * - * Populating per-cpu data for a cpu coming online would be a typical - * use case. You need to register a cpu hotplug handler for that purpose. - * Per-cpu object is populated with zeroed buffer. - */ -static void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) -{ - struct percpu_data *pdata = __percpu_disguise(__pdata); - int node = cpu_to_node(cpu); - - /* - * We should make sure each CPU gets private memory. - */ - size = roundup(size, cache_line_size()); - - BUG_ON(pdata->ptrs[cpu]); - if (node_online(node)) - pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node); - else - pdata->ptrs[cpu] = kzalloc(size, gfp); - return pdata->ptrs[cpu]; -} - -/** - * percpu_populate_mask - populate per-cpu data for more cpu's - * @__pdata: per-cpu data to populate further - * @size: size of per-cpu object - * @gfp: may sleep or not etc. - * @mask: populate per-cpu data for cpu's selected through mask bits - * - * Per-cpu objects are populated with zeroed buffers. - */ -static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, - cpumask_t *mask) -{ - cpumask_t populated; - int cpu; - - cpus_clear(populated); - for_each_cpu_mask_nr(cpu, *mask) - if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) { - __percpu_depopulate_mask(__pdata, &populated); - return -ENOMEM; - } else - cpu_set(cpu, populated); - return 0; -} - -#define percpu_populate_mask(__pdata, size, gfp, mask) \ - __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) - -/** - * alloc_percpu - initial setup of per-cpu data - * @size: size of per-cpu object - * @align: alignment - * - * Allocate dynamic percpu area. Percpu objects are populated with - * zeroed buffers. - */ -void *__alloc_percpu(size_t size, size_t align) -{ - /* - * We allocate whole cache lines to avoid false sharing - */ - size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size()); - void *pdata = kzalloc(sz, GFP_KERNEL); - void *__pdata = __percpu_disguise(pdata); - - /* - * Can't easily make larger alignment work with kmalloc. WARN - * on it. Larger alignment should only be used for module - * percpu sections on SMP for which this path isn't used. - */ - WARN_ON_ONCE(align > SMP_CACHE_BYTES); - - if (unlikely(!pdata)) - return NULL; - if (likely(!__percpu_populate_mask(__pdata, size, GFP_KERNEL, - &cpu_possible_map))) - return __pdata; - kfree(pdata); - return NULL; -} -EXPORT_SYMBOL_GPL(__alloc_percpu); - -/** - * free_percpu - final cleanup of per-cpu data - * @__pdata: object to clean up - * - * We simply clean up any per-cpu object left. No need for the client to - * track and specify through a bis mask which per-cpu objects are to free. - */ -void free_percpu(void *__pdata) -{ - if (unlikely(!__pdata)) - return; - __percpu_depopulate_mask(__pdata, cpu_possible_mask); - kfree(__percpu_disguise(__pdata)); -} -EXPORT_SYMBOL_GPL(free_percpu); - -/* - * Generic percpu area setup. - */ -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; - -EXPORT_SYMBOL(__per_cpu_offset); - -void __init setup_per_cpu_areas(void) -{ - unsigned long size, i; - char *ptr; - unsigned long nr_possible_cpus = num_possible_cpus(); - - /* Copy section for each CPU (we discard the original) */ - size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); - ptr = alloc_bootmem_pages(size * nr_possible_cpus); - - for_each_possible_cpu(i) { - __per_cpu_offset[i] = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - ptr += size; - } -} -#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ diff --git a/mm/percpu.c b/mm/percpu.c index 4a048abad04..e4e08b87b77 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -46,8 +46,6 @@ * * To use this allocator, arch code should do the followings. * - * - drop CONFIG_HAVE_LEGACY_PER_CPU_AREA - * * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate * regular address to percpu pointer and back if they need to be * different from the default -- cgit v1.2.3-70-g09d2 From c017b4be3e84176cab10eca5e6c4faeb8cfc6f3e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 28 Oct 2009 13:33:09 +0000 Subject: kmemleak: Simplify the kmemleak_scan_area() function prototype This function was taking non-necessary arguments which can be determined by kmemleak. The patch also modifies the calling sites. Signed-off-by: Catalin Marinas Cc: Pekka Enberg Cc: Christoph Lameter Cc: Rusty Russell --- include/linux/kmemleak.h | 6 ++---- kernel/module.c | 7 ++----- mm/kmemleak.c | 49 +++++++++++++++++++++--------------------------- mm/slab.c | 4 ++-- 4 files changed, 27 insertions(+), 39 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 3c7497d46ee..99d9a6766f7 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -32,8 +32,7 @@ extern void kmemleak_padding(const void *ptr, unsigned long offset, size_t size) __ref; extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; -extern void kmemleak_scan_area(const void *ptr, unsigned long offset, - size_t length, gfp_t gfp) __ref; +extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, @@ -84,8 +83,7 @@ static inline void kmemleak_not_leak(const void *ptr) static inline void kmemleak_ignore(const void *ptr) { } -static inline void kmemleak_scan_area(const void *ptr, unsigned long offset, - size_t length, gfp_t gfp) +static inline void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) { } static inline void kmemleak_erase(void **ptr) diff --git a/kernel/module.c b/kernel/module.c index 8b7d8805819..1eb95209707 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2043,9 +2043,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, unsigned int i; /* only scan the sections containing data */ - kmemleak_scan_area(mod->module_core, (unsigned long)mod - - (unsigned long)mod->module_core, - sizeof(struct module), GFP_KERNEL); + kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); for (i = 1; i < hdr->e_shnum; i++) { if (!(sechdrs[i].sh_flags & SHF_ALLOC)) @@ -2054,8 +2052,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) continue; - kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr - - (unsigned long)mod->module_core, + kmemleak_scan_area((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size, GFP_KERNEL); } } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 8bf765c4f58..96106358e04 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -119,8 +119,8 @@ /* scanning area inside a memory block */ struct kmemleak_scan_area { struct hlist_node node; - unsigned long offset; - size_t length; + unsigned long start; + size_t size; }; #define KMEMLEAK_GREY 0 @@ -241,8 +241,6 @@ struct early_log { const void *ptr; /* allocated/freed memory block */ size_t size; /* memory block size */ int min_count; /* minimum reference count */ - unsigned long offset; /* scan area offset */ - size_t length; /* scan area length */ unsigned long trace[MAX_TRACE]; /* stack trace */ unsigned int trace_len; /* stack trace length */ }; @@ -720,14 +718,13 @@ static void make_black_object(unsigned long ptr) * Add a scanning area to the object. If at least one such area is added, * kmemleak will only scan these ranges rather than the whole memory block. */ -static void add_scan_area(unsigned long ptr, unsigned long offset, - size_t length, gfp_t gfp) +static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) { unsigned long flags; struct kmemleak_object *object; struct kmemleak_scan_area *area; - object = find_and_get_object(ptr, 0); + object = find_and_get_object(ptr, 1); if (!object) { kmemleak_warn("Adding scan area to unknown object at 0x%08lx\n", ptr); @@ -741,7 +738,7 @@ static void add_scan_area(unsigned long ptr, unsigned long offset, } spin_lock_irqsave(&object->lock, flags); - if (offset + length > object->size) { + if (ptr + size > object->pointer + object->size) { kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr); dump_object_info(object); kmem_cache_free(scan_area_cache, area); @@ -749,8 +746,8 @@ static void add_scan_area(unsigned long ptr, unsigned long offset, } INIT_HLIST_NODE(&area->node); - area->offset = offset; - area->length = length; + area->start = ptr; + area->size = size; hlist_add_head(&area->node, &object->area_list); out_unlock: @@ -786,7 +783,7 @@ static void object_no_scan(unsigned long ptr) * processed later once kmemleak is fully initialized. */ static void __init log_early(int op_type, const void *ptr, size_t size, - int min_count, unsigned long offset, size_t length) + int min_count) { unsigned long flags; struct early_log *log; @@ -808,8 +805,6 @@ static void __init log_early(int op_type, const void *ptr, size_t size, log->ptr = ptr; log->size = size; log->min_count = min_count; - log->offset = offset; - log->length = length; if (op_type == KMEMLEAK_ALLOC) log->trace_len = __save_stack_trace(log->trace); crt_early_log++; @@ -858,7 +853,7 @@ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count, if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) create_object((unsigned long)ptr, size, min_count, gfp); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0); + log_early(KMEMLEAK_ALLOC, ptr, size, min_count); } EXPORT_SYMBOL_GPL(kmemleak_alloc); @@ -873,7 +868,7 @@ void __ref kmemleak_free(const void *ptr) if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) delete_object_full((unsigned long)ptr); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); + log_early(KMEMLEAK_FREE, ptr, 0, 0); } EXPORT_SYMBOL_GPL(kmemleak_free); @@ -888,7 +883,7 @@ void __ref kmemleak_free_part(const void *ptr, size_t size) if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) delete_object_part((unsigned long)ptr, size); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0); + log_early(KMEMLEAK_FREE_PART, ptr, size, 0); } EXPORT_SYMBOL_GPL(kmemleak_free_part); @@ -903,7 +898,7 @@ void __ref kmemleak_not_leak(const void *ptr) if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) make_gray_object((unsigned long)ptr); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0); + log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0); } EXPORT_SYMBOL(kmemleak_not_leak); @@ -919,22 +914,21 @@ void __ref kmemleak_ignore(const void *ptr) if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) make_black_object((unsigned long)ptr); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0); + log_early(KMEMLEAK_IGNORE, ptr, 0, 0); } EXPORT_SYMBOL(kmemleak_ignore); /* * Limit the range to be scanned in an allocated memory block. */ -void __ref kmemleak_scan_area(const void *ptr, unsigned long offset, - size_t length, gfp_t gfp) +void __ref kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) { pr_debug("%s(0x%p)\n", __func__, ptr); if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) - add_scan_area((unsigned long)ptr, offset, length, gfp); + add_scan_area((unsigned long)ptr, size, gfp); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length); + log_early(KMEMLEAK_SCAN_AREA, ptr, size, 0); } EXPORT_SYMBOL(kmemleak_scan_area); @@ -948,7 +942,7 @@ void __ref kmemleak_no_scan(const void *ptr) if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) object_no_scan((unsigned long)ptr); else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0); + log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0); } EXPORT_SYMBOL(kmemleak_no_scan); @@ -1075,9 +1069,9 @@ static void scan_object(struct kmemleak_object *object) } } else hlist_for_each_entry(area, elem, &object->area_list, node) - scan_block((void *)(object->pointer + area->offset), - (void *)(object->pointer + area->offset - + area->length), object, 0); + scan_block((void *)area->start, + (void *)(area->start + area->size), + object, 0); out: spin_unlock_irqrestore(&object->lock, flags); } @@ -1642,8 +1636,7 @@ void __init kmemleak_init(void) kmemleak_ignore(log->ptr); break; case KMEMLEAK_SCAN_AREA: - kmemleak_scan_area(log->ptr, log->offset, log->length, - GFP_KERNEL); + kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL); break; case KMEMLEAK_NO_SCAN: kmemleak_no_scan(log->ptr); diff --git a/mm/slab.c b/mm/slab.c index 646db308519..d2713a944eb 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2584,8 +2584,8 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, * kmemleak does not treat the ->s_mem pointer as a reference * to the object. Otherwise we will not report the leak. */ - kmemleak_scan_area(slabp, offsetof(struct slab, list), - sizeof(struct list_head), local_flags); + kmemleak_scan_area(&slabp->list, sizeof(struct list_head), + local_flags); if (!slabp) return NULL; } else { -- cgit v1.2.3-70-g09d2 From a6f5aa1ea05686ad6e84593a00a04161e6dfb3a3 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 28 Oct 2009 13:33:10 +0000 Subject: kmemleak: Scan the _ftrace_events section in modules This section contains pointers to allocated objects and not scanning it leads to false positives. Reported-by: Zdenek Kabelac Acked-by: Rusty Russell Signed-off-by: Catalin Marinas --- kernel/module.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 1eb95209707..dd29ba43c34 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2380,6 +2380,12 @@ static noinline struct module *load_module(void __user *umod, "_ftrace_events", sizeof(*mod->trace_events), &mod->num_trace_events); + /* + * This section contains pointers to allocated objects in the trace + * code and not scanning it leads to false positives. + */ + kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * + mod->num_trace_events, GFP_KERNEL); #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ -- cgit v1.2.3-70-g09d2 From 35dead4235e2b67da7275b4122fed37099c2f462 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 3 Dec 2009 00:29:15 +0100 Subject: modules: don't export section names of empty sections via sysfs On the parisc architecture we face for each and every loaded kernel module this kernel "badness warning": sysfs: cannot create duplicate filename '/module/ac97_bus/sections/.text' Badness at fs/sysfs/dir.c:487 Reason for that is, that on parisc all kernel modules do have multiple .text sections due to the usage of the -ffunction-sections compiler flag which is needed to reach all jump targets on this platform. An objdump on such a kernel module gives: Sections: Idx Name Size VMA LMA File off Algn 0 .note.gnu.build-id 00000024 00000000 00000000 00000034 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 1 .text 00000000 00000000 00000000 00000058 2**0 CONTENTS, ALLOC, LOAD, READONLY, CODE 2 .text.ac97_bus_match 0000001c 00000000 00000000 00000058 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 3 .text 00000000 00000000 00000000 000000d4 2**0 CONTENTS, ALLOC, LOAD, READONLY, CODE ... Since the .text sections are empty (size of 0 bytes) and won't be loaded by the kernel module loader anyway, I don't see a reason why such sections need to be listed under /sys/module//sections/ either. The attached patch does solve this issue by not exporting section names which are empty. This fixes bugzilla http://bugzilla.kernel.org/show_bug.cgi?id=14703 Signed-off-by: Helge Deller CC: rusty@rustcorp.com.au CC: akpm@linux-foundation.org CC: James.Bottomley@HansenPartnership.com CC: roland@redhat.com CC: dave@hiauly1.hia.nrc.ca Signed-off-by: Linus Torvalds --- kernel/module.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 8b7d8805819..5842a71cf05 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1187,7 +1187,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, /* Count loaded sections and allocate structures */ for (i = 0; i < nsect; i++) - if (sechdrs[i].sh_flags & SHF_ALLOC) + if (sechdrs[i].sh_flags & SHF_ALLOC + && sechdrs[i].sh_size) nloaded++; size[0] = ALIGN(sizeof(*sect_attrs) + nloaded * sizeof(sect_attrs->attrs[0]), @@ -1207,6 +1208,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, for (i = 0; i < nsect; i++) { if (! (sechdrs[i].sh_flags & SHF_ALLOC)) continue; + if (!sechdrs[i].sh_size) + continue; sattr->address = sechdrs[i].sh_addr; sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From d4703aefdbc8f9f347f6dcefcddd791294314eb7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 15 Dec 2009 16:28:32 -0600 Subject: module: handle ppc64 relocating kcrctabs when CONFIG_RELOCATABLE=y powerpc applies relocations to the kcrctab. They're absolute symbols, but it's not completely unreasonable: other archs may too, but the relocation is often 0. http://lists.ozlabs.org/pipermail/linuxppc-dev/2009-November/077972.html Inspired-by: Neil Horman Signed-off-by: Rusty Russell Tested-by: Neil Horman Acked-by: Paul Mackerras --- arch/powerpc/include/asm/module.h | 5 +++++ arch/powerpc/kernel/vmlinux.lds.S | 3 +++ kernel/module.c | 28 +++++++++++++++++++++------- 3 files changed, 29 insertions(+), 7 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 08454880a2c..0192a4ee2bc 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -87,5 +87,10 @@ struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, struct exception_table_entry *finish); +#ifdef CONFIG_MODVERSIONS +#define ARCH_RELOCATES_KCRCTAB + +extern const unsigned long reloc_start[]; +#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MODULE_H */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 27735a7ac12..dcd01c82e70 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -38,6 +38,9 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { + . = 0; + reloc_start = .; + . = KERNELBASE; /* diff --git a/kernel/module.c b/kernel/module.c index 12afc5a3ddd..a65dc787a27 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -880,11 +880,23 @@ static int try_to_force_load(struct module *mod, const char *reason) } #ifdef CONFIG_MODVERSIONS +/* If the arch applies (non-zero) relocations to kernel kcrctab, unapply it. */ +static unsigned long maybe_relocated(unsigned long crc, + const struct module *crc_owner) +{ +#ifdef ARCH_RELOCATES_KCRCTAB + if (crc_owner == NULL) + return crc - (unsigned long)reloc_start; +#endif + return crc; +} + static int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, struct module *mod, - const unsigned long *crc) + const unsigned long *crc, + const struct module *crc_owner) { unsigned int i, num_versions; struct modversion_info *versions; @@ -905,10 +917,10 @@ static int check_version(Elf_Shdr *sechdrs, if (strcmp(versions[i].name, symname) != 0) continue; - if (versions[i].crc == *crc) + if (versions[i].crc == maybe_relocated(*crc, crc_owner)) return 1; DEBUGP("Found checksum %lX vs module %lX\n", - *crc, versions[i].crc); + maybe_relocated(*crc, crc_owner), versions[i].crc); goto bad_version; } @@ -931,7 +943,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, &crc, true, false)) BUG(); - return check_version(sechdrs, versindex, "module_layout", mod, crc); + return check_version(sechdrs, versindex, "module_layout", mod, crc, + NULL); } /* First part is kernel version, which we ignore if module has crcs. */ @@ -949,7 +962,8 @@ static inline int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, struct module *mod, - const unsigned long *crc) + const unsigned long *crc, + const struct module *crc_owner) { return 1; } @@ -984,8 +998,8 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, /* use_module can fail due to OOM, or module initialization or unloading */ if (sym) { - if (!check_version(sechdrs, versindex, name, mod, crc) || - !use_module(mod, owner)) + if (!check_version(sechdrs, versindex, name, mod, crc, owner) + || !use_module(mod, owner)) sym = NULL; } return sym; -- cgit v1.2.3-70-g09d2 From e1783a240f491fb233f04edc042e16b18a7a79ba Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: module: Use this_cpu_xx to dynamically allocate counters Use cpu ops to deal with the per cpu data instead of a local_t. Reduces memory requirements, cache footprint and decreases cycle counts. The this_cpu_xx operations are also used for !SMP mode. Otherwise we could not drop the use of __module_ref_addr() which would make per cpu data handling complicated. this_cpu_xx operations have their own fallback for !SMP. V8-V9: - Leave include asm/module.h since ringbuffer.c depends on it. Nothing else does though. Another patch will deal with that. - Remove spurious free. Signed-off-by: Christoph Lameter Acked-by: Rusty Russell Signed-off-by: Tejun Heo --- include/linux/module.h | 36 ++++++++++++++---------------------- kernel/module.c | 29 +++++++++++++++-------------- 2 files changed, 29 insertions(+), 36 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 6cb1a3cab5d..2302f09ea2d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -363,11 +364,9 @@ struct module /* Destruction function. */ void (*exit)(void); -#ifdef CONFIG_SMP - char *refptr; -#else - local_t ref; -#endif + struct module_ref { + int count; + } *refptr; #endif #ifdef CONFIG_CONSTRUCTORS @@ -454,25 +453,16 @@ void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) void symbol_put_addr(void *addr); -static inline local_t *__module_ref_addr(struct module *mod, int cpu) -{ -#ifdef CONFIG_SMP - return (local_t *) (mod->refptr + per_cpu_offset(cpu)); -#else - return &mod->ref; -#endif -} - /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). */ static inline void __module_get(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_inc(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); - put_cpu(); + __this_cpu_read(module->refptr->count)); + preempt_enable(); } } @@ -481,15 +471,17 @@ static inline int try_module_get(struct module *module) int ret = 1; if (module) { - unsigned int cpu = get_cpu(); + preempt_disable(); + if (likely(module_is_live(module))) { - local_inc(__module_ref_addr(module, cpu)); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); } else ret = 0; - put_cpu(); + + preempt_enable(); } return ret; } diff --git a/kernel/module.c b/kernel/module.c index e96b8ed1cb6..9bf228052ec 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -474,9 +474,10 @@ static void module_unload_init(struct module *mod) INIT_LIST_HEAD(&mod->modules_which_use_me); for_each_possible_cpu(cpu) - local_set(__module_ref_addr(mod, cpu), 0); + per_cpu_ptr(mod->refptr, cpu)->count = 0; + /* Hold reference count during initialization. */ - local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); + __this_cpu_write(mod->refptr->count, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -619,7 +620,7 @@ unsigned int module_refcount(struct module *mod) int cpu; for_each_possible_cpu(cpu) - total += local_read(__module_ref_addr(mod, cpu)); + total += per_cpu_ptr(mod->refptr, cpu)->count; return total; } EXPORT_SYMBOL(module_refcount); @@ -796,14 +797,15 @@ static struct module_attribute refcnt = { void module_put(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_dec(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_dec(module->refptr->count); + trace_module_put(module, _RET_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); - put_cpu(); + preempt_enable(); } } EXPORT_SYMBOL(module_put); @@ -1394,9 +1396,9 @@ static void free_module(struct module *mod) kfree(mod->args); if (mod->percpu) percpu_modfree(mod->percpu); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) +#if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) - percpu_modfree(mod->refptr); + free_percpu(mod->refptr); #endif /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); @@ -2159,9 +2161,8 @@ static noinline struct module *load_module(void __user *umod, mod = (void *)sechdrs[modindex].sh_addr; kmemleak_load_module(mod, hdr, sechdrs, secstrings); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), - mod->name); +#if defined(CONFIG_MODULE_UNLOAD) + mod->refptr = alloc_percpu(struct module_ref); if (!mod->refptr) { err = -ENOMEM; goto free_init; @@ -2393,8 +2394,8 @@ static noinline struct module *load_module(void __user *umod, kobject_put(&mod->mkobj.kobj); free_unload: module_unload_free(mod); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - percpu_modfree(mod->refptr); +#if defined(CONFIG_MODULE_UNLOAD) + free_percpu(mod->refptr); free_init: #endif module_free(mod, mod->module_init); -- cgit v1.2.3-70-g09d2 From 10b465aaf9536ee5a16652fa0700740183d48ec9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 19 Dec 2009 14:43:01 +0000 Subject: modules: Skip empty sections when exporting section notes Commit 35dead4 "modules: don't export section names of empty sections via sysfs" changed the set of sections that have attributes, but did not change the iteration over these attributes in add_notes_attrs(). This can lead to add_notes_attrs() creating attributes with the wrong names or with null name pointers. Introduce a sect_empty() function and use it in both add_sect_attrs() and add_notes_attrs(). Reported-by: Martin Michlmayr Signed-off-by: Ben Hutchings Tested-by: Martin Michlmayr Cc: stable@kernel.org Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- kernel/module.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index e96b8ed1cb6..f82386bd9ee 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1010,6 +1010,12 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, * J. Corbet */ #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) + +static inline bool sect_empty(const Elf_Shdr *sect) +{ + return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0; +} + struct module_sect_attr { struct module_attribute mattr; @@ -1051,8 +1057,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, /* Count loaded sections and allocate structures */ for (i = 0; i < nsect; i++) - if (sechdrs[i].sh_flags & SHF_ALLOC - && sechdrs[i].sh_size) + if (!sect_empty(&sechdrs[i])) nloaded++; size[0] = ALIGN(sizeof(*sect_attrs) + nloaded * sizeof(sect_attrs->attrs[0]), @@ -1070,9 +1075,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, sattr = §_attrs->attrs[0]; gattr = §_attrs->grp.attrs[0]; for (i = 0; i < nsect; i++) { - if (! (sechdrs[i].sh_flags & SHF_ALLOC)) - continue; - if (!sechdrs[i].sh_size) + if (sect_empty(&sechdrs[i])) continue; sattr->address = sechdrs[i].sh_addr; sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, @@ -1156,7 +1159,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, /* Count notes sections and allocate structures. */ notes = 0; for (i = 0; i < nsect; i++) - if ((sechdrs[i].sh_flags & SHF_ALLOC) && + if (!sect_empty(&sechdrs[i]) && (sechdrs[i].sh_type == SHT_NOTE)) ++notes; @@ -1172,7 +1175,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, notes_attrs->notes = notes; nattr = ¬es_attrs->attrs[0]; for (loaded = i = 0; i < nsect; ++i) { - if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + if (sect_empty(&sechdrs[i])) continue; if (sechdrs[i].sh_type == SHT_NOTE) { nattr->attr.name = mod->sect_attrs->attrs[loaded].name; -- cgit v1.2.3-70-g09d2 From 3d9a854c2dac3e888393b23ba7adafcce4d6d4b9 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 20 Feb 2010 01:03:43 +0100 Subject: Rename .data[.percpu][.XXX] to .data[..percpu][..XXX]. Signed-off-by: Denys Vlasenko Signed-off-by: Michal Marek --- arch/ia64/include/asm/percpu.h | 2 +- include/asm-generic/percpu.h | 10 +++++----- include/asm-generic/vmlinux.lds.h | 24 ++++++++++++------------ include/linux/percpu-defs.h | 4 ++-- kernel/module.c | 2 +- 5 files changed, 21 insertions(+), 21 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h index 30cf46534dd..35d9aeb6d85 100644 --- a/arch/ia64/include/asm/percpu.h +++ b/arch/ia64/include/asm/percpu.h @@ -31,7 +31,7 @@ extern void *per_cpu_init(void); #endif /* SMP */ -#define PER_CPU_BASE_SECTION ".data.percpu" +#define PER_CPU_BASE_SECTION ".data..percpu" /* * Be extremely careful when taking the address of this variable! Due to virtual diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 8087b90d467..1202a1550e9 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -76,7 +76,7 @@ extern void setup_per_cpu_areas(void); #ifndef PER_CPU_BASE_SECTION #ifdef CONFIG_SMP -#define PER_CPU_BASE_SECTION ".data.percpu" +#define PER_CPU_BASE_SECTION ".data..percpu" #else #define PER_CPU_BASE_SECTION ".data" #endif @@ -88,15 +88,15 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_SHARED_ALIGNED_SECTION "" #define PER_CPU_ALIGNED_SECTION "" #else -#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" -#define PER_CPU_ALIGNED_SECTION ".shared_aligned" +#define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned" +#define PER_CPU_ALIGNED_SECTION "..shared_aligned" #endif -#define PER_CPU_FIRST_SECTION ".first" +#define PER_CPU_FIRST_SECTION "..first" #else #define PER_CPU_SHARED_ALIGNED_SECTION "" -#define PER_CPU_ALIGNED_SECTION ".shared_aligned" +#define PER_CPU_ALIGNED_SECTION "..shared_aligned" #define PER_CPU_FIRST_SECTION "" #endif diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32cddc15594..e304fcd10bc 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -666,16 +666,16 @@ */ #define PERCPU_VADDR(vaddr, phdr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ - .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ + .data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ - *(.data.percpu.first) \ - *(.data.percpu.page_aligned) \ - *(.data.percpu) \ - *(.data.percpu.shared_aligned) \ + *(.data..percpu..first) \ + *(.data..percpu..page_aligned) \ + *(.data..percpu) \ + *(.data..percpu..shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } phdr \ - . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); + . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data..percpu); /** * PERCPU - define output section for percpu area, simple version @@ -687,18 +687,18 @@ * * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except * that __per_cpu_load is defined as a relative symbol against - * .data.percpu which is required for relocatable x86_32 + * .data..percpu which is required for relocatable x86_32 * configuration. */ #define PERCPU(align) \ . = ALIGN(align); \ - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ + .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ - *(.data.percpu.first) \ - *(.data.percpu.page_aligned) \ - *(.data.percpu) \ - *(.data.percpu.shared_aligned) \ + *(.data..percpu..first) \ + *(.data..percpu..page_aligned) \ + *(.data..percpu) \ + *(.data..percpu..shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 5a5d6ce4bd5..2351191f8c8 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -127,11 +127,11 @@ * Declaration/definition used for per-CPU variables that must be page aligned. */ #define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") \ + DECLARE_PER_CPU_SECTION(type, name, "..page_aligned") \ __aligned(PAGE_SIZE) #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") \ + DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ __aligned(PAGE_SIZE) /* diff --git a/kernel/module.c b/kernel/module.c index f82386bd9ee..5daf0abd63c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -397,7 +397,7 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, const char *secstrings) { - return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); + return find_sec(hdr, sechdrs, secstrings, ".data..percpu"); } static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) -- cgit v1.2.3-70-g09d2 From 361795b1eb7c08e9e65a2ebb4a4e536294d378a2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 12 Feb 2010 13:41:56 -0800 Subject: sysfs: Use sysfs_attr_init and sysfs_bin_attr_init on module dynamic attributes A little more whack-a-mole annotating the dynamic sysfs attributes. I had everything built into my earlier test kernel, and so I missed these. Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index e5538d5f00a..c968d3606dc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1085,6 +1085,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, if (sattr->name == NULL) goto out; sect_attrs->nsections++; + sysfs_attr_init(&sattr->mattr.attr); sattr->mattr.show = module_sect_show; sattr->mattr.store = NULL; sattr->mattr.attr.name = sattr->name; @@ -1180,6 +1181,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, if (sect_empty(&sechdrs[i])) continue; if (sechdrs[i].sh_type == SHT_NOTE) { + sysfs_bin_attr_init(nattr); nattr->attr.name = mod->sect_attrs->attrs[loaded].name; nattr->attr.mode = S_IRUGO; nattr->size = sechdrs[i].sh_size; @@ -1252,6 +1254,7 @@ int module_add_modinfo_attrs(struct module *mod) if (!attr->test || (attr->test && attr->test(mod))) { memcpy(temp_attr, attr, sizeof(*temp_attr)); + sysfs_attr_init(&temp_attr->attr); error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); ++temp_attr; } -- cgit v1.2.3-70-g09d2 From 259354deaaf03d49a02dbb9975d6ec2a54675672 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Mar 2010 18:56:10 +0900 Subject: module: encapsulate percpu handling better and record percpu_size Better encapsulate module static percpu area handling so that code outsidef of CONFIG_SMP ifdef doesn't deal with mod->percpu directly and add mod->percpu_size and record percpu_size in it. Both percpu fields are compiled out on UP. While at it, mark mod->percpu w/ __percpu. This is to prepare for is_module_percpu_address(). Signed-off-by: Tejun Heo Acked-by: Rusty Russell --- include/linux/module.h | 5 +++- kernel/module.c | 66 ++++++++++++++++++++++++++------------------------ 2 files changed, 39 insertions(+), 32 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 5e869ffd34a..87d247ac676 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -330,8 +330,11 @@ struct module struct module_notes_attrs *notes_attrs; #endif +#ifdef CONFIG_SMP /* Per-cpu data. */ - void *percpu; + void __percpu *percpu; + unsigned int percpu_size; +#endif /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ diff --git a/kernel/module.c b/kernel/module.c index c968d3606dc..e7a6e53fc73 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -370,27 +370,33 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -static void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { - void *ptr; + return mod->percpu; +} +static int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ if (align > PAGE_SIZE) { printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", - name, align, PAGE_SIZE); + mod->name, align, PAGE_SIZE); align = PAGE_SIZE; } - ptr = __alloc_reserved_percpu(size, align); - if (!ptr) + mod->percpu = __alloc_reserved_percpu(size, align); + if (!mod->percpu) { printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", size); - return ptr; + return -ENOMEM; + } + mod->percpu_size = size; + return 0; } -static void percpu_modfree(void *freeme) +static void percpu_modfree(struct module *mod) { - free_percpu(freeme); + free_percpu(mod->percpu); } static unsigned int find_pcpusec(Elf_Ehdr *hdr, @@ -400,24 +406,28 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); } -static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +static void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { int cpu; for_each_possible_cpu(cpu) - memcpy(pcpudest + per_cpu_offset(cpu), from, size); + memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } #else /* ... !CONFIG_SMP */ -static inline void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { return NULL; } -static inline void percpu_modfree(void *pcpuptr) +static inline int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ + return -ENOMEM; +} +static inline void percpu_modfree(struct module *mod) { - BUG(); } static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -425,8 +435,8 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, { return 0; } -static inline void percpu_modcopy(void *pcpudst, const void *src, - unsigned long size) +static inline void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); @@ -1400,8 +1410,7 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); - if (mod->percpu) - percpu_modfree(mod->percpu); + percpu_modfree(mod); #if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) free_percpu(mod->refptr); @@ -1520,7 +1529,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, default: /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == pcpuindex) - secbase = (unsigned long)mod->percpu; + secbase = (unsigned long)mod_percpu(mod); else secbase = sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; @@ -1954,7 +1963,7 @@ static noinline struct module *load_module(void __user *umod, unsigned int modindex, versindex, infoindex, pcpuindex; struct module *mod; long err = 0; - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ + void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; mm_segment_t old_fs; @@ -2094,15 +2103,11 @@ static noinline struct module *load_module(void __user *umod, if (pcpuindex) { /* We have a special allocation for this section. */ - percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, - sechdrs[pcpuindex].sh_addralign, - mod->name); - if (!percpu) { - err = -ENOMEM; + err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, + sechdrs[pcpuindex].sh_addralign); + if (err) goto free_mod; - } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; - mod->percpu = percpu; } /* Determine total sizes, and put offsets in sh_entsize. For now @@ -2317,7 +2322,7 @@ static noinline struct module *load_module(void __user *umod, sort_extable(mod->extable, mod->extable + mod->num_exentries); /* Finally, copy percpu area over. */ - percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, + percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, @@ -2409,8 +2414,7 @@ static noinline struct module *load_module(void __user *umod, module_free(mod, mod->module_core); /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: - if (percpu) - percpu_modfree(percpu); + percpu_modfree(mod); free_mod: kfree(args); kfree(strmap); -- cgit v1.2.3-70-g09d2 From 10fad5e46f6c7bdfb01b1a012380a38e3c6ab346 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Mar 2010 18:57:54 +0900 Subject: percpu, module: implement and use is_kernel/module_percpu_address() lockdep has custom code to check whether a pointer belongs to static percpu area which is somewhat broken. Implement proper is_kernel/module_percpu_address() and replace the custom code. On UP, percpu variables are regular static variables and can't be distinguished from them. Always return %false on UP. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Cc: Rusty Russell Cc: Ingo Molnar --- include/linux/module.h | 1 + include/linux/percpu.h | 7 +++++++ kernel/lockdep.c | 21 +++++---------------- kernel/module.c | 38 ++++++++++++++++++++++++++++++++++++++ mm/percpu.c | 26 ++++++++++++++++++++++++++ 5 files changed, 77 insertions(+), 16 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 87d247ac676..f0e2659f4e3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -395,6 +395,7 @@ static inline int module_is_live(struct module *mod) struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); bool is_module_address(unsigned long addr); +bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); static inline int within_module_core(unsigned long addr, struct module *mod) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a93e5bfdccb..11d5f834b54 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -137,6 +137,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); +extern bool is_kernel_percpu_address(unsigned long addr); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA @@ -163,6 +164,12 @@ static inline void free_percpu(void __percpu *p) kfree(p); } +/* can't distinguish from other static vars, always false */ +static inline bool is_kernel_percpu_address(unsigned long addr) +{ + return false; +} + static inline phys_addr_t per_cpu_ptr_to_phys(void *addr) { return __pa(addr); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c927a549db2..9bbb9c841e4 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -582,9 +582,6 @@ static int static_obj(void *obj) unsigned long start = (unsigned long) &_stext, end = (unsigned long) &_end, addr = (unsigned long) obj; -#ifdef CONFIG_SMP - int i; -#endif /* * static variable? @@ -595,24 +592,16 @@ static int static_obj(void *obj) if (arch_is_kernel_data(addr)) return 1; -#ifdef CONFIG_SMP /* - * percpu var? + * in-kernel percpu var? */ - for_each_possible_cpu(i) { - start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); - end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM - + per_cpu_offset(i); - - if ((addr >= start) && (addr < end)) - return 1; - } -#endif + if (is_kernel_percpu_address(addr)) + return 1; /* - * module var? + * module static or percpu var? */ - return is_module_address(addr); + return is_module_address(addr) || is_module_percpu_address(addr); } /* diff --git a/kernel/module.c b/kernel/module.c index e7a6e53fc73..9f8d23d8b3a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -415,6 +415,40 @@ static void percpu_modcopy(struct module *mod, memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } +/** + * is_module_percpu_address - test whether address is from module static percpu + * @addr: address to test + * + * Test whether @addr belongs to module static percpu area. + * + * RETURNS: + * %true if @addr is from module static percpu area + */ +bool is_module_percpu_address(unsigned long addr) +{ + struct module *mod; + unsigned int cpu; + + preempt_disable(); + + list_for_each_entry_rcu(mod, &modules, list) { + if (!mod->percpu_size) + continue; + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(mod->percpu, cpu); + + if ((void *)addr >= start && + (void *)addr < start + mod->percpu_size) { + preempt_enable(); + return true; + } + } + } + + preempt_enable(); + return false; +} + #else /* ... !CONFIG_SMP */ static inline void __percpu *mod_percpu(struct module *mod) @@ -441,6 +475,10 @@ static inline void percpu_modcopy(struct module *mod, /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); } +bool is_module_percpu_address(unsigned long addr) +{ + return false; +} #endif /* CONFIG_SMP */ diff --git a/mm/percpu.c b/mm/percpu.c index 768419d44ad..6e09741ddc6 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1303,6 +1303,32 @@ void free_percpu(void __percpu *ptr) } EXPORT_SYMBOL_GPL(free_percpu); +/** + * is_kernel_percpu_address - test whether address is from static percpu area + * @addr: address to test + * + * Test whether @addr belongs to in-kernel static percpu area. Module + * static percpu areas are not considered. For those, use + * is_module_percpu_address(). + * + * RETURNS: + * %true if @addr is from in-kernel static percpu area, %false otherwise. + */ +bool is_kernel_percpu_address(unsigned long addr) +{ + const size_t static_size = __per_cpu_end - __per_cpu_start; + void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); + unsigned int cpu; + + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(base, cpu); + + if ((void *)addr >= start && (void *)addr < start + static_size) + return true; + } + return false; +} + /** * per_cpu_ptr_to_phys - convert translated percpu address to physical address * @addr: the address to be converted to physical address -- cgit v1.2.3-70-g09d2 From ae832d1e03ac9bf09fb8a07fb37908ab40c7cd0e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 24 Mar 2010 10:57:43 +0800 Subject: tracing: Remove side effect from module tracepoints that caused a GPF Remove the @refcnt argument, because it has side-effects, and arguments with side-effects are not skipped by the jump over disabled instrumentation and are executed even when the tracepoint is disabled. This was also causing a GPF as found by Randy Dunlap: Subject: 2.6.33 GP fault only when built with tracing LKML-Reference: <4BA2B69D.3000309@oracle.com> Note, the current 2.6.34-rc has a fix for the actual cause of the GPF, but this fixes one of its triggers. Tested-by: Randy Dunlap Acked-by: Mathieu Desnoyers Signed-off-by: Li Zefan LKML-Reference: <4BA97FA7.6040406@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/module.h | 6 ++---- include/trace/events/module.h | 14 +++++++------- kernel/module.c | 3 +-- 3 files changed, 10 insertions(+), 13 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 5e869ffd34a..393ec39b580 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -460,8 +460,7 @@ static inline void __module_get(struct module *module) if (module) { preempt_disable(); __this_cpu_inc(module->refptr->count); - trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); + trace_module_get(module, _THIS_IP_); preempt_enable(); } } @@ -475,8 +474,7 @@ static inline int try_module_get(struct module *module) if (likely(module_is_live(module))) { __this_cpu_inc(module->refptr->count); - trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); + trace_module_get(module, _THIS_IP_); } else ret = 0; diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 4b0f48ba16a..a585f8135bd 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -53,9 +53,9 @@ TRACE_EVENT(module_free, DECLARE_EVENT_CLASS(module_refcnt, - TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + TP_PROTO(struct module *mod, unsigned long ip), - TP_ARGS(mod, ip, refcnt), + TP_ARGS(mod, ip), TP_STRUCT__entry( __field( unsigned long, ip ) @@ -65,7 +65,7 @@ DECLARE_EVENT_CLASS(module_refcnt, TP_fast_assign( __entry->ip = ip; - __entry->refcnt = refcnt; + __entry->refcnt = __this_cpu_read(mod->refptr->count); __assign_str(name, mod->name); ), @@ -75,16 +75,16 @@ DECLARE_EVENT_CLASS(module_refcnt, DEFINE_EVENT(module_refcnt, module_get, - TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + TP_PROTO(struct module *mod, unsigned long ip), - TP_ARGS(mod, ip, refcnt) + TP_ARGS(mod, ip) ); DEFINE_EVENT(module_refcnt, module_put, - TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + TP_PROTO(struct module *mod, unsigned long ip), - TP_ARGS(mod, ip, refcnt) + TP_ARGS(mod, ip) ); TRACE_EVENT(module_request, diff --git a/kernel/module.c b/kernel/module.c index c968d3606dc..21591ad921f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -800,8 +800,7 @@ void module_put(struct module *module) preempt_disable(); __this_cpu_dec(module->refptr->count); - trace_module_put(module, _RET_IP_, - __this_cpu_read(module->refptr->count)); + trace_module_put(module, _RET_IP_); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); -- cgit v1.2.3-70-g09d2 From eb0c53771fb2f5f66b0edb3ebce33be4bbf1c285 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Mar 2010 14:25:18 -0400 Subject: tracing: Fix compile error in module tracepoints when MODULE_UNLOAD not set If modules are configured in the build but unloading of modules is not, then the refcnt is not defined. Place the get/put module tracepoints under CONFIG_MODULE_UNLOAD since it references this field in the module structure. As a side-effect, this patch also reduces the code when MODULE_UNLOAD is not set, because these unused tracepoints are not created. Signed-off-by: Steven Rostedt --- include/trace/events/module.h | 4 ++++ kernel/module.c | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel/module.c') diff --git a/include/trace/events/module.h b/include/trace/events/module.h index a585f8135bd..f07b44a2b24 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -51,6 +51,9 @@ TRACE_EVENT(module_free, TP_printk("%s", __get_str(name)) ); +#ifdef CONFIG_MODULE_UNLOAD +/* trace_module_get/put are only used if CONFIG_MODULE_UNLOAD is defined */ + DECLARE_EVENT_CLASS(module_refcnt, TP_PROTO(struct module *mod, unsigned long ip), @@ -86,6 +89,7 @@ DEFINE_EVENT(module_refcnt, module_put, TP_ARGS(mod, ip) ); +#endif /* CONFIG_MODULE_UNLOAD */ TRACE_EVENT(module_request, diff --git a/kernel/module.c b/kernel/module.c index 21591ad921f..d9e237926b6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -59,8 +59,6 @@ #define CREATE_TRACE_POINTS #include -EXPORT_TRACEPOINT_SYMBOL(module_get); - #if 0 #define DEBUGP printk #else @@ -467,6 +465,9 @@ MODINFO_ATTR(srcversion); static char last_unloaded_module[MODULE_NAME_LEN+1]; #ifdef CONFIG_MODULE_UNLOAD + +EXPORT_TRACEPOINT_SYMBOL(module_get); + /* Init the unload section of the module. */ static void module_unload_init(struct module *mod) { -- cgit v1.2.3-70-g09d2 From 5fbfb18d7a5b846946d52c4a10e3aaa213ec31b6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 1 Apr 2010 19:09:40 +1100 Subject: Fix up possibly racy module refcounting Module refcounting is implemented with a per-cpu counter for speed. However there is a race when tallying the counter where a reference may be taken by one CPU and released by another. Reference count summation may then see the decrement without having seen the previous increment, leading to lower than expected count. A module which never has its actual reference drop below 1 may return a reference count of 0 due to this race. Module removal generally runs under stop_machine, which prevents this race causing bugs due to removal of in-use modules. However there are other real bugs in module.c code and driver code (module_refcount is exported) where the callers do not run under stop_machine. Fix this by maintaining running per-cpu counters for the number of module refcount increments and the number of refcount decrements. The increments are tallied after the decrements, so any decrement seen will always have its corresponding increment counted. The final refcount is the difference of the total increments and decrements, preventing a low-refcount from being returned. Signed-off-by: Nick Piggin Acked-by: Rusty Russell Signed-off-by: Linus Torvalds --- include/linux/module.h | 14 +++++++------- kernel/module.c | 35 +++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 15 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 8bd399a0034..515d53ae6a7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -368,7 +368,8 @@ struct module void (*exit)(void); struct module_ref { - int count; + unsigned int incs; + unsigned int decs; } __percpu *refptr; #endif @@ -463,9 +464,9 @@ static inline void __module_get(struct module *module) { if (module) { preempt_disable(); - __this_cpu_inc(module->refptr->count); + __this_cpu_inc(module->refptr->incs); trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); + __this_cpu_read(module->refptr->incs)); preempt_enable(); } } @@ -478,11 +479,10 @@ static inline int try_module_get(struct module *module) preempt_disable(); if (likely(module_is_live(module))) { - __this_cpu_inc(module->refptr->count); + __this_cpu_inc(module->refptr->incs); trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); - } - else + __this_cpu_read(module->refptr->incs)); + } else ret = 0; preempt_enable(); diff --git a/kernel/module.c b/kernel/module.c index 9f8d23d8b3a..1016b75b026 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -521,11 +521,13 @@ static void module_unload_init(struct module *mod) int cpu; INIT_LIST_HEAD(&mod->modules_which_use_me); - for_each_possible_cpu(cpu) - per_cpu_ptr(mod->refptr, cpu)->count = 0; + for_each_possible_cpu(cpu) { + per_cpu_ptr(mod->refptr, cpu)->incs = 0; + per_cpu_ptr(mod->refptr, cpu)->decs = 0; + } /* Hold reference count during initialization. */ - __this_cpu_write(mod->refptr->count, 1); + __this_cpu_write(mod->refptr->incs, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -664,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned int module_refcount(struct module *mod) { - unsigned int total = 0; + unsigned int incs = 0, decs = 0; int cpu; for_each_possible_cpu(cpu) - total += per_cpu_ptr(mod->refptr, cpu)->count; - return total; + decs += per_cpu_ptr(mod->refptr, cpu)->decs; + /* + * ensure the incs are added up after the decs. + * module_put ensures incs are visible before decs with smp_wmb. + * + * This 2-count scheme avoids the situation where the refcount + * for CPU0 is read, then CPU0 increments the module refcount, + * then CPU1 drops that refcount, then the refcount for CPU1 is + * read. We would record a decrement but not its corresponding + * increment so we would see a low count (disaster). + * + * Rare situation? But module_refcount can be preempted, and we + * might be tallying up 4096+ CPUs. So it is not impossible. + */ + smp_rmb(); + for_each_possible_cpu(cpu) + incs += per_cpu_ptr(mod->refptr, cpu)->incs; + return incs - decs; } EXPORT_SYMBOL(module_refcount); @@ -846,10 +864,11 @@ void module_put(struct module *module) { if (module) { preempt_disable(); - __this_cpu_dec(module->refptr->count); + smp_wmb(); /* see comment in module_refcount */ + __this_cpu_inc(module->refptr->decs); trace_module_put(module, _RET_IP_, - __this_cpu_read(module->refptr->count)); + __this_cpu_read(module->refptr->decs)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); -- cgit v1.2.3-70-g09d2 From 3fc1f1e27a5b807791d72e5d992aa33b668a6626 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 May 2010 18:49:20 +0200 Subject: stop_machine: reimplement using cpu_stop Reimplement stop_machine using cpu_stop. As cpu stoppers are guaranteed to be available for all online cpus, stop_machine_create/destroy() are no longer necessary and removed. With resource management and synchronization handled by cpu_stop, the new implementation is much simpler. Asking the cpu_stop to execute the stop_cpu() state machine on all online cpus with cpu hotplug disabled is enough. stop_machine itself doesn't need to manage any global resources anymore, so all per-instance information is rolled into struct stop_machine_data and the mutex and all static data variables are removed. The previous implementation created and destroyed RT workqueues as necessary which made stop_machine() calls highly expensive on very large machines. According to Dimitri Sivanich, preventing the dynamic creation/destruction makes booting faster more than twice on very large machines. cpu_stop resources are preallocated for all online cpus and should have the same effect. Signed-off-by: Tejun Heo Acked-by: Rusty Russell Acked-by: Peter Zijlstra Cc: Oleg Nesterov Cc: Dimitri Sivanich --- arch/s390/kernel/time.c | 1 - drivers/xen/manage.c | 14 +--- include/linux/stop_machine.h | 20 ------ kernel/cpu.c | 8 --- kernel/module.c | 14 +--- kernel/stop_machine.c | 158 +++++++++++-------------------------------- 6 files changed, 42 insertions(+), 173 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index fba6dec156b..03d96569f18 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -390,7 +390,6 @@ static void __init time_init_wq(void) if (time_sync_wq) return; time_sync_wq = create_singlethread_workqueue("timesync"); - stop_machine_create(); } /* diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 2ac4440e7b0..8943b8ccee1 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -80,12 +80,6 @@ static void do_suspend(void) shutting_down = SHUTDOWN_SUSPEND; - err = stop_machine_create(); - if (err) { - printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err); - goto out; - } - #ifdef CONFIG_PREEMPT /* If the kernel is preemptible, we need to freeze all the processes to prevent them from being in the middle of a pagetable update @@ -93,7 +87,7 @@ static void do_suspend(void) err = freeze_processes(); if (err) { printk(KERN_ERR "xen suspend: freeze failed %d\n", err); - goto out_destroy_sm; + goto out; } #endif @@ -136,12 +130,8 @@ out_resume: out_thaw: #ifdef CONFIG_PREEMPT thaw_processes(); - -out_destroy_sm: -#endif - stop_machine_destroy(); - out: +#endif shutting_down = SHUTDOWN_INVALID; } #endif /* CONFIG_PM_SLEEP */ diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index efcbd6c3794..0e552e72a4c 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -67,23 +67,6 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); */ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); -/** - * stop_machine_create: create all stop_machine threads - * - * Description: This causes all stop_machine threads to be created before - * stop_machine actually gets called. This can be used by subsystems that - * need a non failing stop_machine infrastructure. - */ -int stop_machine_create(void); - -/** - * stop_machine_destroy: destroy all stop_machine threads - * - * Description: This causes all stop_machine threads which were created with - * stop_machine_create to be destroyed again. - */ -void stop_machine_destroy(void); - #else static inline int stop_machine(int (*fn)(void *), void *data, @@ -96,8 +79,5 @@ static inline int stop_machine(int (*fn)(void *), void *data, return ret; } -static inline int stop_machine_create(void) { return 0; } -static inline void stop_machine_destroy(void) { } - #endif /* CONFIG_SMP */ #endif /* _LINUX_STOP_MACHINE */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 914aedcde84..54577757477 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu) { int err; - err = stop_machine_create(); - if (err) - return err; cpu_maps_update_begin(); if (cpu_hotplug_disabled) { @@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu) out: cpu_maps_update_done(); - stop_machine_destroy(); return err; } EXPORT_SYMBOL(cpu_down); @@ -361,9 +357,6 @@ int disable_nonboot_cpus(void) { int cpu, first_cpu, error; - error = stop_machine_create(); - if (error) - return error; cpu_maps_update_begin(); first_cpu = cpumask_first(cpu_online_mask); /* @@ -394,7 +387,6 @@ int disable_nonboot_cpus(void) printk(KERN_ERR "Non-boot CPUs are not disabled\n"); } cpu_maps_update_done(); - stop_machine_destroy(); return error; } diff --git a/kernel/module.c b/kernel/module.c index 1016b75b026..0838246d8c9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, return -EFAULT; name[MODULE_NAME_LEN-1] = '\0'; - /* Create stop_machine threads since free_module relies on - * a non-failing stop_machine call. */ - ret = stop_machine_create(); - if (ret) - return ret; - - if (mutex_lock_interruptible(&module_mutex) != 0) { - ret = -EINTR; - goto out_stop; - } + if (mutex_lock_interruptible(&module_mutex) != 0) + return -EINTR; mod = find_module(name); if (!mod) { @@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, out: mutex_unlock(&module_mutex); -out_stop: - stop_machine_destroy(); return ret; } diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 7e3f9182aef..884c7a1afee 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -388,174 +388,92 @@ enum stopmachine_state { /* Exit */ STOPMACHINE_EXIT, }; -static enum stopmachine_state state; struct stop_machine_data { - int (*fn)(void *); - void *data; - int fnret; + int (*fn)(void *); + void *data; + /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ + unsigned int num_threads; + const struct cpumask *active_cpus; + + enum stopmachine_state state; + atomic_t thread_ack; }; -/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ -static unsigned int num_threads; -static atomic_t thread_ack; -static DEFINE_MUTEX(lock); -/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */ -static DEFINE_MUTEX(setup_lock); -/* Users of stop_machine. */ -static int refcount; -static struct workqueue_struct *stop_machine_wq; -static struct stop_machine_data active, idle; -static const struct cpumask *active_cpus; -static void __percpu *stop_machine_work; - -static void set_state(enum stopmachine_state newstate) +static void set_state(struct stop_machine_data *smdata, + enum stopmachine_state newstate) { /* Reset ack counter. */ - atomic_set(&thread_ack, num_threads); + atomic_set(&smdata->thread_ack, smdata->num_threads); smp_wmb(); - state = newstate; + smdata->state = newstate; } /* Last one to ack a state moves to the next state. */ -static void ack_state(void) +static void ack_state(struct stop_machine_data *smdata) { - if (atomic_dec_and_test(&thread_ack)) - set_state(state + 1); + if (atomic_dec_and_test(&smdata->thread_ack)) + set_state(smdata, smdata->state + 1); } -/* This is the actual function which stops the CPU. It runs - * in the context of a dedicated stopmachine workqueue. */ -static void stop_cpu(struct work_struct *unused) +/* This is the cpu_stop function which stops the CPU. */ +static int stop_machine_cpu_stop(void *data) { + struct stop_machine_data *smdata = data; enum stopmachine_state curstate = STOPMACHINE_NONE; - struct stop_machine_data *smdata = &idle; - int cpu = smp_processor_id(); - int err; + int cpu = smp_processor_id(), err = 0; + bool is_active; + + if (!smdata->active_cpus) + is_active = cpu == cpumask_first(cpu_online_mask); + else + is_active = cpumask_test_cpu(cpu, smdata->active_cpus); - if (!active_cpus) { - if (cpu == cpumask_first(cpu_online_mask)) - smdata = &active; - } else { - if (cpumask_test_cpu(cpu, active_cpus)) - smdata = &active; - } /* Simple state machine */ do { /* Chill out and ensure we re-read stopmachine_state. */ cpu_relax(); - if (state != curstate) { - curstate = state; + if (smdata->state != curstate) { + curstate = smdata->state; switch (curstate) { case STOPMACHINE_DISABLE_IRQ: local_irq_disable(); hard_irq_disable(); break; case STOPMACHINE_RUN: - /* On multiple CPUs only a single error code - * is needed to tell that something failed. */ - err = smdata->fn(smdata->data); - if (err) - smdata->fnret = err; + if (is_active) + err = smdata->fn(smdata->data); break; default: break; } - ack_state(); + ack_state(smdata); } } while (curstate != STOPMACHINE_EXIT); local_irq_enable(); + return err; } -/* Callback for CPUs which aren't supposed to do anything. */ -static int chill(void *unused) -{ - return 0; -} - -int stop_machine_create(void) -{ - mutex_lock(&setup_lock); - if (refcount) - goto done; - stop_machine_wq = create_rt_workqueue("kstop"); - if (!stop_machine_wq) - goto err_out; - stop_machine_work = alloc_percpu(struct work_struct); - if (!stop_machine_work) - goto err_out; -done: - refcount++; - mutex_unlock(&setup_lock); - return 0; - -err_out: - if (stop_machine_wq) - destroy_workqueue(stop_machine_wq); - mutex_unlock(&setup_lock); - return -ENOMEM; -} -EXPORT_SYMBOL_GPL(stop_machine_create); - -void stop_machine_destroy(void) -{ - mutex_lock(&setup_lock); - refcount--; - if (refcount) - goto done; - destroy_workqueue(stop_machine_wq); - free_percpu(stop_machine_work); -done: - mutex_unlock(&setup_lock); -} -EXPORT_SYMBOL_GPL(stop_machine_destroy); - int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) { - struct work_struct *sm_work; - int i, ret; - - /* Set up initial state. */ - mutex_lock(&lock); - num_threads = num_online_cpus(); - active_cpus = cpus; - active.fn = fn; - active.data = data; - active.fnret = 0; - idle.fn = chill; - idle.data = NULL; - - set_state(STOPMACHINE_PREPARE); - - /* Schedule the stop_cpu work on all cpus: hold this CPU so one - * doesn't hit this CPU until we're ready. */ - get_cpu(); - for_each_online_cpu(i) { - sm_work = per_cpu_ptr(stop_machine_work, i); - INIT_WORK(sm_work, stop_cpu); - queue_work_on(i, stop_machine_wq, sm_work); - } - /* This will release the thread on our CPU. */ - put_cpu(); - flush_workqueue(stop_machine_wq); - ret = active.fnret; - mutex_unlock(&lock); - return ret; + struct stop_machine_data smdata = { .fn = fn, .data = data, + .num_threads = num_online_cpus(), + .active_cpus = cpus }; + + /* Set the initial state and stop all online cpus. */ + set_state(&smdata, STOPMACHINE_PREPARE); + return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata); } int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) { int ret; - ret = stop_machine_create(); - if (ret) - return ret; /* No CPUs can come up or down during this. */ get_online_cpus(); ret = __stop_machine(fn, data, cpus); put_online_cpus(); - stop_machine_destroy(); return ret; } EXPORT_SYMBOL_GPL(stop_machine); -- cgit v1.2.3-70-g09d2 From 480b02df3aa9f07d1c7df0cd8be7a5ca73893455 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 19 May 2010 17:33:39 -0600 Subject: module: drop the lock while waiting for module to complete initialization. This fixes "gave up waiting for init of module libcrc32c." which happened at boot time due to multiple parallel module loads. The problem was a deadlock: we wait for a module to finish initializing, but we keep the module_lock mutex so it can't complete. In particular, this could reasonably happen if a module does a request_module() in its initialization routine. So we change use_module() to return an errno rather than a bool, and if it's -EBUSY we drop the lock and wait in the caller, then reaquire the lock. Reported-by: Brandon Philips Signed-off-by: Rusty Russell Tested-by: Brandon Philips --- kernel/module.c | 59 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 22 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index e2564580f3f..970d773aec6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -561,33 +561,26 @@ int use_module(struct module *a, struct module *b) struct module_use *use; int no_warn, err; - if (b == NULL || already_uses(a, b)) return 1; - - /* If we're interrupted or time out, we fail. */ - if (wait_event_interruptible_timeout( - module_wq, (err = strong_try_module_get(b)) != -EBUSY, - 30 * HZ) <= 0) { - printk("%s: gave up waiting for init of module %s.\n", - a->name, b->name); + if (b == NULL || already_uses(a, b)) return 0; - } - /* If strong_try_module_get() returned a different error, we fail. */ + /* If we're interrupted or time out, we fail. */ + err = strong_try_module_get(b); if (err) - return 0; + return err; DEBUGP("Allocating new usage for %s.\n", a->name); use = kmalloc(sizeof(*use), GFP_ATOMIC); if (!use) { printk("%s: out of memory loading\n", a->name); module_put(b); - return 0; + return -ENOMEM; } use->module_which_uses = a; list_add(&use->list, &b->modules_which_use_me); no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); - return 1; + return 0; } EXPORT_SYMBOL_GPL(use_module); @@ -880,7 +873,7 @@ static inline void module_unload_free(struct module *mod) int use_module(struct module *a, struct module *b) { - return strong_try_module_get(b) == 0; + return strong_try_module_get(b); } EXPORT_SYMBOL_GPL(use_module); @@ -1051,17 +1044,39 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, struct module *owner; const struct kernel_symbol *sym; const unsigned long *crc; + DEFINE_WAIT(wait); + int err; + long timeleft = 30 * HZ; +again: sym = find_symbol(name, &owner, &crc, !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); - /* use_module can fail due to OOM, - or module initialization or unloading */ - if (sym) { - if (!check_version(sechdrs, versindex, name, mod, crc, owner) - || !use_module(mod, owner)) - sym = NULL; - } - return sym; + if (!sym) + return NULL; + + if (!check_version(sechdrs, versindex, name, mod, crc, owner)) + return NULL; + + prepare_to_wait(&module_wq, &wait, TASK_INTERRUPTIBLE); + err = use_module(mod, owner); + if (likely(!err) || err != -EBUSY || signal_pending(current)) { + finish_wait(&module_wq, &wait); + return err ? NULL : sym; + } + + /* Module is still loading. Drop lock and wait. */ + mutex_unlock(&module_mutex); + timeleft = schedule_timeout(timeleft); + mutex_lock(&module_mutex); + finish_wait(&module_wq, &wait); + + /* Module might be gone entirely, or replaced. Re-lookup. */ + if (timeleft) + goto again; + + printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n", + mod->name, owner->name); + return NULL; } /* -- cgit v1.2.3-70-g09d2 From 67fc4e0cb931d6b4ccf21248e4199b154478ecea Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:21 -0500 Subject: kdb: core for kgdb back end (2 of 2) This patch contains the hooks and instrumentation into kernel which live outside the kernel/debug directory, which the kdb core will call to run commands like lsmod, dmesg, bt etc... CC: linux-arch@vger.kernel.org Signed-off-by: Jason Wessel Signed-off-by: Martin Hicks --- arch/arm/include/asm/kmap_types.h | 1 + arch/powerpc/include/asm/kmap_types.h | 1 + include/asm-generic/kmap_types.h | 3 ++- init/main.c | 2 ++ kernel/kallsyms.c | 21 ++++++++++++++++++ kernel/module.c | 4 ++++ kernel/printk.c | 16 ++++++++++++++ kernel/sched.c | 7 ++++-- kernel/signal.c | 40 +++++++++++++++++++++++++++++++++++ 9 files changed, 92 insertions(+), 3 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h index c4b2ea3fbe4..e51b1e81df0 100644 --- a/arch/arm/include/asm/kmap_types.h +++ b/arch/arm/include/asm/kmap_types.h @@ -20,6 +20,7 @@ enum km_type { KM_SOFTIRQ1, KM_L1_CACHE, KM_L2_CACHE, + KM_KDB, KM_TYPE_NR }; diff --git a/arch/powerpc/include/asm/kmap_types.h b/arch/powerpc/include/asm/kmap_types.h index 916369575c9..bca8fdcd254 100644 --- a/arch/powerpc/include/asm/kmap_types.h +++ b/arch/powerpc/include/asm/kmap_types.h @@ -26,6 +26,7 @@ enum km_type { KM_SOFTIRQ1, KM_PPC_SYNC_PAGE, KM_PPC_SYNC_ICACHE, + KM_KDB, KM_TYPE_NR }; diff --git a/include/asm-generic/kmap_types.h b/include/asm-generic/kmap_types.h index e5f234a0854..97e807c8c81 100644 --- a/include/asm-generic/kmap_types.h +++ b/include/asm-generic/kmap_types.h @@ -28,7 +28,8 @@ KMAP_D(15) KM_UML_USERCOPY, KMAP_D(16) KM_IRQ_PTE, KMAP_D(17) KM_NMI, KMAP_D(18) KM_NMI_PTE, -KMAP_D(19) KM_TYPE_NR +KMAP_D(19) KM_KDB, +KMAP_D(20) KM_TYPE_NR }; #undef KMAP_D diff --git a/init/main.c b/init/main.c index 5c854027152..372771333d9 100644 --- a/init/main.c +++ b/init/main.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -675,6 +676,7 @@ asmlinkage void __init start_kernel(void) buffer_init(); key_init(); security_init(); + kdb_init(KDB_INIT_FULL); vfs_caches_init(totalram_pages); signals_init(); /* rootfs populating might need page-writeback */ diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 13aff293f4d..6f6d091b575 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include /* for cond_resched */ @@ -516,6 +517,26 @@ static int kallsyms_open(struct inode *inode, struct file *file) return ret; } +#ifdef CONFIG_KGDB_KDB +const char *kdb_walk_kallsyms(loff_t *pos) +{ + static struct kallsym_iter kdb_walk_kallsyms_iter; + if (*pos == 0) { + memset(&kdb_walk_kallsyms_iter, 0, + sizeof(kdb_walk_kallsyms_iter)); + reset_iter(&kdb_walk_kallsyms_iter, 0); + } + while (1) { + if (!update_iter(&kdb_walk_kallsyms_iter, *pos)) + return NULL; + ++*pos; + /* Some debugging symbols have no name. Ignore them. */ + if (kdb_walk_kallsyms_iter.name[0]) + return kdb_walk_kallsyms_iter.name; + } +} +#endif /* CONFIG_KGDB_KDB */ + static const struct file_operations kallsyms_operations = { .open = kallsyms_open, .read = seq_read, diff --git a/kernel/module.c b/kernel/module.c index e2564580f3f..b751f190247 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -77,6 +77,10 @@ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); +#ifdef CONFIG_KGDB_KDB +struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ +#endif /* CONFIG_KGDB_KDB */ + /* Block module loading/unloading? */ int modules_disabled = 0; diff --git a/kernel/printk.c b/kernel/printk.c index 75077ad0b53..9213b8b5bb4 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -413,6 +413,22 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) return do_syslog(type, buf, len, SYSLOG_FROM_CALL); } +#ifdef CONFIG_KGDB_KDB +/* kdb dmesg command needs access to the syslog buffer. do_syslog() + * uses locks so it cannot be used during debugging. Just tell kdb + * where the start and end of the physical and logical logs are. This + * is equivalent to do_syslog(3). + */ +void kdb_syslog_data(char *syslog_data[4]) +{ + syslog_data[0] = log_buf; + syslog_data[1] = log_buf + log_buf_len; + syslog_data[2] = log_buf + log_end - + (logged_chars < log_buf_len ? logged_chars : log_buf_len); + syslog_data[3] = log_buf + log_end; +} +#endif /* CONFIG_KGDB_KDB */ + /* * Call the console drivers on a range of log_buf */ diff --git a/kernel/sched.c b/kernel/sched.c index 1d93cd0ae4d..a25c1324af5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7758,9 +7758,9 @@ void normalize_rt_tasks(void) #endif /* CONFIG_MAGIC_SYSRQ */ -#ifdef CONFIG_IA64 +#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) /* - * These functions are only useful for the IA64 MCA handling. + * These functions are only useful for the IA64 MCA handling, or kdb. * * They can only be called when the whole system has been * stopped - every CPU needs to be quiescent, and no scheduling @@ -7780,6 +7780,9 @@ struct task_struct *curr_task(int cpu) return cpu_curr(cpu); } +#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ + +#ifdef CONFIG_IA64 /** * set_curr_task - set the current task for a given cpu. * @cpu: the processor in question. diff --git a/kernel/signal.c b/kernel/signal.c index dbd7fe073c5..825a3f24ad7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2735,3 +2735,43 @@ void __init signals_init(void) { sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC); } + +#ifdef CONFIG_KGDB_KDB +#include +/* + * kdb_send_sig_info - Allows kdb to send signals without exposing + * signal internals. This function checks if the required locks are + * available before calling the main signal code, to avoid kdb + * deadlocks. + */ +void +kdb_send_sig_info(struct task_struct *t, struct siginfo *info) +{ + static struct task_struct *kdb_prev_t; + int sig, new_t; + if (!spin_trylock(&t->sighand->siglock)) { + kdb_printf("Can't do kill command now.\n" + "The sigmask lock is held somewhere else in " + "kernel, try again later\n"); + return; + } + spin_unlock(&t->sighand->siglock); + new_t = kdb_prev_t != t; + kdb_prev_t = t; + if (t->state != TASK_RUNNING && new_t) { + kdb_printf("Process is not RUNNING, sending a signal from " + "kdb risks deadlock\n" + "on the run queue locks. " + "The signal has _not_ been sent.\n" + "Reissue the kill command if you want to risk " + "the deadlock.\n"); + return; + } + sig = info->si_signo; + if (send_sig_info(sig, info, t)) + kdb_printf("Fail to deliver Signal %d to process %d.\n", + sig, t->pid); + else + kdb_printf("Signal %d is sent to process %d.\n", sig, t->pid); +} +#endif /* CONFIG_KGDB_KDB */ -- cgit v1.2.3-70-g09d2 From 2c3c8bea608866d8bd9dcf92657d57fdcac011c5 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 12 May 2010 18:28:57 -0700 Subject: sysfs: add struct file* to bin_attr callbacks This allows bin_attr->read,write,mmap callbacks to check file specific data (such as inode owner) as part of any privilege validation. Signed-off-by: Chris Wright Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/pci-sysfs.c | 8 ++++--- arch/mips/txx9/generic/setup.c | 4 ++-- arch/powerpc/sysdev/mv64x60_pci.c | 4 ++-- arch/s390/kernel/ipl.c | 14 ++++++------ drivers/acpi/system.c | 2 +- drivers/base/firmware_class.c | 11 ++++++---- drivers/firmware/dcdbas.c | 4 ++-- drivers/firmware/dell_rbu.c | 10 ++++----- drivers/firmware/efivars.c | 4 ++-- drivers/gpu/drm/drm_sysfs.c | 5 +++-- drivers/misc/c2port/core.c | 4 ++-- drivers/misc/ds1682.c | 6 ++++-- drivers/misc/eeprom/at24.c | 6 ++++-- drivers/misc/eeprom/at25.c | 6 ++++-- drivers/misc/eeprom/eeprom.c | 3 ++- drivers/misc/eeprom/max6875.c | 2 +- drivers/net/netxen/netxen_nic_main.c | 11 ++++++---- drivers/net/qlcnic/qlcnic_main.c | 12 +++++++---- drivers/pci/hotplug/acpiphp_ibm.c | 5 +++-- drivers/pci/pci-sysfs.c | 42 +++++++++++++++++++++++++----------- drivers/pcmcia/cistpl.c | 4 ++-- drivers/power/olpc_battery.c | 2 +- drivers/rapidio/rio-sysfs.c | 6 ++++-- drivers/rtc/rtc-cmos.c | 6 ++++-- drivers/rtc/rtc-ds1305.c | 6 ++++-- drivers/rtc/rtc-ds1307.c | 6 ++++-- drivers/rtc/rtc-ds1511.c | 10 +++++---- drivers/rtc/rtc-ds1553.c | 4 ++-- drivers/rtc/rtc-ds1742.c | 4 ++-- drivers/rtc/rtc-m48t59.c | 4 ++-- drivers/rtc/rtc-stk17ta8.c | 4 ++-- drivers/rtc/rtc-tx4939.c | 4 ++-- drivers/s390/cio/chp.c | 5 +++-- drivers/scsi/3w-sas.c | 4 ++-- drivers/scsi/arcmsr/arcmsr_attr.c | 9 +++++--- drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++- drivers/scsi/ipr.c | 9 +++++--- drivers/scsi/lpfc/lpfc_attr.c | 20 ++++++++++++----- drivers/scsi/qla2xxx/qla_attr.c | 32 +++++++++++++-------------- drivers/staging/udlfb/udlfb.c | 3 ++- drivers/usb/core/sysfs.c | 3 ++- drivers/video/aty/radeon_base.c | 4 ++-- drivers/w1/slaves/w1_ds2431.c | 4 ++-- drivers/w1/slaves/w1_ds2433.c | 4 ++-- drivers/w1/slaves/w1_ds2760.c | 2 +- drivers/w1/w1.c | 4 ++-- drivers/zorro/zorro-sysfs.c | 2 +- fs/sysfs/bin.c | 24 ++++++++++----------- include/linux/sysfs.h | 7 +++--- kernel/ksysfs.c | 3 ++- kernel/module.c | 2 +- net/bridge/br_sysfs_br.c | 2 +- 52 files changed, 220 insertions(+), 149 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c index d979e7c7bc4..a5fffc882c7 100644 --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -53,6 +53,7 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num, /** * pci_mmap_resource - map a PCI resource into user memory space + * @filp: open sysfs file * @kobj: kobject for mapping * @attr: struct bin_attribute for the file being mapped * @vma: struct vm_area_struct passed into the mmap @@ -60,7 +61,8 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num, * * Use the bus mapping routines to map a PCI resource into userspace. */ -static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, +static int pci_mmap_resource(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, struct vm_area_struct *vma, int sparse) { struct pci_dev *pdev = to_pci_dev(container_of(kobj, @@ -89,14 +91,14 @@ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse); } -static int pci_mmap_resource_sparse(struct kobject *kobj, +static int pci_mmap_resource_sparse(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, struct vm_area_struct *vma) { return pci_mmap_resource(kobj, attr, vma, 1); } -static int pci_mmap_resource_dense(struct kobject *kobj, +static int pci_mmap_resource_dense(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, struct vm_area_struct *vma) { diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index adc69291f9e..575d219b800 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -905,7 +905,7 @@ struct txx9_sramc_sysdev { void __iomem *base; }; -static ssize_t txx9_sram_read(struct kobject *kobj, +static ssize_t txx9_sram_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t size) { @@ -920,7 +920,7 @@ static ssize_t txx9_sram_read(struct kobject *kobj, return size; } -static ssize_t txx9_sram_write(struct kobject *kobj, +static ssize_t txx9_sram_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t size) { diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c index 1456015a22d..198f288570c 100644 --- a/arch/powerpc/sysdev/mv64x60_pci.c +++ b/arch/powerpc/sysdev/mv64x60_pci.c @@ -24,7 +24,7 @@ #define MV64X60_VAL_LEN_MAX 11 #define MV64X60_PCICFG_CPCI_HOTSWAP 0x68 -static ssize_t mv64x60_hs_reg_read(struct kobject *kobj, +static ssize_t mv64x60_hs_reg_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -45,7 +45,7 @@ static ssize_t mv64x60_hs_reg_read(struct kobject *kobj, return sprintf(buf, "0x%08x\n", v); } -static ssize_t mv64x60_hs_reg_write(struct kobject *kobj, +static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 72c8b0d070c..a689070be28 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -403,8 +403,9 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, static struct kobj_attribute sys_ipl_device_attr = __ATTR(device, S_IRUGO, sys_ipl_device_show, NULL); -static ssize_t ipl_parameter_read(struct kobject *kobj, struct bin_attribute *attr, - char *buf, loff_t off, size_t count) +static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) { return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START, IPL_PARMBLOCK_SIZE); @@ -419,8 +420,9 @@ static struct bin_attribute ipl_parameter_attr = { .read = &ipl_parameter_read, }; -static ssize_t ipl_scp_data_read(struct kobject *kobj, struct bin_attribute *attr, - char *buf, loff_t off, size_t count) +static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) { unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len; void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data; @@ -694,7 +696,7 @@ static struct kobj_attribute sys_reipl_ccw_vmparm_attr = /* FCP reipl device attributes */ -static ssize_t reipl_fcp_scpdata_read(struct kobject *kobj, +static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -704,7 +706,7 @@ static ssize_t reipl_fcp_scpdata_read(struct kobject *kobj, return memory_read_from_buffer(buf, count, &off, scp_data, size); } -static ssize_t reipl_fcp_scpdata_write(struct kobject *kobj, +static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index e35525b39f6..c79e789ed03 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c @@ -71,7 +71,7 @@ struct acpi_table_attr { struct list_head node; }; -static ssize_t acpi_table_show(struct kobject *kobj, +static ssize_t acpi_table_show(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t offset, size_t count) { diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index d98e424675c..3f093b0dd21 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -278,8 +278,9 @@ static ssize_t firmware_loading_store(struct device *dev, static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store); static ssize_t -firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr, - char *buffer, loff_t offset, size_t count) +firmware_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buffer, loff_t offset, + size_t count) { struct device *dev = to_dev(kobj); struct firmware_priv *fw_priv = dev_get_drvdata(dev); @@ -362,6 +363,7 @@ fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size) /** * firmware_data_write - write method for firmware + * @filp: open sysfs file * @kobj: kobject for the device * @bin_attr: bin_attr structure * @buffer: buffer being written @@ -372,8 +374,9 @@ fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size) * the driver as a firmware image. **/ static ssize_t -firmware_data_write(struct kobject *kobj, struct bin_attribute *bin_attr, - char *buffer, loff_t offset, size_t count) +firmware_data_write(struct file* filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buffer, + loff_t offset, size_t count) { struct device *dev = to_dev(kobj); struct firmware_priv *fw_priv = dev_get_drvdata(dev); diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index fb09bb3c0ad..aa9bc9e980e 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -149,7 +149,7 @@ static ssize_t smi_data_buf_size_store(struct device *dev, return count; } -static ssize_t smi_data_read(struct kobject *kobj, +static ssize_t smi_data_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { @@ -162,7 +162,7 @@ static ssize_t smi_data_read(struct kobject *kobj, return ret; } -static ssize_t smi_data_write(struct kobject *kobj, +static ssize_t smi_data_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c index 3a4460265b1..2f452f1f7c8 100644 --- a/drivers/firmware/dell_rbu.c +++ b/drivers/firmware/dell_rbu.c @@ -522,7 +522,7 @@ static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count) rbu_data.image_update_buffer, rbu_data.bios_image_size); } -static ssize_t read_rbu_data(struct kobject *kobj, +static ssize_t read_rbu_data(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { @@ -576,7 +576,7 @@ static void callbackfn_rbu(const struct firmware *fw, void *context) release_firmware(fw); } -static ssize_t read_rbu_image_type(struct kobject *kobj, +static ssize_t read_rbu_image_type(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { @@ -586,7 +586,7 @@ static ssize_t read_rbu_image_type(struct kobject *kobj, return size; } -static ssize_t write_rbu_image_type(struct kobject *kobj, +static ssize_t write_rbu_image_type(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { @@ -647,7 +647,7 @@ static ssize_t write_rbu_image_type(struct kobject *kobj, return rc; } -static ssize_t read_rbu_packet_size(struct kobject *kobj, +static ssize_t read_rbu_packet_size(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { @@ -660,7 +660,7 @@ static ssize_t read_rbu_packet_size(struct kobject *kobj, return size; } -static ssize_t write_rbu_packet_size(struct kobject *kobj, +static ssize_t write_rbu_packet_size(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 81b70bd0758..2a62ec6390e 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -402,7 +402,7 @@ efivar_unregister(struct efivar_entry *var) } -static ssize_t efivar_create(struct kobject *kobj, +static ssize_t efivar_create(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { @@ -461,7 +461,7 @@ static ssize_t efivar_create(struct kobject *kobj, return count; } -static ssize_t efivar_delete(struct kobject *kobj, +static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 25bbd30ed7a..387166d5a10 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -193,8 +193,9 @@ static ssize_t enabled_show(struct device *device, "disabled"); } -static ssize_t edid_show(struct kobject *kobj, struct bin_attribute *attr, - char *buf, loff_t off, size_t count) +static ssize_t edid_show(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) { struct device *connector_dev = container_of(kobj, struct device, kobj); struct drm_connector *connector = to_drm_connector(connector_dev); diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c index ed090e77c9c..19fc7c1cb42 100644 --- a/drivers/misc/c2port/core.c +++ b/drivers/misc/c2port/core.c @@ -707,7 +707,7 @@ static ssize_t __c2port_read_flash_data(struct c2port_device *dev, return nread; } -static ssize_t c2port_read_flash_data(struct kobject *kobj, +static ssize_t c2port_read_flash_data(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buffer, loff_t offset, size_t count) { @@ -824,7 +824,7 @@ static ssize_t __c2port_write_flash_data(struct c2port_device *dev, return nwrite; } -static ssize_t c2port_write_flash_data(struct kobject *kobj, +static ssize_t c2port_write_flash_data(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buffer, loff_t offset, size_t count) { diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c index 9197cfc5501..a513f0aa643 100644 --- a/drivers/misc/ds1682.c +++ b/drivers/misc/ds1682.c @@ -140,7 +140,8 @@ static const struct attribute_group ds1682_group = { /* * User data attribute */ -static ssize_t ds1682_eeprom_read(struct kobject *kobj, struct bin_attribute *attr, +static ssize_t ds1682_eeprom_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct i2c_client *client = kobj_to_i2c_client(kobj); @@ -163,7 +164,8 @@ static ssize_t ds1682_eeprom_read(struct kobject *kobj, struct bin_attribute *at return count; } -static ssize_t ds1682_eeprom_write(struct kobject *kobj, struct bin_attribute *attr, +static ssize_t ds1682_eeprom_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct i2c_client *client = kobj_to_i2c_client(kobj); diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index db7d0f21b65..a79a62f7548 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -274,7 +274,8 @@ static ssize_t at24_read(struct at24_data *at24, return retval; } -static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr, +static ssize_t at24_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct at24_data *at24; @@ -395,7 +396,8 @@ static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off, return retval; } -static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr, +static ssize_t at24_bin_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct at24_data *at24; diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index d194212a41f..c627e4174cc 100644 --- a/drivers/misc/eeprom/at25.c +++ b/drivers/misc/eeprom/at25.c @@ -126,7 +126,8 @@ at25_ee_read( } static ssize_t -at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr, +at25_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev; @@ -253,7 +254,8 @@ at25_ee_write(struct at25_data *at25, const char *buf, loff_t off, } static ssize_t -at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr, +at25_bin_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev; diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c index e306a8cd2f9..45060ddc4e5 100644 --- a/drivers/misc/eeprom/eeprom.c +++ b/drivers/misc/eeprom/eeprom.c @@ -81,7 +81,8 @@ exit: mutex_unlock(&data->update_lock); } -static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *bin_attr, +static ssize_t eeprom_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj)); diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c index fe290927850..5653a3ce051 100644 --- a/drivers/misc/eeprom/max6875.c +++ b/drivers/misc/eeprom/max6875.c @@ -107,7 +107,7 @@ exit_up: mutex_unlock(&data->update_lock); } -static ssize_t max6875_read(struct kobject *kobj, +static ssize_t max6875_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index c61a61f177b..6ce6ce1df6d 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -2560,7 +2560,8 @@ netxen_sysfs_validate_crb(struct netxen_adapter *adapter, } static ssize_t -netxen_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr, +netxen_sysfs_read_crb(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = container_of(kobj, struct device, kobj); @@ -2587,7 +2588,8 @@ netxen_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr, } static ssize_t -netxen_sysfs_write_crb(struct kobject *kobj, struct bin_attribute *attr, +netxen_sysfs_write_crb(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = container_of(kobj, struct device, kobj); @@ -2627,7 +2629,8 @@ netxen_sysfs_validate_mem(struct netxen_adapter *adapter, } static ssize_t -netxen_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr, +netxen_sysfs_read_mem(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = container_of(kobj, struct device, kobj); @@ -2647,7 +2650,7 @@ netxen_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr, return size; } -static ssize_t netxen_sysfs_write_mem(struct kobject *kobj, +static ssize_t netxen_sysfs_write_mem(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c index 1003eb76fda..23ea9caa526 100644 --- a/drivers/net/qlcnic/qlcnic_main.c +++ b/drivers/net/qlcnic/qlcnic_main.c @@ -2464,7 +2464,8 @@ qlcnic_sysfs_validate_crb(struct qlcnic_adapter *adapter, } static ssize_t -qlcnic_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr, +qlcnic_sysfs_read_crb(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = container_of(kobj, struct device, kobj); @@ -2488,7 +2489,8 @@ qlcnic_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr, } static ssize_t -qlcnic_sysfs_write_crb(struct kobject *kobj, struct bin_attribute *attr, +qlcnic_sysfs_write_crb(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = container_of(kobj, struct device, kobj); @@ -2525,7 +2527,8 @@ qlcnic_sysfs_validate_mem(struct qlcnic_adapter *adapter, } static ssize_t -qlcnic_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr, +qlcnic_sysfs_read_mem(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = container_of(kobj, struct device, kobj); @@ -2546,7 +2549,8 @@ qlcnic_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr, } static ssize_t -qlcnic_sysfs_write_mem(struct kobject *kobj, struct bin_attribute *attr, +qlcnic_sysfs_write_mem(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = container_of(kobj, struct device, kobj); diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index 6ecbfb27db9..e525263210e 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -108,7 +108,7 @@ static int ibm_set_attention_status(struct hotplug_slot *slot, u8 status); static int ibm_get_attention_status(struct hotplug_slot *slot, u8 *status); static void ibm_handle_events(acpi_handle handle, u32 event, void *context); static int ibm_get_table_from_acpi(char **bufp); -static ssize_t ibm_read_apci_table(struct kobject *kobj, +static ssize_t ibm_read_apci_table(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t size); static acpi_status __init ibm_find_acpi_device(acpi_handle handle, @@ -351,6 +351,7 @@ read_table_done: /** * ibm_read_apci_table - callback for the sysfs apci_table file + * @filp: the open sysfs file * @kobj: the kobject this binary attribute is a part of * @bin_attr: struct bin_attribute for this file * @buffer: the kernel space buffer to fill @@ -364,7 +365,7 @@ read_table_done: * things get really tricky here... * our solution is to only allow reading the table in all at once. */ -static ssize_t ibm_read_apci_table(struct kobject *kobj, +static ssize_t ibm_read_apci_table(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t size) { diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index fad93983bfe..ad44557e65c 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -357,7 +357,8 @@ boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf) struct device_attribute vga_attr = __ATTR_RO(boot_vga); static ssize_t -pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, +pci_read_config(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); @@ -430,7 +431,8 @@ pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, } static ssize_t -pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr, +pci_write_config(struct file* filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); @@ -487,7 +489,8 @@ pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr, } static ssize_t -read_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr, +read_vpd_attr(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct pci_dev *dev = @@ -502,7 +505,8 @@ read_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr, } static ssize_t -write_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr, +write_vpd_attr(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct pci_dev *dev = @@ -519,6 +523,7 @@ write_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr, #ifdef HAVE_PCI_LEGACY /** * pci_read_legacy_io - read byte(s) from legacy I/O port space + * @filp: open sysfs file * @kobj: kobject corresponding to file to read from * @bin_attr: struct bin_attribute for this file * @buf: buffer to store results @@ -529,7 +534,8 @@ write_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr, * callback routine (pci_legacy_read). */ static ssize_t -pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, +pci_read_legacy_io(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct pci_bus *bus = to_pci_bus(container_of(kobj, @@ -545,6 +551,7 @@ pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, /** * pci_write_legacy_io - write byte(s) to legacy I/O port space + * @filp: open sysfs file * @kobj: kobject corresponding to file to read from * @bin_attr: struct bin_attribute for this file * @buf: buffer containing value to be written @@ -555,7 +562,8 @@ pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, * callback routine (pci_legacy_write). */ static ssize_t -pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, +pci_write_legacy_io(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct pci_bus *bus = to_pci_bus(container_of(kobj, @@ -570,6 +578,7 @@ pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, /** * pci_mmap_legacy_mem - map legacy PCI memory into user memory space + * @filp: open sysfs file * @kobj: kobject corresponding to device to be mapped * @attr: struct bin_attribute for this file * @vma: struct vm_area_struct passed to mmap @@ -579,7 +588,8 @@ pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, * memory space. */ static int -pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr, +pci_mmap_legacy_mem(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, struct vm_area_struct *vma) { struct pci_bus *bus = to_pci_bus(container_of(kobj, @@ -591,6 +601,7 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr, /** * pci_mmap_legacy_io - map legacy PCI IO into user memory space + * @filp: open sysfs file * @kobj: kobject corresponding to device to be mapped * @attr: struct bin_attribute for this file * @vma: struct vm_area_struct passed to mmap @@ -600,7 +611,8 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr, * memory space. Returns -ENOSYS if the operation isn't supported */ static int -pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr, +pci_mmap_legacy_io(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, struct vm_area_struct *vma) { struct pci_bus *bus = to_pci_bus(container_of(kobj, @@ -750,14 +762,16 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, } static int -pci_mmap_resource_uc(struct kobject *kobj, struct bin_attribute *attr, +pci_mmap_resource_uc(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, struct vm_area_struct *vma) { return pci_mmap_resource(kobj, attr, vma, 0); } static int -pci_mmap_resource_wc(struct kobject *kobj, struct bin_attribute *attr, +pci_mmap_resource_wc(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, struct vm_area_struct *vma) { return pci_mmap_resource(kobj, attr, vma, 1); @@ -861,6 +875,7 @@ void __weak pci_remove_resource_files(struct pci_dev *dev) { return; } /** * pci_write_rom - used to enable access to the PCI ROM display + * @filp: sysfs file * @kobj: kernel object handle * @bin_attr: struct bin_attribute for this file * @buf: user input @@ -870,7 +885,8 @@ void __weak pci_remove_resource_files(struct pci_dev *dev) { return; } * writing anything except 0 enables it */ static ssize_t -pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr, +pci_write_rom(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); @@ -885,6 +901,7 @@ pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr, /** * pci_read_rom - read a PCI ROM + * @filp: sysfs file * @kobj: kernel object handle * @bin_attr: struct bin_attribute for this file * @buf: where to put the data we read from the ROM @@ -895,7 +912,8 @@ pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr, * device corresponding to @kobj. */ static ssize_t -pci_read_rom(struct kobject *kobj, struct bin_attribute *bin_attr, +pci_read_rom(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index 60d428be0b0..8844bc3e311 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -1531,7 +1531,7 @@ static ssize_t pccard_extract_cis(struct pcmcia_socket *s, char *buf, } -static ssize_t pccard_show_cis(struct kobject *kobj, +static ssize_t pccard_show_cis(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -1562,7 +1562,7 @@ static ssize_t pccard_show_cis(struct kobject *kobj, } -static ssize_t pccard_store_cis(struct kobject *kobj, +static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 8fefe5a7355..baefcf1cffc 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -354,7 +354,7 @@ static enum power_supply_property olpc_bat_props[] = { #define EEPROM_END 0x80 #define EEPROM_SIZE (EEPROM_END - EEPROM_START) -static ssize_t olpc_bat_eeprom_read(struct kobject *kobj, +static ssize_t olpc_bat_eeprom_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { uint8_t ec_byte; diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index ba742e82c57..00b47565835 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -68,7 +68,8 @@ struct device_attribute rio_dev_attrs[] = { }; static ssize_t -rio_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, +rio_read_config(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct rio_dev *dev = @@ -139,7 +140,8 @@ rio_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, } static ssize_t -rio_write_config(struct kobject *kobj, struct bin_attribute *bin_attr, +rio_write_config(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct rio_dev *dev = diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index e9aa814ddd2..ece4dbddc0e 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -519,7 +519,8 @@ static const struct rtc_class_ops cmos_rtc_ops = { #define NVRAM_OFFSET (RTC_REG_D + 1) static ssize_t -cmos_nvram_read(struct kobject *kobj, struct bin_attribute *attr, +cmos_nvram_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) { int retval; @@ -547,7 +548,8 @@ cmos_nvram_read(struct kobject *kobj, struct bin_attribute *attr, } static ssize_t -cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr, +cmos_nvram_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct cmos_rtc *cmos; diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c index 7836c9cec55..48da85e97ca 100644 --- a/drivers/rtc/rtc-ds1305.c +++ b/drivers/rtc/rtc-ds1305.c @@ -542,7 +542,8 @@ static void msg_init(struct spi_message *m, struct spi_transfer *x, } static ssize_t -ds1305_nvram_read(struct kobject *kobj, struct bin_attribute *attr, +ds1305_nvram_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct spi_device *spi; @@ -572,7 +573,8 @@ ds1305_nvram_read(struct kobject *kobj, struct bin_attribute *attr, } static ssize_t -ds1305_nvram_write(struct kobject *kobj, struct bin_attribute *attr, +ds1305_nvram_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct spi_device *spi; diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index c4ec5c158aa..de033b7ac21 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -556,7 +556,8 @@ static const struct rtc_class_ops ds13xx_rtc_ops = { #define NVRAM_SIZE 56 static ssize_t -ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr, +ds1307_nvram_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct i2c_client *client; @@ -580,7 +581,8 @@ ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr, } static ssize_t -ds1307_nvram_write(struct kobject *kobj, struct bin_attribute *attr, +ds1307_nvram_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct i2c_client *client; diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c index 06b8566c453..37268e97de4 100644 --- a/drivers/rtc/rtc-ds1511.c +++ b/drivers/rtc/rtc-ds1511.c @@ -423,8 +423,9 @@ static const struct rtc_class_ops ds1511_rtc_ops = { }; static ssize_t -ds1511_nvram_read(struct kobject *kobj, struct bin_attribute *ba, - char *buf, loff_t pos, size_t size) +ds1511_nvram_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *ba, + char *buf, loff_t pos, size_t size) { ssize_t count; @@ -452,8 +453,9 @@ ds1511_nvram_read(struct kobject *kobj, struct bin_attribute *ba, } static ssize_t -ds1511_nvram_write(struct kobject *kobj, struct bin_attribute *bin_attr, - char *buf, loff_t pos, size_t size) +ds1511_nvram_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t size) { ssize_t count; diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index 244f9994bcb..ff432e2ca27 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -252,7 +252,7 @@ static const struct rtc_class_ops ds1553_rtc_ops = { .update_irq_enable = ds1553_rtc_update_irq_enable, }; -static ssize_t ds1553_nvram_read(struct kobject *kobj, +static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t size) { @@ -267,7 +267,7 @@ static ssize_t ds1553_nvram_read(struct kobject *kobj, return count; } -static ssize_t ds1553_nvram_write(struct kobject *kobj, +static ssize_t ds1553_nvram_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t size) { diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 2b4b0bc42d6..042630c90dd 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -128,7 +128,7 @@ static const struct rtc_class_ops ds1742_rtc_ops = { .set_time = ds1742_rtc_set_time, }; -static ssize_t ds1742_nvram_read(struct kobject *kobj, +static ssize_t ds1742_nvram_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t size) { @@ -143,7 +143,7 @@ static ssize_t ds1742_nvram_read(struct kobject *kobj, return count; } -static ssize_t ds1742_nvram_write(struct kobject *kobj, +static ssize_t ds1742_nvram_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t size) { diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c index 365ff3ac234..be8359fdb65 100644 --- a/drivers/rtc/rtc-m48t59.c +++ b/drivers/rtc/rtc-m48t59.c @@ -343,7 +343,7 @@ static const struct rtc_class_ops m48t02_rtc_ops = { .set_time = m48t59_rtc_set_time, }; -static ssize_t m48t59_nvram_read(struct kobject *kobj, +static ssize_t m48t59_nvram_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t size) { @@ -363,7 +363,7 @@ static ssize_t m48t59_nvram_read(struct kobject *kobj, return cnt; } -static ssize_t m48t59_nvram_write(struct kobject *kobj, +static ssize_t m48t59_nvram_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t size) { diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c index b53a00198db..3b943673cd3 100644 --- a/drivers/rtc/rtc-stk17ta8.c +++ b/drivers/rtc/rtc-stk17ta8.c @@ -244,7 +244,7 @@ static const struct rtc_class_ops stk17ta8_rtc_ops = { .alarm_irq_enable = stk17ta8_rtc_alarm_irq_enable, }; -static ssize_t stk17ta8_nvram_read(struct kobject *kobj, +static ssize_t stk17ta8_nvram_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t pos, size_t size) { @@ -259,7 +259,7 @@ static ssize_t stk17ta8_nvram_read(struct kobject *kobj, return count; } -static ssize_t stk17ta8_nvram_write(struct kobject *kobj, +static ssize_t stk17ta8_nvram_write(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t pos, size_t size) { diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c index 20bfc64a15c..ec6313d1535 100644 --- a/drivers/rtc/rtc-tx4939.c +++ b/drivers/rtc/rtc-tx4939.c @@ -188,7 +188,7 @@ static const struct rtc_class_ops tx4939_rtc_ops = { .alarm_irq_enable = tx4939_rtc_alarm_irq_enable, }; -static ssize_t tx4939_rtc_nvram_read(struct kobject *kobj, +static ssize_t tx4939_rtc_nvram_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t size) { @@ -207,7 +207,7 @@ static ssize_t tx4939_rtc_nvram_read(struct kobject *kobj, return count; } -static ssize_t tx4939_rtc_nvram_write(struct kobject *kobj, +static ssize_t tx4939_rtc_nvram_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t size) { diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 1d16189f2f2..6c9fa15aac7 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -135,7 +135,8 @@ static int s390_vary_chpid(struct chp_id chpid, int on) /* * Channel measurement related functions */ -static ssize_t chp_measurement_chars_read(struct kobject *kobj, +static ssize_t chp_measurement_chars_read(struct file *filp, + struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -182,7 +183,7 @@ static void chp_measurement_copy_block(struct cmg_entry *buf, } while (reference_buf.values[0] != buf->values[0]); } -static ssize_t chp_measurement_read(struct kobject *kobj, +static ssize_t chp_measurement_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index 54c5ffb1eaa..d38000db923 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -98,7 +98,7 @@ static int twl_reset_device_extension(TW_Device_Extension *tw_dev, int ioctl_res /* Functions */ /* This function returns AENs through sysfs */ -static ssize_t twl_sysfs_aen_read(struct kobject *kobj, +static ssize_t twl_sysfs_aen_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *outbuf, loff_t offset, size_t count) { @@ -129,7 +129,7 @@ static struct bin_attribute twl_sysfs_aen_read_attr = { }; /* This function returns driver compatibility info through sysfs */ -static ssize_t twl_sysfs_compat_info(struct kobject *kobj, +static ssize_t twl_sysfs_compat_info(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *outbuf, loff_t offset, size_t count) { diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c index 5877f29a600..a4e04c50c43 100644 --- a/drivers/scsi/arcmsr/arcmsr_attr.c +++ b/drivers/scsi/arcmsr/arcmsr_attr.c @@ -59,7 +59,8 @@ struct device_attribute *arcmsr_host_attrs[]; -static ssize_t arcmsr_sysfs_iop_message_read(struct kobject *kobj, +static ssize_t arcmsr_sysfs_iop_message_read(struct file *filp, + struct kobject *kobj, struct bin_attribute *bin, char *buf, loff_t off, size_t count) @@ -105,7 +106,8 @@ static ssize_t arcmsr_sysfs_iop_message_read(struct kobject *kobj, return (allxfer_len); } -static ssize_t arcmsr_sysfs_iop_message_write(struct kobject *kobj, +static ssize_t arcmsr_sysfs_iop_message_write(struct file *filp, + struct kobject *kobj, struct bin_attribute *bin, char *buf, loff_t off, size_t count) @@ -153,7 +155,8 @@ static ssize_t arcmsr_sysfs_iop_message_write(struct kobject *kobj, } } -static ssize_t arcmsr_sysfs_iop_message_clear(struct kobject *kobj, +static ssize_t arcmsr_sysfs_iop_message_clear(struct file *filp, + struct kobject *kobj, struct bin_attribute *bin, char *buf, loff_t off, size_t count) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index d18f45c9563..3eb2b7b3d8b 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -2919,6 +2919,7 @@ static DEVICE_ATTR(log_level, S_IRUGO | S_IWUSR, #ifdef CONFIG_SCSI_IBMVFC_TRACE /** * ibmvfc_read_trace - Dump the adapter trace + * @filp: open sysfs file * @kobj: kobject struct * @bin_attr: bin_attribute struct * @buf: buffer @@ -2928,7 +2929,7 @@ static DEVICE_ATTR(log_level, S_IRUGO | S_IWUSR, * Return value: * number of bytes printed to buffer **/ -static ssize_t ibmvfc_read_trace(struct kobject *kobj, +static ssize_t ibmvfc_read_trace(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index b90c118119d..6a6661c35b2 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3120,6 +3120,7 @@ restart: #ifdef CONFIG_SCSI_IPR_TRACE /** * ipr_read_trace - Dump the adapter trace + * @filp: open sysfs file * @kobj: kobject struct * @bin_attr: bin_attribute struct * @buf: buffer @@ -3129,7 +3130,7 @@ restart: * Return value: * number of bytes printed to buffer **/ -static ssize_t ipr_read_trace(struct kobject *kobj, +static ssize_t ipr_read_trace(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -3764,6 +3765,7 @@ static struct device_attribute *ipr_ioa_attrs[] = { #ifdef CONFIG_SCSI_IPR_DUMP /** * ipr_read_dump - Dump the adapter + * @filp: open sysfs file * @kobj: kobject struct * @bin_attr: bin_attribute struct * @buf: buffer @@ -3773,7 +3775,7 @@ static struct device_attribute *ipr_ioa_attrs[] = { * Return value: * number of bytes printed to buffer **/ -static ssize_t ipr_read_dump(struct kobject *kobj, +static ssize_t ipr_read_dump(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -3927,6 +3929,7 @@ static int ipr_free_dump(struct ipr_ioa_cfg *ioa_cfg) /** * ipr_write_dump - Setup dump state of adapter + * @filp: open sysfs file * @kobj: kobject struct * @bin_attr: bin_attribute struct * @buf: buffer @@ -3936,7 +3939,7 @@ static int ipr_free_dump(struct ipr_ioa_cfg *ioa_cfg) * Return value: * number of bytes printed to buffer **/ -static ssize_t ipr_write_dump(struct kobject *kobj, +static ssize_t ipr_write_dump(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 2e5f376d9cc..bf33b315f93 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -2643,6 +2643,7 @@ static DEVICE_ATTR(lpfc_stat_data_ctrl, S_IRUGO | S_IWUSR, /** * sysfs_drvr_stat_data_read - Read function for lpfc_drvr_stat_data attribute + * @filp: sysfs file * @kobj: Pointer to the kernel object * @bin_attr: Attribute object * @buff: Buffer pointer @@ -2654,7 +2655,8 @@ static DEVICE_ATTR(lpfc_stat_data_ctrl, S_IRUGO | S_IWUSR, * applications. **/ static ssize_t -sysfs_drvr_stat_data_read(struct kobject *kobj, struct bin_attribute *bin_attr, +sysfs_drvr_stat_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = container_of(kobj, struct device, @@ -3362,6 +3364,7 @@ struct device_attribute *lpfc_vport_attrs[] = { /** * sysfs_ctlreg_write - Write method for writing to ctlreg + * @filp: open sysfs file * @kobj: kernel kobject that contains the kernel class device. * @bin_attr: kernel attributes passed to us. * @buf: contains the data to be written to the adapter IOREG space. @@ -3379,7 +3382,8 @@ struct device_attribute *lpfc_vport_attrs[] = { * value of count, buf contents written **/ static ssize_t -sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr, +sysfs_ctlreg_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { size_t buf_off; @@ -3415,6 +3419,7 @@ sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr, /** * sysfs_ctlreg_read - Read method for reading from ctlreg + * @filp: open sysfs file * @kobj: kernel kobject that contains the kernel class device. * @bin_attr: kernel attributes passed to us. * @buf: if successful contains the data from the adapter IOREG space. @@ -3431,7 +3436,8 @@ sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr, * value of count, buf contents read **/ static ssize_t -sysfs_ctlreg_read(struct kobject *kobj, struct bin_attribute *bin_attr, +sysfs_ctlreg_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { size_t buf_off; @@ -3496,6 +3502,7 @@ sysfs_mbox_idle(struct lpfc_hba *phba) /** * sysfs_mbox_write - Write method for writing information via mbox + * @filp: open sysfs file * @kobj: kernel kobject that contains the kernel class device. * @bin_attr: kernel attributes passed to us. * @buf: contains the data to be written to sysfs mbox. @@ -3516,7 +3523,8 @@ sysfs_mbox_idle(struct lpfc_hba *phba) * count number of bytes transferred **/ static ssize_t -sysfs_mbox_write(struct kobject *kobj, struct bin_attribute *bin_attr, +sysfs_mbox_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = container_of(kobj, struct device, kobj); @@ -3571,6 +3579,7 @@ sysfs_mbox_write(struct kobject *kobj, struct bin_attribute *bin_attr, /** * sysfs_mbox_read - Read method for reading information via mbox + * @filp: open sysfs file * @kobj: kernel kobject that contains the kernel class device. * @bin_attr: kernel attributes passed to us. * @buf: contains the data to be read from sysfs mbox. @@ -3593,7 +3602,8 @@ sysfs_mbox_write(struct kobject *kobj, struct bin_attribute *bin_attr, * count number of bytes transferred **/ static ssize_t -sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr, +sysfs_mbox_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = container_of(kobj, struct device, kobj); diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 3b708606b93..1e4cafabba1 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -16,7 +16,7 @@ static int qla24xx_vport_disable(struct fc_vport *, bool); /* SYSFS attributes --------------------------------------------------------- */ static ssize_t -qla2x00_sysfs_read_fw_dump(struct kobject *kobj, +qla2x00_sysfs_read_fw_dump(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -32,7 +32,7 @@ qla2x00_sysfs_read_fw_dump(struct kobject *kobj, } static ssize_t -qla2x00_sysfs_write_fw_dump(struct kobject *kobj, +qla2x00_sysfs_write_fw_dump(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -92,7 +92,7 @@ static struct bin_attribute sysfs_fw_dump_attr = { }; static ssize_t -qla2x00_sysfs_read_nvram(struct kobject *kobj, +qla2x00_sysfs_read_nvram(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -111,7 +111,7 @@ qla2x00_sysfs_read_nvram(struct kobject *kobj, } static ssize_t -qla2x00_sysfs_write_nvram(struct kobject *kobj, +qla2x00_sysfs_write_nvram(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -177,7 +177,7 @@ static struct bin_attribute sysfs_nvram_attr = { }; static ssize_t -qla2x00_sysfs_read_optrom(struct kobject *kobj, +qla2x00_sysfs_read_optrom(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -193,7 +193,7 @@ qla2x00_sysfs_read_optrom(struct kobject *kobj, } static ssize_t -qla2x00_sysfs_write_optrom(struct kobject *kobj, +qla2x00_sysfs_write_optrom(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -224,7 +224,7 @@ static struct bin_attribute sysfs_optrom_attr = { }; static ssize_t -qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj, +qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -387,7 +387,7 @@ static struct bin_attribute sysfs_optrom_ctl_attr = { }; static ssize_t -qla2x00_sysfs_read_vpd(struct kobject *kobj, +qla2x00_sysfs_read_vpd(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -408,7 +408,7 @@ qla2x00_sysfs_read_vpd(struct kobject *kobj, } static ssize_t -qla2x00_sysfs_write_vpd(struct kobject *kobj, +qla2x00_sysfs_write_vpd(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -461,7 +461,7 @@ static struct bin_attribute sysfs_vpd_attr = { }; static ssize_t -qla2x00_sysfs_read_sfp(struct kobject *kobj, +qla2x00_sysfs_read_sfp(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -522,7 +522,7 @@ static struct bin_attribute sysfs_sfp_attr = { }; static ssize_t -qla2x00_sysfs_write_reset(struct kobject *kobj, +qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -592,7 +592,7 @@ static struct bin_attribute sysfs_reset_attr = { }; static ssize_t -qla2x00_sysfs_write_edc(struct kobject *kobj, +qla2x00_sysfs_write_edc(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -650,7 +650,7 @@ static struct bin_attribute sysfs_edc_attr = { }; static ssize_t -qla2x00_sysfs_write_edc_status(struct kobject *kobj, +qla2x00_sysfs_write_edc_status(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -700,7 +700,7 @@ qla2x00_sysfs_write_edc_status(struct kobject *kobj, } static ssize_t -qla2x00_sysfs_read_edc_status(struct kobject *kobj, +qla2x00_sysfs_read_edc_status(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -730,7 +730,7 @@ static struct bin_attribute sysfs_edc_status_attr = { }; static ssize_t -qla2x00_sysfs_read_xgmac_stats(struct kobject *kobj, +qla2x00_sysfs_read_xgmac_stats(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -782,7 +782,7 @@ static struct bin_attribute sysfs_xgmac_stats_attr = { }; static ssize_t -qla2x00_sysfs_read_dcbx_tlv(struct kobject *kobj, +qla2x00_sysfs_read_dcbx_tlv(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/staging/udlfb/udlfb.c b/drivers/staging/udlfb/udlfb.c index aa8195199a2..577f2bf6eb2 100644 --- a/drivers/staging/udlfb/udlfb.c +++ b/drivers/staging/udlfb/udlfb.c @@ -1063,7 +1063,8 @@ static ssize_t metrics_misc_show(struct device *fbdev, atomic_read(&dev->lost_pixels) ? "yes" : "no"); } -static ssize_t edid_show(struct kobject *kobj, struct bin_attribute *a, +static ssize_t edid_show(struct file *filp, struct kobject *kobj, + struct bin_attribute *a, char *buf, loff_t off, size_t count) { struct device *fbdev = container_of(kobj, struct device, kobj); struct fb_info *fb_info = dev_get_drvdata(fbdev); diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index 06863befaf3..448f5b47fc4 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -658,7 +658,8 @@ const struct attribute_group *usb_device_groups[] = { /* Binary descriptors */ static ssize_t -read_descriptors(struct kobject *kobj, struct bin_attribute *attr, +read_descriptors(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = container_of(kobj, struct device, kobj); diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c index 6c37e8ee5ef..3c1e13ed1cb 100644 --- a/drivers/video/aty/radeon_base.c +++ b/drivers/video/aty/radeon_base.c @@ -2099,7 +2099,7 @@ static ssize_t radeon_show_one_edid(char *buf, loff_t off, size_t count, const u } -static ssize_t radeon_show_edid1(struct kobject *kobj, +static ssize_t radeon_show_edid1(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -2112,7 +2112,7 @@ static ssize_t radeon_show_edid1(struct kobject *kobj, } -static ssize_t radeon_show_edid2(struct kobject *kobj, +static ssize_t radeon_show_edid2(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/w1/slaves/w1_ds2431.c b/drivers/w1/slaves/w1_ds2431.c index 2c6c0cf6a20..84e2410aec1 100644 --- a/drivers/w1/slaves/w1_ds2431.c +++ b/drivers/w1/slaves/w1_ds2431.c @@ -96,7 +96,7 @@ static int w1_f2d_readblock(struct w1_slave *sl, int off, int count, char *buf) return -1; } -static ssize_t w1_f2d_read_bin(struct kobject *kobj, +static ssize_t w1_f2d_read_bin(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -202,7 +202,7 @@ retry: return 0; } -static ssize_t w1_f2d_write_bin(struct kobject *kobj, +static ssize_t w1_f2d_write_bin(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/w1/slaves/w1_ds2433.c b/drivers/w1/slaves/w1_ds2433.c index d2bf32118a9..0f7b8f9c509 100644 --- a/drivers/w1/slaves/w1_ds2433.c +++ b/drivers/w1/slaves/w1_ds2433.c @@ -92,7 +92,7 @@ static int w1_f23_refresh_block(struct w1_slave *sl, struct w1_f23_data *data, } #endif /* CONFIG_W1_SLAVE_DS2433_CRC */ -static ssize_t w1_f23_read_bin(struct kobject *kobj, +static ssize_t w1_f23_read_bin(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -206,7 +206,7 @@ static int w1_f23_write(struct w1_slave *sl, int addr, int len, const u8 *data) return 0; } -static ssize_t w1_f23_write_bin(struct kobject *kobj, +static ssize_t w1_f23_write_bin(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c index 6e153343e11..483d4518091 100644 --- a/drivers/w1/slaves/w1_ds2760.c +++ b/drivers/w1/slaves/w1_ds2760.c @@ -97,7 +97,7 @@ int w1_ds2760_recall_eeprom(struct device *dev, int addr) return w1_ds2760_eeprom_cmd(dev, addr, W1_DS2760_RECALL_DATA); } -static ssize_t w1_ds2760_read_bin(struct kobject *kobj, +static ssize_t w1_ds2760_read_bin(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index ad5897dc449..2839e281cd6 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -120,7 +120,7 @@ static struct device_attribute w1_slave_attr_id = /* Default family */ -static ssize_t w1_default_write(struct kobject *kobj, +static ssize_t w1_default_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -139,7 +139,7 @@ out_up: return count; } -static ssize_t w1_default_read(struct kobject *kobj, +static ssize_t w1_default_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c index eb924e0a64c..26f7184ef9e 100644 --- a/drivers/zorro/zorro-sysfs.c +++ b/drivers/zorro/zorro-sysfs.c @@ -49,7 +49,7 @@ static ssize_t zorro_show_resource(struct device *dev, struct device_attribute * static DEVICE_ATTR(resource, S_IRUGO, zorro_show_resource, NULL); -static ssize_t zorro_read_config(struct kobject *kobj, +static ssize_t zorro_read_config(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 806b277453f..4e321f7353f 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -46,9 +46,9 @@ struct bin_buffer { }; static int -fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) +fill_read(struct file *file, char *buffer, loff_t off, size_t count) { - struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; int rc; @@ -59,7 +59,7 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) rc = -EIO; if (attr->read) - rc = attr->read(kobj, attr, buffer, off, count); + rc = attr->read(file, kobj, attr, buffer, off, count); sysfs_put_active(attr_sd); @@ -70,8 +70,7 @@ static ssize_t read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) { struct bin_buffer *bb = file->private_data; - struct dentry *dentry = file->f_path.dentry; - int size = dentry->d_inode->i_size; + int size = file->f_path.dentry->d_inode->i_size; loff_t offs = *off; int count = min_t(size_t, bytes, PAGE_SIZE); char *temp; @@ -92,7 +91,7 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) mutex_lock(&bb->mutex); - count = fill_read(dentry, bb->buffer, offs, count); + count = fill_read(file, bb->buffer, offs, count); if (count < 0) { mutex_unlock(&bb->mutex); goto out_free; @@ -117,9 +116,9 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) } static int -flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) +flush_write(struct file *file, char *buffer, loff_t offset, size_t count) { - struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; int rc; @@ -130,7 +129,7 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) rc = -EIO; if (attr->write) - rc = attr->write(kobj, attr, buffer, offset, count); + rc = attr->write(file, kobj, attr, buffer, offset, count); sysfs_put_active(attr_sd); @@ -141,8 +140,7 @@ static ssize_t write(struct file *file, const char __user *userbuf, size_t bytes, loff_t *off) { struct bin_buffer *bb = file->private_data; - struct dentry *dentry = file->f_path.dentry; - int size = dentry->d_inode->i_size; + int size = file->f_path.dentry->d_inode->i_size; loff_t offs = *off; int count = min_t(size_t, bytes, PAGE_SIZE); char *temp; @@ -165,7 +163,7 @@ static ssize_t write(struct file *file, const char __user *userbuf, memcpy(bb->buffer, temp, count); - count = flush_write(dentry, bb->buffer, offs, count); + count = flush_write(file, bb->buffer, offs, count); mutex_unlock(&bb->mutex); if (count > 0) @@ -363,7 +361,7 @@ static int mmap(struct file *file, struct vm_area_struct *vma) if (!attr->mmap) goto out_put; - rc = attr->mmap(kobj, attr, vma); + rc = attr->mmap(file, kobj, attr, vma); if (rc) goto out_put; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 6903e920403..f2694eb4dd3 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -87,17 +87,18 @@ struct attribute_group { #define attr_name(_attr) (_attr).attr.name +struct file; struct vm_area_struct; struct bin_attribute { struct attribute attr; size_t size; void *private; - ssize_t (*read)(struct kobject *, struct bin_attribute *, + ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); - ssize_t (*write)(struct kobject *, struct bin_attribute *, + ssize_t (*write)(struct file *,struct kobject *, struct bin_attribute *, char *, loff_t, size_t); - int (*mmap)(struct kobject *, struct bin_attribute *attr, + int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr, struct vm_area_struct *vma); }; diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 21fe3c42694..0b624e79180 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -138,7 +138,8 @@ extern const void __start_notes __attribute__((weak)); extern const void __stop_notes __attribute__((weak)); #define notes_size (&__stop_notes - &__start_notes) -static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr, +static ssize_t notes_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { memcpy(buf, &__start_notes + off, count); diff --git a/kernel/module.c b/kernel/module.c index e2564580f3f..5e14483768b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1182,7 +1182,7 @@ struct module_notes_attrs { struct bin_attribute attrs[0]; }; -static ssize_t module_notes_read(struct kobject *kobj, +static ssize_t module_notes_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index dd321e39e62..486b8f3861d 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -659,7 +659,7 @@ static struct attribute_group bridge_group = { * * Returns the number of bytes read. */ -static ssize_t brforward_read(struct kobject *kobj, +static ssize_t brforward_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { -- cgit v1.2.3-70-g09d2 From 7d52669b14e36f8365070324be009486d387ad00 Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Mon, 24 May 2010 14:33:12 -0700 Subject: module: remove duplicate declaration of __ksymtab_gpl_future Minor cleanup on duplicate __{start/stop}__ksymtab_gpl_future. Signed-off-by: Wenji Huang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index a8014bfb5a4..625985e70e9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -180,8 +180,6 @@ extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; extern const struct kernel_symbol __start___ksymtab_gpl_future[]; extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; -extern const struct kernel_symbol __start___ksymtab_gpl_future[]; -extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; extern const unsigned long __start___kcrctab[]; extern const unsigned long __start___kcrctab_gpl[]; extern const unsigned long __start___kcrctab_gpl_future[]; -- cgit v1.2.3-70-g09d2 From 218ce7351413b8287a80fab1d7b94906a5559f01 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 25 May 2010 16:48:30 -0700 Subject: Revert "module: drop the lock while waiting for module to complete initialization." This reverts commit 480b02df3aa9f07d1c7df0cd8be7a5ca73893455, since Rafael reports that it causes occasional kernel paging request faults in load_module(). Dropping the module lock and re-taking it deep in the call-chain is definitely not the right thing to do. That just turns the mutex from a lock into a "random non-locking data structure" that doesn't actually protect what it's supposed to protect. Requested-and-tested-by: Rafael J. Wysocki Cc: Rusty Russell Cc: Brandon Philips Cc: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 59 +++++++++++++++++++++------------------------------------ 1 file changed, 22 insertions(+), 37 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 625985e70e9..333fbcc9697 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -563,26 +563,33 @@ int use_module(struct module *a, struct module *b) struct module_use *use; int no_warn, err; - if (b == NULL || already_uses(a, b)) - return 0; + if (b == NULL || already_uses(a, b)) return 1; /* If we're interrupted or time out, we fail. */ - err = strong_try_module_get(b); + if (wait_event_interruptible_timeout( + module_wq, (err = strong_try_module_get(b)) != -EBUSY, + 30 * HZ) <= 0) { + printk("%s: gave up waiting for init of module %s.\n", + a->name, b->name); + return 0; + } + + /* If strong_try_module_get() returned a different error, we fail. */ if (err) - return err; + return 0; DEBUGP("Allocating new usage for %s.\n", a->name); use = kmalloc(sizeof(*use), GFP_ATOMIC); if (!use) { printk("%s: out of memory loading\n", a->name); module_put(b); - return -ENOMEM; + return 0; } use->module_which_uses = a; list_add(&use->list, &b->modules_which_use_me); no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); - return 0; + return 1; } EXPORT_SYMBOL_GPL(use_module); @@ -875,7 +882,7 @@ static inline void module_unload_free(struct module *mod) int use_module(struct module *a, struct module *b) { - return strong_try_module_get(b); + return strong_try_module_get(b) == 0; } EXPORT_SYMBOL_GPL(use_module); @@ -1046,39 +1053,17 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, struct module *owner; const struct kernel_symbol *sym; const unsigned long *crc; - DEFINE_WAIT(wait); - int err; - long timeleft = 30 * HZ; -again: sym = find_symbol(name, &owner, &crc, !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); - if (!sym) - return NULL; - - if (!check_version(sechdrs, versindex, name, mod, crc, owner)) - return NULL; - - prepare_to_wait(&module_wq, &wait, TASK_INTERRUPTIBLE); - err = use_module(mod, owner); - if (likely(!err) || err != -EBUSY || signal_pending(current)) { - finish_wait(&module_wq, &wait); - return err ? NULL : sym; - } - - /* Module is still loading. Drop lock and wait. */ - mutex_unlock(&module_mutex); - timeleft = schedule_timeout(timeleft); - mutex_lock(&module_mutex); - finish_wait(&module_wq, &wait); - - /* Module might be gone entirely, or replaced. Re-lookup. */ - if (timeleft) - goto again; - - printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n", - mod->name, owner->name); - return NULL; + /* use_module can fail due to OOM, + or module initialization or unloading */ + if (sym) { + if (!check_version(sechdrs, versindex, name, mod, crc, owner) + || !use_module(mod, owner)) + sym = NULL; + } + return sym; } /* -- cgit v1.2.3-70-g09d2 From 293a7cfeedc2b2380a7c7274902323c3cf5f7575 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 31 May 2010 19:53:50 +0930 Subject: module: fix reference to mod->percpu after freeing module. Rafael sees a sometimes crash at precpu_modfree from kernel/module.c; it only occurred with another (since-reverted) patch, but that patch simply changed timing to uncover this bug, it was otherwise unrelated. The comment about the mod being freed is self-explanatory, but neither Tejun nor I read it. This bug was introduced in 259354deaa, after it had previously been fixed in 6e2b75740b. How embarrassing. Reported-by: "Rafael J. Wysocki" Signed-off-by: Rusty Russell Embarrassingly-Acked-by: Tejun Heo Cc: Masami Hiramatsu Tested-by: "Rafael J. Wysocki" Signed-off-by: Linus Torvalds --- kernel/module.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 333fbcc9697..d806e00e445 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2014,6 +2014,7 @@ static noinline struct module *load_module(void __user *umod, long err = 0; void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; + void __percpu *percpu; mm_segment_t old_fs; @@ -2158,6 +2159,8 @@ static noinline struct module *load_module(void __user *umod, goto free_mod; sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; } + /* Keep this around for failure path. */ + percpu = mod_percpu(mod); /* Determine total sizes, and put offsets in sh_entsize. For now this is done generically; there doesn't appear to be any @@ -2463,7 +2466,7 @@ static noinline struct module *load_module(void __user *umod, module_free(mod, mod->module_core); /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: - percpu_modfree(mod); + free_percpu(percpu); free_mod: kfree(args); kfree(strmap); -- cgit v1.2.3-70-g09d2 From 2c02dfe7fe3fba97a5665d329d039d2415ea5607 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 31 May 2010 12:19:37 -0700 Subject: module: Make the 'usage' lists be two-way When adding a module that depends on another one, we used to create a one-way list of "modules_which_use_me", so that module unloading could see who needs a module. It's actually quite simple to make that list go both ways: so that we not only can see "who uses me", but also see a list of modules that are "used by me". In fact, we always wanted that list in "module_unload_free()": when we unload a module, we want to also release all the other modules that are used by that module. But because we didn't have that list, we used to first iterate over all modules, and then iterate over each "used by me" list of that module. By making the list two-way, we simplify module_unload_free(), and it allows for some trivial fixes later too. Signed-off-by: Linus Torvalds Signed-off-by: Rusty Russell (cleaned & rebased) --- include/linux/module.h | 4 ++- kernel/module.c | 79 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 51 insertions(+), 32 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 6914fcad467..680db9e2ac3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -359,7 +359,9 @@ struct module #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ - struct list_head modules_which_use_me; + struct list_head source_list; + /* What modules do I depend on? */ + struct list_head target_list; /* Who is waiting for us to be unloaded */ struct task_struct *waiter; diff --git a/kernel/module.c b/kernel/module.c index 0129769301e..be18c3e3468 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -523,7 +523,8 @@ static void module_unload_init(struct module *mod) { int cpu; - INIT_LIST_HEAD(&mod->modules_which_use_me); + INIT_LIST_HEAD(&mod->source_list); + INIT_LIST_HEAD(&mod->target_list); for_each_possible_cpu(cpu) { per_cpu_ptr(mod->refptr, cpu)->incs = 0; per_cpu_ptr(mod->refptr, cpu)->decs = 0; @@ -538,8 +539,9 @@ static void module_unload_init(struct module *mod) /* modules using other modules */ struct module_use { - struct list_head list; - struct module *module_which_uses; + struct list_head source_list; + struct list_head target_list; + struct module *source, *target; }; /* Does a already use b? */ @@ -547,8 +549,8 @@ static int already_uses(struct module *a, struct module *b) { struct module_use *use; - list_for_each_entry(use, &b->modules_which_use_me, list) { - if (use->module_which_uses == a) { + list_for_each_entry(use, &b->source_list, source_list) { + if (use->source == a) { DEBUGP("%s uses %s!\n", a->name, b->name); return 1; } @@ -557,6 +559,33 @@ static int already_uses(struct module *a, struct module *b) return 0; } +/* + * Module a uses b + * - we add 'a' as a "source", 'b' as a "target" of module use + * - the module_use is added to the list of 'b' sources (so + * 'b' can walk the list to see who sourced them), and of 'a' + * targets (so 'a' can see what modules it targets). + */ +static int add_module_usage(struct module *a, struct module *b) +{ + int no_warn; + struct module_use *use; + + DEBUGP("Allocating new usage for %s.\n", a->name); + use = kmalloc(sizeof(*use), GFP_ATOMIC); + if (!use) { + printk(KERN_WARNING "%s: out of memory loading\n", a->name); + return -ENOMEM; + } + + use->source = a; + use->target = b; + list_add(&use->source_list, &b->source_list); + list_add(&use->target_list, &a->target_list); + no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); + return 0; +} + /* Module a uses b */ int use_module(struct module *a, struct module *b) { @@ -578,17 +607,11 @@ int use_module(struct module *a, struct module *b) if (err) return 0; - DEBUGP("Allocating new usage for %s.\n", a->name); - use = kmalloc(sizeof(*use), GFP_ATOMIC); - if (!use) { - printk("%s: out of memory loading\n", a->name); + err = add_module_usage(a, b); + if (err) { module_put(b); return 0; } - - use->module_which_uses = a; - list_add(&use->list, &b->modules_which_use_me); - no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); return 1; } EXPORT_SYMBOL_GPL(use_module); @@ -596,22 +619,16 @@ EXPORT_SYMBOL_GPL(use_module); /* Clear the unload stuff of the module. */ static void module_unload_free(struct module *mod) { - struct module *i; - - list_for_each_entry(i, &modules, list) { - struct module_use *use; + struct module_use *use, *tmp; - list_for_each_entry(use, &i->modules_which_use_me, list) { - if (use->module_which_uses == mod) { - DEBUGP("%s unusing %s\n", mod->name, i->name); - module_put(i); - list_del(&use->list); - kfree(use); - sysfs_remove_link(i->holders_dir, mod->name); - /* There can be at most one match. */ - break; - } - } + list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) { + struct module *i = use->target; + DEBUGP("%s unusing %s\n", mod->name, i->name); + module_put(i); + list_del(&use->source_list); + list_del(&use->target_list); + kfree(use); + sysfs_remove_link(i->holders_dir, mod->name); } } @@ -735,7 +752,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, goto out; } - if (!list_empty(&mod->modules_which_use_me)) { + if (!list_empty(&mod->source_list)) { /* Other modules depend on us: get rid of them first. */ ret = -EWOULDBLOCK; goto out; @@ -799,9 +816,9 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) /* Always include a trailing , so userspace can differentiate between this and the old multi-field proc format. */ - list_for_each_entry(use, &mod->modules_which_use_me, list) { + list_for_each_entry(use, &mod->source_list, source_list) { printed_something = 1; - seq_printf(m, "%s,", use->module_which_uses->name); + seq_printf(m, "%s,", use->source->name); } if (mod->init != NULL && mod->exit == NULL) { -- cgit v1.2.3-70-g09d2 From c8e21ced08b39ef8dfe7236fb2a923a95f645262 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:35 -0600 Subject: module: fix kdb's illicit use of struct module_use. Linus changed the structure, and luckily this didn't compile any more. Reported-by: Stephen Rothwell Signed-off-by: Rusty Russell Cc: Jason Wessel Cc: Martin Hicks --- include/linux/module.h | 7 +++++++ kernel/debug/kdb/kdb_main.c | 12 +++--------- kernel/module.c | 11 +---------- 3 files changed, 11 insertions(+), 19 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 680db9e2ac3..5d8fca5dcff 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -181,6 +181,13 @@ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); #define symbol_get(x) ((typeof(&x))(__symbol_get(MODULE_SYMBOL_PREFIX #x))) +/* modules using other modules: kdb wants to see this. */ +struct module_use { + struct list_head source_list; + struct list_head target_list; + struct module *source, *target; +}; + #ifndef __GENKSYMS__ #ifdef CONFIG_MODVERSIONS /* Mark the CRC weak since genksyms apparently decides not to diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index b724c791b6d..184cd8209c3 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1857,12 +1857,6 @@ static int kdb_ef(int argc, const char **argv) } #if defined(CONFIG_MODULES) -/* modules using other modules */ -struct module_use { - struct list_head list; - struct module *module_which_uses; -}; - /* * kdb_lsmod - This function implements the 'lsmod' command. Lists * currently loaded kernel modules. @@ -1894,9 +1888,9 @@ static int kdb_lsmod(int argc, const char **argv) { struct module_use *use; kdb_printf(" [ "); - list_for_each_entry(use, &mod->modules_which_use_me, - list) - kdb_printf("%s ", use->module_which_uses->name); + list_for_each_entry(use, &mod->source_list, + source_list) + kdb_printf("%s ", use->target->name); kdb_printf("]\n"); } #endif diff --git a/kernel/module.c b/kernel/module.c index be18c3e3468..bbb1d812c79 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -536,14 +536,6 @@ static void module_unload_init(struct module *mod) mod->waiter = current; } -/* modules using other modules */ -struct module_use -{ - struct list_head source_list; - struct list_head target_list; - struct module *source, *target; -}; - /* Does a already use b? */ static int already_uses(struct module *a, struct module *b) { @@ -589,8 +581,7 @@ static int add_module_usage(struct module *a, struct module *b) /* Module a uses b */ int use_module(struct module *a, struct module *b) { - struct module_use *use; - int no_warn, err; + int err; if (b == NULL || already_uses(a, b)) return 1; -- cgit v1.2.3-70-g09d2 From 80a3d1bb410e000e176931a076cdf19a1e89a955 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:36 -0600 Subject: module: move sysfs exposure to end of load_module This means a little extra work, but is more logical: we don't put anything in sysfs until we're about to put the module into the global list an parse its parameters. This also gives us a logical place to put duplicate module detection in the next patch. Signed-off-by: Rusty Russell --- kernel/module.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index bbb1d812c79..c690d988579 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -560,7 +560,6 @@ static int already_uses(struct module *a, struct module *b) */ static int add_module_usage(struct module *a, struct module *b) { - int no_warn; struct module_use *use; DEBUGP("Allocating new usage for %s.\n", a->name); @@ -574,7 +573,6 @@ static int add_module_usage(struct module *a, struct module *b) use->target = b; list_add(&use->source_list, &b->source_list); list_add(&use->target_list, &a->target_list); - no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); return 0; } @@ -619,7 +617,6 @@ static void module_unload_free(struct module *mod) list_del(&use->source_list); list_del(&use->target_list); kfree(use); - sysfs_remove_link(i->holders_dir, mod->name); } } @@ -1303,6 +1300,29 @@ static inline void remove_notes_attrs(struct module *mod) #endif #ifdef CONFIG_SYSFS +static void add_usage_links(struct module *mod) +{ +#ifdef CONFIG_MODULE_UNLOAD + struct module_use *use; + int nowarn; + + list_for_each_entry(use, &mod->target_list, target_list) { + nowarn = sysfs_create_link(use->target->holders_dir, + &mod->mkobj.kobj, mod->name); + } +#endif +} + +static void del_usage_links(struct module *mod) +{ +#ifdef CONFIG_MODULE_UNLOAD + struct module_use *use; + + list_for_each_entry(use, &mod->target_list, target_list) + sysfs_remove_link(use->target->holders_dir, mod->name); +#endif +} + int module_add_modinfo_attrs(struct module *mod) { struct module_attribute *attr; @@ -1385,6 +1405,10 @@ int mod_sysfs_setup(struct module *mod, { int err; + err = mod_sysfs_init(mod); + if (err) + goto out; + mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj); if (!mod->holders_dir) { err = -ENOMEM; @@ -1399,6 +1423,8 @@ int mod_sysfs_setup(struct module *mod, if (err) goto out_unreg_param; + add_usage_links(mod); + kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); return 0; @@ -1408,6 +1434,7 @@ out_unreg_holders: kobject_put(mod->holders_dir); out_unreg: kobject_put(&mod->mkobj.kobj); +out: return err; } @@ -1422,10 +1449,15 @@ static void mod_sysfs_fini(struct module *mod) { } +static void del_usage_links(struct module *mod) +{ +} + #endif /* CONFIG_SYSFS */ static void mod_kobject_remove(struct module *mod) { + del_usage_links(mod); module_remove_modinfo_attrs(mod); module_param_sysfs_remove(mod); kobject_put(mod->mkobj.drivers_dir); @@ -2242,11 +2274,6 @@ static noinline struct module *load_module(void __user *umod, /* Now we've moved module, initialize linked lists, etc. */ module_unload_init(mod); - /* add kobject, so we can reference it. */ - err = mod_sysfs_init(mod); - if (err) - goto free_unload; - /* Set up license info based on the info section */ set_license(mod, get_modinfo(sechdrs, infoindex, "license")); @@ -2443,6 +2470,7 @@ static noinline struct module *load_module(void __user *umod, err = mod_sysfs_setup(mod, mod->kp, mod->num_kp); if (err < 0) goto unlink; + add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); @@ -2461,9 +2489,6 @@ static noinline struct module *load_module(void __user *umod, module_arch_cleanup(mod); cleanup: free_modinfo(mod); - kobject_del(&mod->mkobj.kobj); - kobject_put(&mod->mkobj.kobj); - free_unload: module_unload_free(mod); #if defined(CONFIG_MODULE_UNLOAD) free_percpu(mod->refptr); -- cgit v1.2.3-70-g09d2 From 6407ebb271fc34440b306f305e1efb7685eece26 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:36 -0600 Subject: module: Make module sysfs functions private. These were placed in the header in ef665c1a06 to get the various SYSFS/MODULE config combintations to compile. That may have been necessary then, but it's not now. These functions are all local to module.c. Signed-off-by: Rusty Russell Cc: Randy Dunlap --- include/linux/module.h | 33 --------------------------------- kernel/module.c | 29 +++++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 37 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/module.h b/include/linux/module.h index 5d8fca5dcff..8a6b9fdc7ff 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -672,43 +672,10 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter) #endif /* CONFIG_MODULES */ -struct device_driver; #ifdef CONFIG_SYSFS -struct module; - extern struct kset *module_kset; extern struct kobj_type module_ktype; extern int module_sysfs_initialized; - -int mod_sysfs_init(struct module *mod); -int mod_sysfs_setup(struct module *mod, - struct kernel_param *kparam, - unsigned int num_params); -int module_add_modinfo_attrs(struct module *mod); -void module_remove_modinfo_attrs(struct module *mod); - -#else /* !CONFIG_SYSFS */ - -static inline int mod_sysfs_init(struct module *mod) -{ - return 0; -} - -static inline int mod_sysfs_setup(struct module *mod, - struct kernel_param *kparam, - unsigned int num_params) -{ - return 0; -} - -static inline int module_add_modinfo_attrs(struct module *mod) -{ - return 0; -} - -static inline void module_remove_modinfo_attrs(struct module *mod) -{ } - #endif /* CONFIG_SYSFS */ #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) diff --git a/kernel/module.c b/kernel/module.c index c690d988579..808aa18dd66 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1323,7 +1323,7 @@ static void del_usage_links(struct module *mod) #endif } -int module_add_modinfo_attrs(struct module *mod) +static int module_add_modinfo_attrs(struct module *mod) { struct module_attribute *attr; struct module_attribute *temp_attr; @@ -1349,7 +1349,7 @@ int module_add_modinfo_attrs(struct module *mod) return error; } -void module_remove_modinfo_attrs(struct module *mod) +static void module_remove_modinfo_attrs(struct module *mod) { struct module_attribute *attr; int i; @@ -1365,7 +1365,7 @@ void module_remove_modinfo_attrs(struct module *mod) kfree(mod->modinfo_attrs); } -int mod_sysfs_init(struct module *mod) +static int mod_sysfs_init(struct module *mod) { int err; struct kobject *kobj; @@ -1399,7 +1399,7 @@ out: return err; } -int mod_sysfs_setup(struct module *mod, +static int mod_sysfs_setup(struct module *mod, struct kernel_param *kparam, unsigned int num_params) { @@ -1445,6 +1445,27 @@ static void mod_sysfs_fini(struct module *mod) #else /* CONFIG_SYSFS */ +static inline int mod_sysfs_init(struct module *mod) +{ + return 0; +} + +static inline int mod_sysfs_setup(struct module *mod, + struct kernel_param *kparam, + unsigned int num_params) +{ + return 0; +} + +static inline int module_add_modinfo_attrs(struct module *mod) +{ + return 0; +} + +static inline void module_remove_modinfo_attrs(struct module *mod) +{ +} + static void mod_sysfs_fini(struct module *mod) { } -- cgit v1.2.3-70-g09d2 From 75676500f8298f0ee89db12db97294883c4b768e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:36 -0600 Subject: module: make locking more fine-grained. Kay Sievers reports that we still have some contention over module loading which is slowing boot. Linus also disliked a previous "drop lock and regrab" patch to fix the bne2 "gave up waiting for init of module libcrc32c" message. This is more ambitious: we only grab the lock where we need it. Signed-off-by: Rusty Russell Cc: Brandon Philips Cc: Kay Sievers Cc: Linus Torvalds --- kernel/module.c | 65 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 23 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 808aa18dd66..d293c213c22 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -72,7 +72,11 @@ /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) -/* List of modules, protected by module_mutex or preempt_disable +/* + * Mutex protects: + * 1) List of modules (also safely readable with preempt_disable), + * 2) module_use links, + * 3) module_addr_min/module_addr_max. * (delete uses stop_machine/add uses RCU list operations). */ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); @@ -90,7 +94,8 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq); static BLOCKING_NOTIFIER_HEAD(module_notify_list); -/* Bounds of module allocation, for speeding __module_address */ +/* Bounds of module allocation, for speeding __module_address. + * Protected by module_mutex. */ static unsigned long module_addr_min = -1UL, module_addr_max = 0; int register_module_notifier(struct notifier_block * nb) @@ -329,7 +334,7 @@ static bool find_symbol_in_section(const struct symsearch *syms, } /* Find a symbol and return it, along with, (optional) crc and - * (optional) module which owns it */ + * (optional) module which owns it. Needs preempt disabled or module_mutex. */ const struct kernel_symbol *find_symbol(const char *name, struct module **owner, const unsigned long **crc, @@ -576,7 +581,7 @@ static int add_module_usage(struct module *a, struct module *b) return 0; } -/* Module a uses b */ +/* Module a uses b: caller needs module_mutex() */ int use_module(struct module *a, struct module *b) { int err; @@ -610,6 +615,7 @@ static void module_unload_free(struct module *mod) { struct module_use *use, *tmp; + mutex_lock(&module_mutex); list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) { struct module *i = use->target; DEBUGP("%s unusing %s\n", mod->name, i->name); @@ -618,6 +624,7 @@ static void module_unload_free(struct module *mod) list_del(&use->target_list); kfree(use); } + mutex_unlock(&module_mutex); } #ifdef CONFIG_MODULE_FORCE_UNLOAD @@ -784,13 +791,14 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); async_synchronize_full(); - mutex_lock(&module_mutex); + /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); ddebug_remove_module(mod->name); - free_module(mod); - out: + free_module(mod); + return 0; +out: mutex_unlock(&module_mutex); return ret; } @@ -1006,6 +1014,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, { const unsigned long *crc; + /* Since this should be found in kernel (which can't be removed), + * no locking is necessary. */ if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, &crc, true, false)) BUG(); @@ -1048,8 +1058,7 @@ static inline int same_magic(const char *amagic, const char *bmagic, } #endif /* CONFIG_MODVERSIONS */ -/* Resolve a symbol for this module. I.e. if we find one, record usage. - Must be holding module_mutex. */ +/* Resolve a symbol for this module. I.e. if we find one, record usage. */ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, unsigned int versindex, const char *name, @@ -1059,6 +1068,7 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, const struct kernel_symbol *sym; const unsigned long *crc; + mutex_lock(&module_mutex); sym = find_symbol(name, &owner, &crc, !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); /* use_module can fail due to OOM, @@ -1068,6 +1078,7 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, || !use_module(mod, owner)) sym = NULL; } + mutex_unlock(&module_mutex); return sym; } @@ -1306,10 +1317,12 @@ static void add_usage_links(struct module *mod) struct module_use *use; int nowarn; + mutex_lock(&module_mutex); list_for_each_entry(use, &mod->target_list, target_list) { nowarn = sysfs_create_link(use->target->holders_dir, &mod->mkobj.kobj, mod->name); } + mutex_unlock(&module_mutex); #endif } @@ -1318,8 +1331,10 @@ static void del_usage_links(struct module *mod) #ifdef CONFIG_MODULE_UNLOAD struct module_use *use; + mutex_lock(&module_mutex); list_for_each_entry(use, &mod->target_list, target_list) sysfs_remove_link(use->target->holders_dir, mod->name); + mutex_unlock(&module_mutex); #endif } @@ -1497,13 +1512,15 @@ static int __unlink_module(void *_mod) return 0; } -/* Free a module, remove from lists, etc (must hold module_mutex). */ +/* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { trace_module_free(mod); /* Delete from various lists */ + mutex_lock(&module_mutex); stop_machine(__unlink_module, mod, NULL); + mutex_unlock(&module_mutex); remove_notes_attrs(mod); remove_sect_attrs(mod); mod_kobject_remove(mod); @@ -1575,7 +1592,14 @@ static int verify_export_symbols(struct module *mod) for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { - if (find_symbol(s->name, &owner, NULL, true, false)) { + const struct kernel_symbol *sym; + + /* Stopping preemption makes find_symbol safe. */ + preempt_disable(); + sym = find_symbol(s->name, &owner, NULL, true, false); + preempt_enable(); + + if (sym) { printk(KERN_ERR "%s: exports duplicate symbol %s" " (owned by %s)\n", @@ -2021,11 +2045,13 @@ static void *module_alloc_update_bounds(unsigned long size) void *ret = module_alloc(size); if (ret) { + mutex_lock(&module_mutex); /* Update module bounds. */ if ((unsigned long)ret < module_addr_min) module_addr_min = (unsigned long)ret; if ((unsigned long)ret + size > module_addr_max) module_addr_max = (unsigned long)ret + size; + mutex_unlock(&module_mutex); } return ret; } @@ -2482,7 +2508,9 @@ static noinline struct module *load_module(void __user *umod, * function to insert in a way safe to concurrent readers. * The mutex protects against concurrent writers. */ + mutex_lock(&module_mutex); list_add_rcu(&mod->list, &modules); + mutex_unlock(&module_mutex); err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); if (err < 0) @@ -2504,8 +2532,10 @@ static noinline struct module *load_module(void __user *umod, return mod; unlink: + mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + mutex_unlock(&module_mutex); synchronize_sched(); module_arch_cleanup(mod); cleanup: @@ -2556,19 +2586,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; - /* Only one module load at a time, please */ - if (mutex_lock_interruptible(&module_mutex) != 0) - return -EINTR; - /* Do all the hard work */ mod = load_module(umod, len, uargs); - if (IS_ERR(mod)) { - mutex_unlock(&module_mutex); + if (IS_ERR(mod)) return PTR_ERR(mod); - } - - /* Drop lock so they can recurse */ - mutex_unlock(&module_mutex); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); @@ -2585,9 +2606,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, module_put(mod); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); - mutex_lock(&module_mutex); free_module(mod); - mutex_unlock(&module_mutex); wake_up(&module_wq); return ret; } -- cgit v1.2.3-70-g09d2 From 3bafeb6247042dcbb72b0141ec7c7107de9f0b99 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 5 Jun 2010 11:17:36 -0600 Subject: module: move find_module check to end I think Rusty may have made the lock a bit _too_ finegrained there, and didn't add it to some places that needed it. It looks, for example, like PATCH 1/2 actually drops the lock in places where it's needed ("find_module()" is documented to need it, but now load_module() didn't hold it at all when it did the find_module()). Rather than adding a new "module_loading" list, I think we should be able to just use the existing "modules" list, and just fix up the locking a bit. In fact, maybe we could just move the "look up existing module" a bit later - optimistically assuming that the module doesn't exist, and then just undoing the work if it turns out that we were wrong, just before adding ourselves to the list. Signed-off-by: Rusty Russell --- kernel/module.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index d293c213c22..28450047852 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2226,11 +2226,6 @@ static noinline struct module *load_module(void __user *umod, goto free_mod; } - if (find_module(mod->name)) { - err = -EEXIST; - goto free_mod; - } - mod->state = MODULE_STATE_COMING; /* Allow arches to frob section contents and sizes. */ @@ -2509,6 +2504,12 @@ static noinline struct module *load_module(void __user *umod, * The mutex protects against concurrent writers. */ mutex_lock(&module_mutex); + if (find_module(mod->name)) { + err = -EEXIST; + /* This will also unlock the mutex */ + goto already_exists; + } + list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2535,6 +2536,7 @@ static noinline struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + already_exists: mutex_unlock(&module_mutex); synchronize_sched(); module_arch_cleanup(mod); -- cgit v1.2.3-70-g09d2 From be593f4ce4eb1bd40e38fdc403371f149f6f12eb Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:37 -0600 Subject: module: verify_export_symbols under the lock It disabled preempt so it was "safe", but nothing stops another module slipping in before this module is added to the global list now we don't hold the lock the whole time. So we check this just after we check for duplicate modules, and just before we put the module in the global list. (find_symbol finds symbols in coming and going modules, too). Signed-off-by: Rusty Russell --- kernel/module.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 28450047852..f99558e1945 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1571,6 +1571,8 @@ EXPORT_SYMBOL_GPL(__symbol_get); /* * Ensure that an exported symbol [global namespace] does not already exist * in the kernel or in some other module's exported symbol table. + * + * You must hold the module_mutex. */ static int verify_export_symbols(struct module *mod) { @@ -1592,14 +1594,7 @@ static int verify_export_symbols(struct module *mod) for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { - const struct kernel_symbol *sym; - - /* Stopping preemption makes find_symbol safe. */ - preempt_disable(); - sym = find_symbol(s->name, &owner, NULL, true, false); - preempt_enable(); - - if (sym) { + if (find_symbol(s->name, &owner, NULL, true, false)) { printk(KERN_ERR "%s: exports duplicate symbol %s" " (owned by %s)\n", @@ -2440,11 +2435,6 @@ static noinline struct module *load_module(void __user *umod, goto cleanup; } - /* Find duplicate symbols */ - err = verify_export_symbols(mod); - if (err < 0) - goto cleanup; - /* Set up and sort exception table */ mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); @@ -2506,10 +2496,14 @@ static noinline struct module *load_module(void __user *umod, mutex_lock(&module_mutex); if (find_module(mod->name)) { err = -EEXIST; - /* This will also unlock the mutex */ - goto already_exists; + goto unlock; } + /* Find duplicate symbols */ + err = verify_export_symbols(mod); + if (err < 0) + goto unlock; + list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2536,7 +2530,7 @@ static noinline struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); - already_exists: + unlock: mutex_unlock(&module_mutex); synchronize_sched(); module_arch_cleanup(mod); -- cgit v1.2.3-70-g09d2 From 9bea7f23952d5948f8e5dfdff4de09bb9981fb5f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:37 -0600 Subject: module: fix bne2 "gave up waiting for init of module libcrc32c" Problem: it's hard to avoid an init routine stumbling over a request_module these days. And it's not clear it's always a bad idea: for example, a module like kvm with dynamic dependencies on kvm-intel or kvm-amd would be neater if it could simply request_module the right one. In this particular case, it's libcrc32c: libcrc32c_mod_init crypto_alloc_shash crypto_alloc_tfm crypto_find_alg crypto_alg_mod_lookup crypto_larval_lookup request_module If another module is waiting inside resolve_symbol() for libcrc32c to finish initializing (ie. bne2 depends on libcrc32c) then it does so holding the module lock, and our request_module() can't make progress until that is released. Waiting inside resolve_symbol() without the lock isn't all that hard: we just need to pass the -EBUSY up the call chain so we can sleep where we don't hold the lock. Error reporting is a bit trickier: we need to copy the name of the unfinished module before releasing the lock. Other notes: 1) This also fixes a theoretical issue where a weak dependency would allow symbol version mismatches to be ignored. 2) We rename use_module to ref_module to make life easier for the only external user (the out-of-tree ksplice patches). Signed-off-by: Rusty Russell Cc: Linus Torvalds Cc: Tim Abbot Tested-by: Brandon Philips --- kernel/module.c | 91 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 32 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index f99558e1945..8c6b42840dd 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -582,33 +582,26 @@ static int add_module_usage(struct module *a, struct module *b) } /* Module a uses b: caller needs module_mutex() */ -int use_module(struct module *a, struct module *b) +int ref_module(struct module *a, struct module *b) { int err; - if (b == NULL || already_uses(a, b)) return 1; - - /* If we're interrupted or time out, we fail. */ - if (wait_event_interruptible_timeout( - module_wq, (err = strong_try_module_get(b)) != -EBUSY, - 30 * HZ) <= 0) { - printk("%s: gave up waiting for init of module %s.\n", - a->name, b->name); + if (b == NULL || already_uses(a, b)) return 0; - } - /* If strong_try_module_get() returned a different error, we fail. */ + /* If module isn't available, we fail. */ + err = strong_try_module_get(b); if (err) - return 0; + return err; err = add_module_usage(a, b); if (err) { module_put(b); - return 0; + return err; } - return 1; + return 0; } -EXPORT_SYMBOL_GPL(use_module); +EXPORT_SYMBOL_GPL(ref_module); /* Clear the unload stuff of the module. */ static void module_unload_free(struct module *mod) @@ -893,11 +886,11 @@ static inline void module_unload_free(struct module *mod) { } -int use_module(struct module *a, struct module *b) +int ref_module(struct module *a, struct module *b) { - return strong_try_module_get(b) == 0; + return strong_try_module_get(b); } -EXPORT_SYMBOL_GPL(use_module); +EXPORT_SYMBOL_GPL(ref_module); static inline void module_unload_init(struct module *mod) { @@ -1062,26 +1055,58 @@ static inline int same_magic(const char *amagic, const char *bmagic, static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, unsigned int versindex, const char *name, - struct module *mod) + struct module *mod, + char ownername[]) { struct module *owner; const struct kernel_symbol *sym; const unsigned long *crc; + int err; mutex_lock(&module_mutex); sym = find_symbol(name, &owner, &crc, !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); - /* use_module can fail due to OOM, - or module initialization or unloading */ - if (sym) { - if (!check_version(sechdrs, versindex, name, mod, crc, owner) - || !use_module(mod, owner)) - sym = NULL; + if (!sym) + goto unlock; + + if (!check_version(sechdrs, versindex, name, mod, crc, owner)) { + sym = ERR_PTR(-EINVAL); + goto getname; } + + err = ref_module(mod, owner); + if (err) { + sym = ERR_PTR(err); + goto getname; + } + +getname: + /* We must make copy under the lock if we failed to get ref. */ + strncpy(ownername, module_name(owner), MODULE_NAME_LEN); +unlock: mutex_unlock(&module_mutex); return sym; } +static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs, + unsigned int versindex, + const char *name, + struct module *mod) +{ + const struct kernel_symbol *ksym; + char ownername[MODULE_NAME_LEN]; + + if (wait_event_interruptible_timeout(module_wq, + !IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name, + mod, ownername)) || + PTR_ERR(ksym) != -EBUSY, + 30 * HZ) <= 0) { + printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n", + mod->name, ownername); + } + return ksym; +} + /* * /sys/module/foo/sections stuff * J. Corbet @@ -1638,21 +1663,23 @@ static int simplify_symbols(Elf_Shdr *sechdrs, break; case SHN_UNDEF: - ksym = resolve_symbol(sechdrs, versindex, - strtab + sym[i].st_name, mod); + ksym = resolve_symbol_wait(sechdrs, versindex, + strtab + sym[i].st_name, + mod); /* Ok if resolved. */ - if (ksym) { + if (ksym && !IS_ERR(ksym)) { sym[i].st_value = ksym->value; break; } /* Ok if weak. */ - if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) + if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK) break; - printk(KERN_WARNING "%s: Unknown symbol %s\n", - mod->name, strtab + sym[i].st_name); - ret = -ENOENT; + printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n", + mod->name, strtab + sym[i].st_name, + PTR_ERR(ksym)); + ret = PTR_ERR(ksym) ?: -ENOENT; break; default: -- cgit v1.2.3-70-g09d2 From ff49d74ad383f54041378144ca1a229ee9aeaa59 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Sat, 3 Jul 2010 13:07:35 +1000 Subject: module: initialize module dynamic debug later We should initialize the module dynamic debug datastructures only after determining that the module is not loaded yet. This fixes a bug that introduced in 2.6.35-rc2, where when a trying to load a module twice, we also load it's dynamic printing data twice which causes all sorts of nasty issues. Also handle the dynamic debug cleanup later on failure. Signed-off-by: Yehuda Sadeh Signed-off-by: Rusty Russell (removed a #ifdef) Signed-off-by: Linus Torvalds --- include/linux/dynamic_debug.h | 4 ++-- kernel/module.c | 23 +++++++++++++++-------- lib/dynamic_debug.c | 2 +- 3 files changed, 18 insertions(+), 11 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index b3cd4de9432..52c0da4bdd1 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -40,7 +40,7 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, const char *modname); #if defined(CONFIG_DYNAMIC_DEBUG) -extern int ddebug_remove_module(char *mod_name); +extern int ddebug_remove_module(const char *mod_name); #define __dynamic_dbg_enabled(dd) ({ \ int __ret = 0; \ @@ -73,7 +73,7 @@ extern int ddebug_remove_module(char *mod_name); #else -static inline int ddebug_remove_module(char *mod) +static inline int ddebug_remove_module(const char *mod) { return 0; } diff --git a/kernel/module.c b/kernel/module.c index 8c6b42840dd..5d2d28197c8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2062,6 +2062,12 @@ static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num) #endif } +static void dynamic_debug_remove(struct _ddebug *debug) +{ + if (debug) + ddebug_remove_module(debug->modname); +} + static void *module_alloc_update_bounds(unsigned long size) { void *ret = module_alloc(size); @@ -2124,6 +2130,8 @@ static noinline struct module *load_module(void __user *umod, void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; void __percpu *percpu; + struct _ddebug *debug = NULL; + unsigned int num_debug = 0; mm_segment_t old_fs; @@ -2476,15 +2484,9 @@ static noinline struct module *load_module(void __user *umod, kfree(strmap); strmap = NULL; - if (!mod->taints) { - struct _ddebug *debug; - unsigned int num_debug; - + if (!mod->taints) debug = section_objs(hdr, sechdrs, secstrings, "__verbose", sizeof(*debug), &num_debug); - if (debug) - dynamic_debug_setup(debug, num_debug); - } err = module_finalize(hdr, sechdrs, mod); if (err < 0) @@ -2526,10 +2528,13 @@ static noinline struct module *load_module(void __user *umod, goto unlock; } + if (debug) + dynamic_debug_setup(debug, num_debug); + /* Find duplicate symbols */ err = verify_export_symbols(mod); if (err < 0) - goto unlock; + goto ddebug; list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2557,6 +2562,8 @@ static noinline struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + ddebug: + dynamic_debug_remove(debug); unlock: mutex_unlock(&module_mutex); synchronize_sched(); diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 3df8eb17a60..02afc253372 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -692,7 +692,7 @@ static void ddebug_table_free(struct ddebug_table *dt) * Called in response to a module being unloaded. Removes * any ddebug_table's which point at the module. */ -int ddebug_remove_module(char *mod_name) +int ddebug_remove_module(const char *mod_name) { struct ddebug_table *dt, *nextdt; int ret = -ENOENT; -- cgit v1.2.3-70-g09d2 From b82bab4bbe9efa7bc7177fc20620fff19bd95484 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Tue, 27 Jul 2010 13:18:01 -0700 Subject: dynamic debug: move ddebug_remove_module() down into free_module() The command echo "file ec.c +p" >/sys/kernel/debug/dynamic_debug/control causes an oops. Move the call to ddebug_remove_module() down into free_module(). In this way it should be called from all error paths. Currently, we are missing the remove if the module init routine fails. Signed-off-by: Jason Baron Reported-by: Thomas Renninger Tested-by: Thomas Renninger Cc: [2.6.32+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 5d2d28197c8..6c562828c85 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -787,7 +787,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); - ddebug_remove_module(mod->name); free_module(mod); return 0; @@ -1550,6 +1549,9 @@ static void free_module(struct module *mod) remove_sect_attrs(mod); mod_kobject_remove(mod); + /* Remove dynamic debug info */ + ddebug_remove_module(mod->name); + /* Arch-specific cleanup. */ module_arch_cleanup(mod); -- cgit v1.2.3-70-g09d2 From 2409e74278b7fb917d39ef6d3c16223c04a386f2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Aug 2010 12:59:00 -0600 Subject: module: module_unload_init() cleanup No need to clear mod->refptr in module_unload_init(), since alloc_percpu() already clears allocated chunks. Signed-off-by: Eric Dumazet Signed-off-by: Rusty Russell (removed unused var) --- kernel/module.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 6c562828c85..404e722930f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -526,14 +526,8 @@ EXPORT_TRACEPOINT_SYMBOL(module_get); /* Init the unload section of the module. */ static void module_unload_init(struct module *mod) { - int cpu; - INIT_LIST_HEAD(&mod->source_list); INIT_LIST_HEAD(&mod->target_list); - for_each_possible_cpu(cpu) { - per_cpu_ptr(mod->refptr, cpu)->incs = 0; - per_cpu_ptr(mod->refptr, cpu)->decs = 0; - } /* Hold reference count during initialization. */ __this_cpu_write(mod->refptr->incs, 1); -- cgit v1.2.3-70-g09d2 From f91a13bb99b73961d4e2743a6ff296ac553abc4f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 5 Aug 2010 12:59:02 -0600 Subject: module: refactor load_module I'd start from the trivial stuff. There's a fair amount of straight-line code that just makes the function hard to read just because you have to page up and down so far. Some of it is trivial to just create a helper function for. Signed-off-by: Linus Torvalds Signed-off-by: Rusty Russell --- kernel/module.c | 130 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 69 insertions(+), 61 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 404e722930f..12905ed4439 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2107,6 +2107,71 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, } #endif +static void find_module_sections(struct module *mod, Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, const char *secstrings) +{ + mod->kp = section_objs(hdr, sechdrs, secstrings, "__param", + sizeof(*mod->kp), &mod->num_kp); + mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab", + sizeof(*mod->syms), &mod->num_syms); + mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab"); + mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl", + sizeof(*mod->gpl_syms), + &mod->num_gpl_syms); + mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl"); + mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings, + "__ksymtab_gpl_future", + sizeof(*mod->gpl_future_syms), + &mod->num_gpl_future_syms); + mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings, + "__kcrctab_gpl_future"); + +#ifdef CONFIG_UNUSED_SYMBOLS + mod->unused_syms = section_objs(hdr, sechdrs, secstrings, + "__ksymtab_unused", + sizeof(*mod->unused_syms), + &mod->num_unused_syms); + mod->unused_crcs = section_addr(hdr, sechdrs, secstrings, + "__kcrctab_unused"); + mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings, + "__ksymtab_unused_gpl", + sizeof(*mod->unused_gpl_syms), + &mod->num_unused_gpl_syms); + mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings, + "__kcrctab_unused_gpl"); +#endif +#ifdef CONFIG_CONSTRUCTORS + mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors", + sizeof(*mod->ctors), &mod->num_ctors); +#endif + +#ifdef CONFIG_TRACEPOINTS + mod->tracepoints = section_objs(hdr, sechdrs, secstrings, + "__tracepoints", + sizeof(*mod->tracepoints), + &mod->num_tracepoints); +#endif +#ifdef CONFIG_EVENT_TRACING + mod->trace_events = section_objs(hdr, sechdrs, secstrings, + "_ftrace_events", + sizeof(*mod->trace_events), + &mod->num_trace_events); + /* + * This section contains pointers to allocated objects in the trace + * code and not scanning it leads to false positives. + */ + kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * + mod->num_trace_events, GFP_KERNEL); +#endif +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + /* sechdrs[0].sh_size is always zero */ + mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings, + "__mcount_loc", + sizeof(*mod->ftrace_callsites), + &mod->num_ftrace_callsites); +#endif +} + /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ static noinline struct module *load_module(void __user *umod, @@ -2147,7 +2212,7 @@ static noinline struct module *load_module(void __user *umod, } /* Sanity checks against insmoding binaries or wrong arch, - weird elf version */ + weird elf version */ if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 || hdr->e_type != ET_REL || !elf_check_arch(hdr) @@ -2326,7 +2391,8 @@ static noinline struct module *load_module(void __user *umod, sechdrs[i].sh_size); /* Update sh_addr to point to copy in image. */ sechdrs[i].sh_addr = (unsigned long)dest; - DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); + DEBUGP("\t0x%lx %s\n", + sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); } /* Module has been moved. */ mod = (void *)sechdrs[modindex].sh_addr; @@ -2368,66 +2434,8 @@ static noinline struct module *load_module(void __user *umod, /* Now we've got everything in the final locations, we can * find optional sections. */ - mod->kp = section_objs(hdr, sechdrs, secstrings, "__param", - sizeof(*mod->kp), &mod->num_kp); - mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab", - sizeof(*mod->syms), &mod->num_syms); - mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab"); - mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl", - sizeof(*mod->gpl_syms), - &mod->num_gpl_syms); - mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl"); - mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings, - "__ksymtab_gpl_future", - sizeof(*mod->gpl_future_syms), - &mod->num_gpl_future_syms); - mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings, - "__kcrctab_gpl_future"); + find_module_sections(mod, hdr, sechdrs, secstrings); -#ifdef CONFIG_UNUSED_SYMBOLS - mod->unused_syms = section_objs(hdr, sechdrs, secstrings, - "__ksymtab_unused", - sizeof(*mod->unused_syms), - &mod->num_unused_syms); - mod->unused_crcs = section_addr(hdr, sechdrs, secstrings, - "__kcrctab_unused"); - mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings, - "__ksymtab_unused_gpl", - sizeof(*mod->unused_gpl_syms), - &mod->num_unused_gpl_syms); - mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings, - "__kcrctab_unused_gpl"); -#endif -#ifdef CONFIG_CONSTRUCTORS - mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors", - sizeof(*mod->ctors), &mod->num_ctors); -#endif - -#ifdef CONFIG_TRACEPOINTS - mod->tracepoints = section_objs(hdr, sechdrs, secstrings, - "__tracepoints", - sizeof(*mod->tracepoints), - &mod->num_tracepoints); -#endif -#ifdef CONFIG_EVENT_TRACING - mod->trace_events = section_objs(hdr, sechdrs, secstrings, - "_ftrace_events", - sizeof(*mod->trace_events), - &mod->num_trace_events); - /* - * This section contains pointers to allocated objects in the trace - * code and not scanning it leads to false positives. - */ - kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * - mod->num_trace_events, GFP_KERNEL); -#endif -#ifdef CONFIG_FTRACE_MCOUNT_RECORD - /* sechdrs[0].sh_size is always zero */ - mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings, - "__mcount_loc", - sizeof(*mod->ftrace_callsites), - &mod->num_ftrace_callsites); -#endif #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) || (mod->num_gpl_syms && !mod->gpl_crcs) -- cgit v1.2.3-70-g09d2 From 65b8a9b4d5525348e55cf63a43517610ee8e0970 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 5 Aug 2010 12:59:02 -0600 Subject: module: refactor load_module part 2 Here's a second one. It's slightly less trivial - since we now have error cases - and equally untested so it may well be totally broken. But it also cleans up a bit more, and avoids one of the goto targets, because the "move_module()" helper now does both allocations or none. Signed-off-by: Linus Torvalds Signed-off-by: Rusty Russell --- kernel/module.c | 129 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 72 insertions(+), 57 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 12905ed4439..19ddcb0dba3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2082,7 +2082,8 @@ static void *module_alloc_update_bounds(unsigned long size) #ifdef CONFIG_DEBUG_KMEMLEAK static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, char *secstrings) + const Elf_Shdr *sechdrs, + const char *secstrings) { unsigned int i; @@ -2102,7 +2103,8 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, } #else static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, char *secstrings) + Elf_Shdr *sechdrs, + const char *secstrings) { } #endif @@ -2172,6 +2174,70 @@ static void find_module_sections(struct module *mod, Elf_Ehdr *hdr, #endif } +static struct module *move_module(struct module *mod, + Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + const char *secstrings, unsigned modindex) +{ + int i; + void *ptr; + + /* Do the allocs. */ + ptr = module_alloc_update_bounds(mod->core_size); + /* + * The pointer to this block is stored in the module structure + * which is inside the block. Just mark it as not being a + * leak. + */ + kmemleak_not_leak(ptr); + if (!ptr) + return ERR_PTR(-ENOMEM); + + memset(ptr, 0, mod->core_size); + mod->module_core = ptr; + + ptr = module_alloc_update_bounds(mod->init_size); + /* + * The pointer to this block is stored in the module structure + * which is inside the block. This block doesn't need to be + * scanned as it contains data and code that will be freed + * after the module is initialized. + */ + kmemleak_ignore(ptr); + if (!ptr && mod->init_size) { + module_free(mod, mod->module_core); + return ERR_PTR(-ENOMEM); + } + memset(ptr, 0, mod->init_size); + mod->module_init = ptr; + + /* Transfer each section which specifies SHF_ALLOC */ + DEBUGP("final section addresses:\n"); + for (i = 0; i < hdr->e_shnum; i++) { + void *dest; + + if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + continue; + + if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) + dest = mod->module_init + + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); + else + dest = mod->module_core + sechdrs[i].sh_entsize; + + if (sechdrs[i].sh_type != SHT_NOBITS) + memcpy(dest, (void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size); + /* Update sh_addr to point to copy in image. */ + sechdrs[i].sh_addr = (unsigned long)dest; + DEBUGP("\t0x%lx %s\n", + sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); + } + /* Module has been moved. */ + mod = (void *)sechdrs[modindex].sh_addr; + kmemleak_load_module(mod, hdr, sechdrs, secstrings); + return mod; +} + /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ static noinline struct module *load_module(void __user *umod, @@ -2188,7 +2254,6 @@ static noinline struct module *load_module(void __user *umod, unsigned int modindex, versindex, infoindex, pcpuindex; struct module *mod; long err = 0; - void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; void __percpu *percpu; struct _ddebug *debug = NULL; @@ -2342,61 +2407,12 @@ static noinline struct module *load_module(void __user *umod, symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr, secstrings, &stroffs, strmap); - /* Do the allocs. */ - ptr = module_alloc_update_bounds(mod->core_size); - /* - * The pointer to this block is stored in the module structure - * which is inside the block. Just mark it as not being a - * leak. - */ - kmemleak_not_leak(ptr); - if (!ptr) { - err = -ENOMEM; + /* Allocate and move to the final place */ + mod = move_module(mod, hdr, sechdrs, secstrings, modindex); + if (IS_ERR(mod)) { + err = PTR_ERR(mod); goto free_percpu; } - memset(ptr, 0, mod->core_size); - mod->module_core = ptr; - - ptr = module_alloc_update_bounds(mod->init_size); - /* - * The pointer to this block is stored in the module structure - * which is inside the block. This block doesn't need to be - * scanned as it contains data and code that will be freed - * after the module is initialized. - */ - kmemleak_ignore(ptr); - if (!ptr && mod->init_size) { - err = -ENOMEM; - goto free_core; - } - memset(ptr, 0, mod->init_size); - mod->module_init = ptr; - - /* Transfer each section which specifies SHF_ALLOC */ - DEBUGP("final section addresses:\n"); - for (i = 0; i < hdr->e_shnum; i++) { - void *dest; - - if (!(sechdrs[i].sh_flags & SHF_ALLOC)) - continue; - - if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) - dest = mod->module_init - + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); - else - dest = mod->module_core + sechdrs[i].sh_entsize; - - if (sechdrs[i].sh_type != SHT_NOBITS) - memcpy(dest, (void *)sechdrs[i].sh_addr, - sechdrs[i].sh_size); - /* Update sh_addr to point to copy in image. */ - sechdrs[i].sh_addr = (unsigned long)dest; - DEBUGP("\t0x%lx %s\n", - sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); - } - /* Module has been moved. */ - mod = (void *)sechdrs[modindex].sh_addr; - kmemleak_load_module(mod, hdr, sechdrs, secstrings); #if defined(CONFIG_MODULE_UNLOAD) mod->refptr = alloc_percpu(struct module_ref); @@ -2580,7 +2596,6 @@ static noinline struct module *load_module(void __user *umod, free_init: #endif module_free(mod, mod->module_init); - free_core: module_free(mod, mod->module_core); /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: -- cgit v1.2.3-70-g09d2 From 40dd2560ec2df21036db9ec8b971f92d98fd1969 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:03 -0600 Subject: module: refactor load_module part 3 Extract out the allocation and copying in from userspace, and the first set of modinfo checks. Signed-off-by: Rusty Russell --- kernel/module.c | 122 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 75 insertions(+), 47 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 19ddcb0dba3..d8faf35cba8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1803,7 +1803,7 @@ static char *next_string(char *string, unsigned long *secsize) return string; } -static char *get_modinfo(Elf_Shdr *sechdrs, +static char *get_modinfo(const Elf_Shdr *sechdrs, unsigned int info, const char *tag) { @@ -2109,6 +2109,73 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, } #endif +static int copy_and_check(Elf_Ehdr **hdrp, + const void __user *umod, unsigned long len) +{ + int err; + Elf_Ehdr *hdr; + + if (len < sizeof(*hdr)) + return -ENOEXEC; + + /* Suck in entire file: we'll want most of it. */ + /* vmalloc barfs on "unusual" numbers. Check here */ + if (len > 64 * 1024 * 1024 || (hdr = *hdrp = vmalloc(len)) == NULL) + return -ENOMEM; + + if (copy_from_user(hdr, umod, len) != 0) { + err = -EFAULT; + goto free_hdr; + } + + /* Sanity checks against insmoding binaries or wrong arch, + weird elf version */ + if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 + || hdr->e_type != ET_REL + || !elf_check_arch(hdr) + || hdr->e_shentsize != sizeof(Elf_Shdr)) { + err = -ENOEXEC; + goto free_hdr; + } + + if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) { + err = -ENOEXEC; + goto free_hdr; + } + return 0; + +free_hdr: + vfree(hdr); + return err; +} + +static int check_modinfo(struct module *mod, + const Elf_Shdr *sechdrs, + unsigned int infoindex, unsigned int versindex) +{ + const char *modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); + int err; + + /* This is allowed: modprobe --force will invalidate it. */ + if (!modmagic) { + err = try_to_force_load(mod, "bad vermagic"); + if (err) + return err; + } else if (!same_magic(modmagic, vermagic, versindex)) { + printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", + mod->name, modmagic, vermagic); + return -ENOEXEC; + } + + if (get_modinfo(sechdrs, infoindex, "staging")) { + add_taint_module(mod, TAINT_CRAP); + printk(KERN_WARNING "%s: module is from the staging directory," + " the quality is unknown, you have been warned.\n", + mod->name); + } + return 0; +} + static void find_module_sections(struct module *mod, Elf_Ehdr *hdr, Elf_Shdr *sechdrs, const char *secstrings) { @@ -2246,14 +2313,13 @@ static noinline struct module *load_module(void __user *umod, { Elf_Ehdr *hdr; Elf_Shdr *sechdrs; - char *secstrings, *args, *modmagic, *strtab = NULL; - char *staging; + char *secstrings, *args, *strtab = NULL; unsigned int i; unsigned int symindex = 0; unsigned int strindex = 0; unsigned int modindex, versindex, infoindex, pcpuindex; struct module *mod; - long err = 0; + long err; unsigned long symoffs, stroffs, *strmap; void __percpu *percpu; struct _ddebug *debug = NULL; @@ -2263,31 +2329,10 @@ static noinline struct module *load_module(void __user *umod, DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", umod, len, uargs); - if (len < sizeof(*hdr)) - return ERR_PTR(-ENOEXEC); - - /* Suck in entire file: we'll want most of it. */ - /* vmalloc barfs on "unusual" numbers. Check here */ - if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) - return ERR_PTR(-ENOMEM); - - if (copy_from_user(hdr, umod, len) != 0) { - err = -EFAULT; - goto free_hdr; - } - - /* Sanity checks against insmoding binaries or wrong arch, - weird elf version */ - if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 - || hdr->e_type != ET_REL - || !elf_check_arch(hdr) - || hdr->e_shentsize != sizeof(*sechdrs)) { - err = -ENOEXEC; - goto free_hdr; - } - if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) - goto truncated; + err = copy_and_check(&hdr, umod, len); + if (err) + return ERR_PTR(err); /* Convenience variables */ sechdrs = (void *)hdr + hdr->e_shoff; @@ -2347,26 +2392,9 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } - modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); - /* This is allowed: modprobe --force will invalidate it. */ - if (!modmagic) { - err = try_to_force_load(mod, "bad vermagic"); - if (err) - goto free_hdr; - } else if (!same_magic(modmagic, vermagic, versindex)) { - printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", - mod->name, modmagic, vermagic); - err = -ENOEXEC; + err = check_modinfo(mod, sechdrs, infoindex, versindex); + if (err) goto free_hdr; - } - - staging = get_modinfo(sechdrs, infoindex, "staging"); - if (staging) { - add_taint_module(mod, TAINT_CRAP); - printk(KERN_WARNING "%s: module is from the staging directory," - " the quality is unknown, you have been warned.\n", - mod->name); - } /* Now copy in args */ args = strndup_user(uargs, ~0UL >> 1); -- cgit v1.2.3-70-g09d2 From 9f85a4bbb1cf56f65b3d065a5f88204a757f2325 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:04 -0600 Subject: module: refactor load_module part 4 Allocate references inside module_unload_init(), clean up inside module_unload_free(). This version fixed to do allocation before __this_cpu_write, thanks to bug reports from linux-next from Dave Young and Stephen Rothwell . Signed-off-by: Rusty Russell --- kernel/module.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index d8faf35cba8..241bc41dd6b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -524,8 +524,12 @@ static char last_unloaded_module[MODULE_NAME_LEN+1]; EXPORT_TRACEPOINT_SYMBOL(module_get); /* Init the unload section of the module. */ -static void module_unload_init(struct module *mod) +static int module_unload_init(struct module *mod) { + mod->refptr = alloc_percpu(struct module_ref); + if (!mod->refptr) + return -ENOMEM; + INIT_LIST_HEAD(&mod->source_list); INIT_LIST_HEAD(&mod->target_list); @@ -533,6 +537,8 @@ static void module_unload_init(struct module *mod) __this_cpu_write(mod->refptr->incs, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; + + return 0; } /* Does a already use b? */ @@ -612,6 +618,8 @@ static void module_unload_free(struct module *mod) kfree(use); } mutex_unlock(&module_mutex); + + free_percpu(mod->refptr); } #ifdef CONFIG_MODULE_FORCE_UNLOAD @@ -885,8 +893,9 @@ int ref_module(struct module *a, struct module *b) } EXPORT_SYMBOL_GPL(ref_module); -static inline void module_unload_init(struct module *mod) +static inline int module_unload_init(struct module *mod) { + return 0; } #endif /* CONFIG_MODULE_UNLOAD */ @@ -1559,10 +1568,7 @@ static void free_module(struct module *mod) module_free(mod, mod->module_init); kfree(mod->args); percpu_modfree(mod); -#if defined(CONFIG_MODULE_UNLOAD) - if (mod->refptr) - free_percpu(mod->refptr); -#endif + /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); @@ -2442,15 +2448,10 @@ static noinline struct module *load_module(void __user *umod, goto free_percpu; } -#if defined(CONFIG_MODULE_UNLOAD) - mod->refptr = alloc_percpu(struct module_ref); - if (!mod->refptr) { - err = -ENOMEM; - goto free_init; - } -#endif /* Now we've moved module, initialize linked lists, etc. */ - module_unload_init(mod); + err = module_unload_init(mod); + if (err) + goto free_init; /* Set up license info based on the info section */ set_license(mod, get_modinfo(sechdrs, infoindex, "license")); @@ -2619,10 +2620,7 @@ static noinline struct module *load_module(void __user *umod, cleanup: free_modinfo(mod); module_unload_free(mod); -#if defined(CONFIG_MODULE_UNLOAD) - free_percpu(mod->refptr); free_init: -#endif module_free(mod, mod->module_init); module_free(mod, mod->module_core); /* mod will be freed with core. Don't access it beyond this line! */ -- cgit v1.2.3-70-g09d2 From 22e268ebecc549f1b1907f114cb44d6044bdee3c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:05 -0600 Subject: module: refactor load_module part 5 1) Extract out the relocation loop into apply_relocations 2) Extract license and version checks into check_module_license_and_versions 3) Extract icache flushing into flush_module_icache 4) Move __obsparm warning into find_module_sections 5) Move license setting into check_modinfo. Signed-off-by: Rusty Russell --- kernel/module.c | 182 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 106 insertions(+), 76 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 241bc41dd6b..b536db804ca 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1698,6 +1698,39 @@ static int simplify_symbols(Elf_Shdr *sechdrs, return ret; } +static int apply_relocations(struct module *mod, + Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + unsigned int symindex, + unsigned int strindex) +{ + unsigned int i; + int err = 0; + + /* Now do relocations. */ + for (i = 1; i < hdr->e_shnum; i++) { + const char *strtab = (char *)sechdrs[strindex].sh_addr; + unsigned int info = sechdrs[i].sh_info; + + /* Not a valid relocation section? */ + if (info >= hdr->e_shnum) + continue; + + /* Don't bother with non-allocated sections */ + if (!(sechdrs[info].sh_flags & SHF_ALLOC)) + continue; + + if (sechdrs[i].sh_type == SHT_REL) + err = apply_relocate(sechdrs, strtab, symindex, i, mod); + else if (sechdrs[i].sh_type == SHT_RELA) + err = apply_relocate_add(sechdrs, strtab, symindex, i, + mod); + if (err < 0) + break; + } + return err; +} + /* Additional bytes needed by arch in front of individual sections */ unsigned int __weak arch_mod_section_prepend(struct module *mod, unsigned int section) @@ -2179,6 +2212,10 @@ static int check_modinfo(struct module *mod, " the quality is unknown, you have been warned.\n", mod->name); } + + /* Set up license info based on the info section */ + set_license(mod, get_modinfo(sechdrs, infoindex, "license")); + return 0; } @@ -2245,6 +2282,10 @@ static void find_module_sections(struct module *mod, Elf_Ehdr *hdr, sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif + + if (section_addr(hdr, sechdrs, secstrings, "__obsparm")) + printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", + mod->name); } static struct module *move_module(struct module *mod, @@ -2311,6 +2352,60 @@ static struct module *move_module(struct module *mod, return mod; } +static int check_module_license_and_versions(struct module *mod, + Elf_Shdr *sechdrs) +{ + /* + * ndiswrapper is under GPL by itself, but loads proprietary modules. + * Don't use add_taint_module(), as it would prevent ndiswrapper from + * using GPL-only symbols it needs. + */ + if (strcmp(mod->name, "ndiswrapper") == 0) + add_taint(TAINT_PROPRIETARY_MODULE); + + /* driverloader was caught wrongly pretending to be under GPL */ + if (strcmp(mod->name, "driverloader") == 0) + add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + +#ifdef CONFIG_MODVERSIONS + if ((mod->num_syms && !mod->crcs) + || (mod->num_gpl_syms && !mod->gpl_crcs) + || (mod->num_gpl_future_syms && !mod->gpl_future_crcs) +#ifdef CONFIG_UNUSED_SYMBOLS + || (mod->num_unused_syms && !mod->unused_crcs) + || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs) +#endif + ) { + return try_to_force_load(mod, + "no versions for exported symbols"); + } +#endif + return 0; +} + +static void flush_module_icache(const struct module *mod) +{ + mm_segment_t old_fs; + + /* flush the icache in correct context */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + + /* + * Flush the instruction cache, since we've played with text. + * Do it before processing of module parameters, so the module + * can provide parameter accessor functions of its own. + */ + if (mod->module_init) + flush_icache_range((unsigned long)mod->module_init, + (unsigned long)mod->module_init + + mod->init_size); + flush_icache_range((unsigned long)mod->module_core, + (unsigned long)mod->module_core + mod->core_size); + + set_fs(old_fs); +} + /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ static noinline struct module *load_module(void __user *umod, @@ -2331,8 +2426,6 @@ static noinline struct module *load_module(void __user *umod, struct _ddebug *debug = NULL; unsigned int num_debug = 0; - mm_segment_t old_fs; - DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", umod, len, uargs); @@ -2453,20 +2546,13 @@ static noinline struct module *load_module(void __user *umod, if (err) goto free_init; - /* Set up license info based on the info section */ - set_license(mod, get_modinfo(sechdrs, infoindex, "license")); - - /* - * ndiswrapper is under GPL by itself, but loads proprietary modules. - * Don't use add_taint_module(), as it would prevent ndiswrapper from - * using GPL-only symbols it needs. - */ - if (strcmp(mod->name, "ndiswrapper") == 0) - add_taint(TAINT_PROPRIETARY_MODULE); + /* Now we've got everything in the final locations, we can + * find optional sections. */ + find_module_sections(mod, hdr, sechdrs, secstrings); - /* driverloader was caught wrongly pretending to be under GPL */ - if (strcmp(mod->name, "driverloader") == 0) - add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + err = check_module_license_and_versions(mod, sechdrs); + if (err) + goto free_unload; /* Set up MODINFO_ATTR fields */ setup_modinfo(mod, sechdrs, infoindex); @@ -2477,47 +2563,9 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto cleanup; - /* Now we've got everything in the final locations, we can - * find optional sections. */ - find_module_sections(mod, hdr, sechdrs, secstrings); - -#ifdef CONFIG_MODVERSIONS - if ((mod->num_syms && !mod->crcs) - || (mod->num_gpl_syms && !mod->gpl_crcs) - || (mod->num_gpl_future_syms && !mod->gpl_future_crcs) -#ifdef CONFIG_UNUSED_SYMBOLS - || (mod->num_unused_syms && !mod->unused_crcs) - || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs) -#endif - ) { - err = try_to_force_load(mod, - "no versions for exported symbols"); - if (err) - goto cleanup; - } -#endif - - /* Now do relocations. */ - for (i = 1; i < hdr->e_shnum; i++) { - const char *strtab = (char *)sechdrs[strindex].sh_addr; - unsigned int info = sechdrs[i].sh_info; - - /* Not a valid relocation section? */ - if (info >= hdr->e_shnum) - continue; - - /* Don't bother with non-allocated sections */ - if (!(sechdrs[info].sh_flags & SHF_ALLOC)) - continue; - - if (sechdrs[i].sh_type == SHT_REL) - err = apply_relocate(sechdrs, strtab, symindex, i,mod); - else if (sechdrs[i].sh_type == SHT_RELA) - err = apply_relocate_add(sechdrs, strtab, symindex, i, - mod); - if (err < 0) - goto cleanup; - } + err = apply_relocations(mod, hdr, sechdrs, symindex, strindex); + if (err < 0) + goto cleanup; /* Set up and sort exception table */ mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table", @@ -2541,28 +2589,9 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto cleanup; - /* flush the icache in correct context */ - old_fs = get_fs(); - set_fs(KERNEL_DS); - - /* - * Flush the instruction cache, since we've played with text. - * Do it before processing of module parameters, so the module - * can provide parameter accessor functions of its own. - */ - if (mod->module_init) - flush_icache_range((unsigned long)mod->module_init, - (unsigned long)mod->module_init - + mod->init_size); - flush_icache_range((unsigned long)mod->module_core, - (unsigned long)mod->module_core + mod->core_size); - - set_fs(old_fs); + flush_module_icache(mod); mod->args = args; - if (section_addr(hdr, sechdrs, secstrings, "__obsparm")) - printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", - mod->name); /* Now sew it into the lists so we can get lockdep and oops * info during argument parsing. Noone should access us, since @@ -2619,6 +2648,7 @@ static noinline struct module *load_module(void __user *umod, module_arch_cleanup(mod); cleanup: free_modinfo(mod); + free_unload: module_unload_free(mod); free_init: module_free(mod, mod->module_init); -- cgit v1.2.3-70-g09d2 From 44032e631691adf1f406843d5e54deb795973ff7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 5 Aug 2010 12:59:05 -0600 Subject: module: reduce stack usage for each_symbol() And now that I'm looking at that call-chain (to see if it would make sense to use some other more specific lock - doesn't look like it: all the readers are using RCU and this is the only writer), I also give you this trivial one-liner. It changes each_symbol() to not put that constant array on the stack, resulting in changing movq $C.388.31095, %rsi #, tmp85 subq $376, %rsp #, movq %rdi, %rbx # fn, fn leaq -208(%rbp), %rdi #, tmp84 movq %rbx, %rdx # fn, rep movsl xorl %esi, %esi # leaq -208(%rbp), %rdi #, tmp87 movq %r12, %rcx # data, call each_symbol_in_section.clone.0 # into xorl %esi, %esi # subq $216, %rsp #, movq %rdi, %rbx # fn, fn movq $arr.31078, %rdi #, call each_symbol_in_section.clone.0 # which is not so much about being obviously shorter and simpler because we don't unnecessarily copy that constant array around onto the stack, but also about having a much smaller stack footprint (376 vs 216 bytes - see the update of 'rsp'). Signed-off-by: Linus Torvalds Signed-off-by: Rusty Russell --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index b536db804ca..79545bda358 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -227,7 +227,7 @@ bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, unsigned int symnum, void *data), void *data) { struct module *mod; - const struct symsearch arr[] = { + static const struct symsearch arr[] = { { __start___ksymtab, __stop___ksymtab, __start___kcrctab, NOT_GPL_ONLY, false }, { __start___ksymtab_gpl, __stop___ksymtab_gpl, -- cgit v1.2.3-70-g09d2 From 3264d3f9dd532ed9c3eb9491619e3f485b72747f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 2 Jun 2010 11:01:06 -0700 Subject: module: add load_info Btw, here's a patch that _looks_ large, but it really pretty trivial, and sets things up so that it would be way easier to split off pieces of the module loading. The reason it looks large is that it creates a "module_info" structure that contains all the module state that we're building up while loading, instead of having individual variables for all the indices etc. So the patch ends up being large, because every "symindex" access instead becomes "info.index.sym" etc. That may be a few characters longer, but it then means that we can just pass a pointer to that "info" structure around. and let all the pieces fill it in very naturally. As an example of that, the patch also moves the initialization of all those convenience variables into a "setup_module_info()" function. And at this point it really does become very natural to start to peel off some of the error labels and move them into the helper functions - now the "truncated" case is gone, and is handled inside that setup function instead. So maybe you don't like this approach, and it does make the variable accesses a bit longer, but I don't think unreadably so. And the patch really does look big and scary, but there really should be absolutely no semantic changes - most of it was a trivial and mindless rename. In fact, it was so mindless that I on purpose kept the existing helper functions looking like this: - err = check_modinfo(mod, sechdrs, infoindex, versindex); + err = check_modinfo(mod, info.sechdrs, info.index.info, info.index.vers); rather than changing them to just take the "info" pointer. IOW, a second phase (if you think the approach is ok) would change that calling convention to just do err = check_modinfo(mod, &info); (and same for "layout_sections()", "layout_symtabs()" etc.) Similarly, while right now it makes things _look_ bigger, with things like this: versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); becoming info->index.vers = find_sec(info->hdr, info->sechdrs, info->secstrings, "__versions"); in the new "setup_module_info()" function, that's again just a result of it being a search-and-replace patch. By using the 'info' pointer, we could just change the 'find_sec()' interface so that it ends up being info->index.vers = find_sec(info, "__versions"); instead, and then we'd actually have a shorter and more readable line. So for a lot of those mindless variable name expansions there's would be room for separate cleanups. I didn't move quite everything in there - if we do this to layout_symtabs, for example, we'd want to move the percpu, symoffs, stroffs, *strmap variables to be fields in that module_info structure too. But that's a much smaller patch, I moved just the really core stuff that is currently being set up and used in various parts. But even in this rough form, it removes close to 70 lines from that function (but adds 22 lines overall, of course - the structure definition, the helper function declarations and call-sites etc etc). Signed-off-by: Linus Torvalds Signed-off-by: Rusty Russell --- kernel/module.c | 228 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 125 insertions(+), 103 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 79545bda358..cb40a4e64a0 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2148,8 +2148,17 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, } #endif -static int copy_and_check(Elf_Ehdr **hdrp, - const void __user *umod, unsigned long len) +struct load_info { + Elf_Ehdr *hdr; + unsigned long len; + Elf_Shdr *sechdrs; + char *secstrings, *args, *strtab; + struct { + unsigned int sym, str, mod, vers, info, pcpu; + } index; +}; + +static int copy_and_check(struct load_info *info, const void __user *umod, unsigned long len) { int err; Elf_Ehdr *hdr; @@ -2159,7 +2168,7 @@ static int copy_and_check(Elf_Ehdr **hdrp, /* Suck in entire file: we'll want most of it. */ /* vmalloc barfs on "unusual" numbers. Check here */ - if (len > 64 * 1024 * 1024 || (hdr = *hdrp = vmalloc(len)) == NULL) + if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) return -ENOMEM; if (copy_from_user(hdr, umod, len) != 0) { @@ -2181,6 +2190,8 @@ static int copy_and_check(Elf_Ehdr **hdrp, err = -ENOEXEC; goto free_hdr; } + info->hdr = hdr; + info->len = len; return 0; free_hdr: @@ -2188,6 +2199,80 @@ free_hdr: return err; } +/* + * Set up our basic convenience variables (pointers to section headers, + * search for module section index etc), and do some basic section + * verification. + * + * Return the temporary module pointer (we'll replace it with the final + * one when we move the module sections around). + */ +static struct module *setup_load_info(struct load_info *info) +{ + unsigned int i; + struct module *mod; + + /* Set up the convenience variables */ + info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; + info->secstrings = (void *)info->hdr + info->sechdrs[info->hdr->e_shstrndx].sh_offset; + info->sechdrs[0].sh_addr = 0; + + for (i = 1; i < info->hdr->e_shnum; i++) { + if (info->sechdrs[i].sh_type != SHT_NOBITS + && info->len < info->sechdrs[i].sh_offset + info->sechdrs[i].sh_size) + goto truncated; + + /* Mark all sections sh_addr with their address in the + temporary image. */ + info->sechdrs[i].sh_addr = (size_t)info->hdr + info->sechdrs[i].sh_offset; + + /* Internal symbols and strings. */ + if (info->sechdrs[i].sh_type == SHT_SYMTAB) { + info->index.sym = i; + info->index.str = info->sechdrs[i].sh_link; + info->strtab = (char *)info->hdr + info->sechdrs[info->index.str].sh_offset; + } +#ifndef CONFIG_MODULE_UNLOAD + /* Don't load .exit sections */ + if (strstarts(info->secstrings+info->sechdrs[i].sh_name, ".exit")) + info->sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; +#endif + } + + info->index.mod = find_sec(info->hdr, info->sechdrs, info->secstrings, + ".gnu.linkonce.this_module"); + if (!info->index.mod) { + printk(KERN_WARNING "No module found in object\n"); + return ERR_PTR(-ENOEXEC); + } + /* This is temporary: point mod into copy of data. */ + mod = (void *)info->sechdrs[info->index.mod].sh_addr; + + if (info->index.sym == 0) { + printk(KERN_WARNING "%s: module has no symbols (stripped?)\n", + mod->name); + return ERR_PTR(-ENOEXEC); + } + + info->index.vers = find_sec(info->hdr, info->sechdrs, info->secstrings, "__versions"); + info->index.info = find_sec(info->hdr, info->sechdrs, info->secstrings, ".modinfo"); + info->index.pcpu = find_pcpusec(info->hdr, info->sechdrs, info->secstrings); + + /* Don't keep modinfo and version sections. */ + info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; + info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; + + /* Check module struct version now, before we try to use module. */ + if (!check_modstruct_version(info->sechdrs, info->index.vers, mod)) + return ERR_PTR(-ENOEXEC); + + return mod; + + truncated: + printk(KERN_ERR "Module len %lu truncated\n", info->len); + return ERR_PTR(-ENOEXEC); +} + static int check_modinfo(struct module *mod, const Elf_Shdr *sechdrs, unsigned int infoindex, unsigned int versindex) @@ -2412,13 +2497,7 @@ static noinline struct module *load_module(void __user *umod, unsigned long len, const char __user *uargs) { - Elf_Ehdr *hdr; - Elf_Shdr *sechdrs; - char *secstrings, *args, *strtab = NULL; - unsigned int i; - unsigned int symindex = 0; - unsigned int strindex = 0; - unsigned int modindex, versindex, infoindex, pcpuindex; + struct load_info info = { NULL, }; struct module *mod; long err; unsigned long symoffs, stroffs, *strmap; @@ -2429,80 +2508,28 @@ static noinline struct module *load_module(void __user *umod, DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", umod, len, uargs); - err = copy_and_check(&hdr, umod, len); + err = copy_and_check(&info, umod, len); if (err) return ERR_PTR(err); - /* Convenience variables */ - sechdrs = (void *)hdr + hdr->e_shoff; - secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - sechdrs[0].sh_addr = 0; - - for (i = 1; i < hdr->e_shnum; i++) { - if (sechdrs[i].sh_type != SHT_NOBITS - && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) - goto truncated; - - /* Mark all sections sh_addr with their address in the - temporary image. */ - sechdrs[i].sh_addr = (size_t)hdr + sechdrs[i].sh_offset; - - /* Internal symbols and strings. */ - if (sechdrs[i].sh_type == SHT_SYMTAB) { - symindex = i; - strindex = sechdrs[i].sh_link; - strtab = (char *)hdr + sechdrs[strindex].sh_offset; - } -#ifndef CONFIG_MODULE_UNLOAD - /* Don't load .exit sections */ - if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) - sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; -#endif - } - - modindex = find_sec(hdr, sechdrs, secstrings, - ".gnu.linkonce.this_module"); - if (!modindex) { - printk(KERN_WARNING "No module found in object\n"); - err = -ENOEXEC; - goto free_hdr; - } - /* This is temporary: point mod into copy of data. */ - mod = (void *)sechdrs[modindex].sh_addr; - - if (symindex == 0) { - printk(KERN_WARNING "%s: module has no symbols (stripped?)\n", - mod->name); - err = -ENOEXEC; - goto free_hdr; - } - - versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); - infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); - pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); - - /* Don't keep modinfo and version sections. */ - sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; - sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; - - /* Check module struct version now, before we try to use module. */ - if (!check_modstruct_version(sechdrs, versindex, mod)) { - err = -ENOEXEC; + mod = setup_load_info(&info); + if (IS_ERR(mod)) { + err = PTR_ERR(mod); goto free_hdr; } - err = check_modinfo(mod, sechdrs, infoindex, versindex); + err = check_modinfo(mod, info.sechdrs, info.index.info, info.index.vers); if (err) goto free_hdr; /* Now copy in args */ - args = strndup_user(uargs, ~0UL >> 1); - if (IS_ERR(args)) { - err = PTR_ERR(args); + info.args = strndup_user(uargs, ~0UL >> 1); + if (IS_ERR(info.args)) { + err = PTR_ERR(info.args); goto free_hdr; } - strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size) + strmap = kzalloc(BITS_TO_LONGS(info.sechdrs[info.index.str].sh_size) * sizeof(long), GFP_KERNEL); if (!strmap) { err = -ENOMEM; @@ -2512,17 +2539,17 @@ static noinline struct module *load_module(void __user *umod, mod->state = MODULE_STATE_COMING; /* Allow arches to frob section contents and sizes. */ - err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod); + err = module_frob_arch_sections(info.hdr, info.sechdrs, info.secstrings, mod); if (err < 0) goto free_mod; - if (pcpuindex) { + if (info.index.pcpu) { /* We have a special allocation for this section. */ - err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, - sechdrs[pcpuindex].sh_addralign); + err = percpu_modalloc(mod, info.sechdrs[info.index.pcpu].sh_size, + info.sechdrs[info.index.pcpu].sh_addralign); if (err) goto free_mod; - sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; + info.sechdrs[info.index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC; } /* Keep this around for failure path. */ percpu = mod_percpu(mod); @@ -2530,12 +2557,12 @@ static noinline struct module *load_module(void __user *umod, /* Determine total sizes, and put offsets in sh_entsize. For now this is done generically; there doesn't appear to be any special cases for the architectures. */ - layout_sections(mod, hdr, sechdrs, secstrings); - symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr, - secstrings, &stroffs, strmap); + layout_sections(mod, info.hdr, info.sechdrs, info.secstrings); + symoffs = layout_symtab(mod, info.sechdrs, info.index.sym, info.index.str, info.hdr, + info.secstrings, &stroffs, strmap); /* Allocate and move to the final place */ - mod = move_module(mod, hdr, sechdrs, secstrings, modindex); + mod = move_module(mod, info.hdr, info.sechdrs, info.secstrings, info.index.mod); if (IS_ERR(mod)) { err = PTR_ERR(mod); goto free_percpu; @@ -2548,50 +2575,50 @@ static noinline struct module *load_module(void __user *umod, /* Now we've got everything in the final locations, we can * find optional sections. */ - find_module_sections(mod, hdr, sechdrs, secstrings); + find_module_sections(mod, info.hdr, info.sechdrs, info.secstrings); - err = check_module_license_and_versions(mod, sechdrs); + err = check_module_license_and_versions(mod, info.sechdrs); if (err) goto free_unload; /* Set up MODINFO_ATTR fields */ - setup_modinfo(mod, sechdrs, infoindex); + setup_modinfo(mod, info.sechdrs, info.index.info); /* Fix up syms, so that st_value is a pointer to location. */ - err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, + err = simplify_symbols(info.sechdrs, info.index.sym, info.strtab, info.index.vers, info.index.pcpu, mod); if (err < 0) goto cleanup; - err = apply_relocations(mod, hdr, sechdrs, symindex, strindex); + err = apply_relocations(mod, info.hdr, info.sechdrs, info.index.sym, info.index.str); if (err < 0) goto cleanup; /* Set up and sort exception table */ - mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table", + mod->extable = section_objs(info.hdr, info.sechdrs, info.secstrings, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); sort_extable(mod->extable, mod->extable + mod->num_exentries); /* Finally, copy percpu area over. */ - percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr, - sechdrs[pcpuindex].sh_size); + percpu_modcopy(mod, (void *)info.sechdrs[info.index.pcpu].sh_addr, + info.sechdrs[info.index.pcpu].sh_size); - add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, - symoffs, stroffs, secstrings, strmap); + add_kallsyms(mod, info.sechdrs, info.hdr->e_shnum, info.index.sym, info.index.str, + symoffs, stroffs, info.secstrings, strmap); kfree(strmap); strmap = NULL; if (!mod->taints) - debug = section_objs(hdr, sechdrs, secstrings, "__verbose", + debug = section_objs(info.hdr, info.sechdrs, info.secstrings, "__verbose", sizeof(*debug), &num_debug); - err = module_finalize(hdr, sechdrs, mod); + err = module_finalize(info.hdr, info.sechdrs, mod); if (err < 0) goto cleanup; flush_module_icache(mod); - mod->args = args; + mod->args = info.args; /* Now sew it into the lists so we can get lockdep and oops * info during argument parsing. Noone should access us, since @@ -2625,11 +2652,11 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto unlink; - add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); - add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); + add_sect_attrs(mod, info.hdr->e_shnum, info.secstrings, info.sechdrs); + add_notes_attrs(mod, info.hdr->e_shnum, info.secstrings, info.sechdrs); /* Get rid of temporary copy */ - vfree(hdr); + vfree(info.hdr); trace_module_load(mod); @@ -2657,16 +2684,11 @@ static noinline struct module *load_module(void __user *umod, free_percpu: free_percpu(percpu); free_mod: - kfree(args); + kfree(info.args); kfree(strmap); free_hdr: - vfree(hdr); + vfree(info.hdr); return ERR_PTR(err); - - truncated: - printk(KERN_ERR "Module len %lu truncated\n", len); - err = -ENOEXEC; - goto free_hdr; } /* Call module constructors. */ -- cgit v1.2.3-70-g09d2 From 8b5f61a795fe37be090b0fd18b6b7271db9298e0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:06 -0600 Subject: module: refactor out section header rewriting Put all the "rewrite and check section headers" in one place. This adds another iteration over the sections, but it's far clearer. We iterate once for every find_section() so we already iterate over many times. Signed-off-by: Rusty Russell --- kernel/module.c | 70 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 25 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index cb40a4e64a0..91f3ebe230e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2158,6 +2158,7 @@ struct load_info { } index; }; +/* Sets info->hdr and info->len. */ static int copy_and_check(struct load_info *info, const void __user *umod, unsigned long len) { int err; @@ -2199,6 +2200,39 @@ free_hdr: return err; } +static int rewrite_section_headers(struct load_info *info) +{ + unsigned int i; + + /* This should always be true, but let's be sure. */ + info->sechdrs[0].sh_addr = 0; + + for (i = 1; i < info->hdr->e_shnum; i++) { + Elf_Shdr *shdr = &info->sechdrs[i]; + if (shdr->sh_type != SHT_NOBITS + && info->len < shdr->sh_offset + shdr->sh_size) { + printk(KERN_ERR "Module len %lu truncated\n", + info->len); + return -ENOEXEC; + } + + /* Mark all sections sh_addr with their address in the + temporary image. */ + shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset; + +#ifndef CONFIG_MODULE_UNLOAD + /* Don't load .exit sections */ + if (strstarts(info->secstrings+shdr->sh_name, ".exit")) + shdr->sh_flags &= ~(unsigned long)SHF_ALLOC; +#endif + /* Don't keep modinfo and version sections. */ + if (!strcmp(info->secstrings+shdr->sh_name, "__versions") + || !strcmp(info->secstrings+shdr->sh_name, ".modinfo")) + shdr->sh_flags &= ~(unsigned long)SHF_ALLOC; + } + return 0; +} + /* * Set up our basic convenience variables (pointers to section headers, * search for module section index etc), and do some basic section @@ -2210,33 +2244,27 @@ free_hdr: static struct module *setup_load_info(struct load_info *info) { unsigned int i; + int err; struct module *mod; /* Set up the convenience variables */ info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; - info->secstrings = (void *)info->hdr + info->sechdrs[info->hdr->e_shstrndx].sh_offset; - info->sechdrs[0].sh_addr = 0; - - for (i = 1; i < info->hdr->e_shnum; i++) { - if (info->sechdrs[i].sh_type != SHT_NOBITS - && info->len < info->sechdrs[i].sh_offset + info->sechdrs[i].sh_size) - goto truncated; + info->secstrings = (void *)info->hdr + + info->sechdrs[info->hdr->e_shstrndx].sh_offset; - /* Mark all sections sh_addr with their address in the - temporary image. */ - info->sechdrs[i].sh_addr = (size_t)info->hdr + info->sechdrs[i].sh_offset; + err = rewrite_section_headers(info); + if (err) + return ERR_PTR(err); - /* Internal symbols and strings. */ + /* Find internal symbols and strings. */ + for (i = 1; i < info->hdr->e_shnum; i++) { if (info->sechdrs[i].sh_type == SHT_SYMTAB) { info->index.sym = i; info->index.str = info->sechdrs[i].sh_link; - info->strtab = (char *)info->hdr + info->sechdrs[info->index.str].sh_offset; + info->strtab = (char *)info->hdr + + info->sechdrs[info->index.str].sh_offset; + break; } -#ifndef CONFIG_MODULE_UNLOAD - /* Don't load .exit sections */ - if (strstarts(info->secstrings+info->sechdrs[i].sh_name, ".exit")) - info->sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; -#endif } info->index.mod = find_sec(info->hdr, info->sechdrs, info->secstrings, @@ -2258,19 +2286,11 @@ static struct module *setup_load_info(struct load_info *info) info->index.info = find_sec(info->hdr, info->sechdrs, info->secstrings, ".modinfo"); info->index.pcpu = find_pcpusec(info->hdr, info->sechdrs, info->secstrings); - /* Don't keep modinfo and version sections. */ - info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; - info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; - /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(info->sechdrs, info->index.vers, mod)) return ERR_PTR(-ENOEXEC); return mod; - - truncated: - printk(KERN_ERR "Module len %lu truncated\n", info->len); - return ERR_PTR(-ENOEXEC); } static int check_modinfo(struct module *mod, -- cgit v1.2.3-70-g09d2 From d6df72a06e067139d491da4a9d14d92ad39e7a50 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:07 -0600 Subject: module: refactor out section header rewriting: FIX modversions We can't do the find_sec after removing the SHF_ALLOC flags; it won't find the sections. Signed-off-by: Rusty Russell --- kernel/module.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 91f3ebe230e..ab07f67de99 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2225,11 +2225,13 @@ static int rewrite_section_headers(struct load_info *info) if (strstarts(info->secstrings+shdr->sh_name, ".exit")) shdr->sh_flags &= ~(unsigned long)SHF_ALLOC; #endif - /* Don't keep modinfo and version sections. */ - if (!strcmp(info->secstrings+shdr->sh_name, "__versions") - || !strcmp(info->secstrings+shdr->sh_name, ".modinfo")) - shdr->sh_flags &= ~(unsigned long)SHF_ALLOC; } + + /* Track but don't keep modinfo and version sections. */ + info->index.vers = find_sec(info->hdr, info->sechdrs, info->secstrings, "__versions"); + info->index.info = find_sec(info->hdr, info->sechdrs, info->secstrings, ".modinfo"); + info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; + info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; return 0; } @@ -2282,8 +2284,6 @@ static struct module *setup_load_info(struct load_info *info) return ERR_PTR(-ENOEXEC); } - info->index.vers = find_sec(info->hdr, info->sechdrs, info->secstrings, "__versions"); - info->index.info = find_sec(info->hdr, info->sechdrs, info->secstrings, ".modinfo"); info->index.pcpu = find_pcpusec(info->hdr, info->sechdrs, info->secstrings); /* Check module struct version now, before we try to use module. */ -- cgit v1.2.3-70-g09d2 From eded41c1c6466081e0eb00d38719c6e6ee81a5d4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:07 -0600 Subject: module: kallsyms functions take struct load_info Simple refactor causes us to lift struct definition to top of file. Signed-off-by: Rusty Russell --- kernel/module.c | 68 ++++++++++++++++++++++++--------------------------------- 1 file changed, 29 insertions(+), 39 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index ab07f67de99..79c4d6f69dd 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -110,6 +110,16 @@ int unregister_module_notifier(struct notifier_block * nb) } EXPORT_SYMBOL(unregister_module_notifier); +struct load_info { + Elf_Ehdr *hdr; + unsigned long len; + Elf_Shdr *sechdrs; + char *secstrings, *args, *strtab; + struct { + unsigned int sym, str, mod, vers, info, pcpu; + } index; +}; + /* We require a truly strong try_module_get(): 0 means failure due to ongoing or failed initialization etc. */ static inline int strong_try_module_get(struct module *mod) @@ -1909,11 +1919,10 @@ static int is_exported(const char *name, unsigned long value, } /* As per nm */ -static char elf_type(const Elf_Sym *sym, - Elf_Shdr *sechdrs, - const char *secstrings, - struct module *mod) +static char elf_type(const Elf_Sym *sym, const struct load_info *info) { + const Elf_Shdr *sechdrs = info->sechdrs; + if (ELF_ST_BIND(sym->st_info) == STB_WEAK) { if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT) return 'v'; @@ -1943,8 +1952,10 @@ static char elf_type(const Elf_Sym *sym, else return 'b'; } - if (strstarts(secstrings + sechdrs[sym->st_shndx].sh_name, ".debug")) + if (strstarts(info->secstrings + sechdrs[sym->st_shndx].sh_name, + ".debug")) { return 'n'; + } return '?'; } @@ -2021,35 +2032,30 @@ static unsigned long layout_symtab(struct module *mod, return symoffs; } -static void add_kallsyms(struct module *mod, - Elf_Shdr *sechdrs, - unsigned int shnum, - unsigned int symindex, - unsigned int strindex, +static void add_kallsyms(struct module *mod, struct load_info *info, unsigned long symoffs, unsigned long stroffs, - const char *secstrings, unsigned long *strmap) { unsigned int i, ndst; const Elf_Sym *src; Elf_Sym *dst; char *s; + Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; - mod->symtab = (void *)sechdrs[symindex].sh_addr; - mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); - mod->strtab = (void *)sechdrs[strindex].sh_addr; + mod->symtab = (void *)symsec->sh_addr; + mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym); + mod->strtab = info->strtab; /* Set types up while we still have access to sections. */ for (i = 0; i < mod->num_symtab; i++) - mod->symtab[i].st_info - = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); + mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); mod->core_symtab = dst = mod->module_core + symoffs; src = mod->symtab; *dst = *src; for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { - if (!is_core_symbol(src, sechdrs, shnum)) + if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) continue; dst[ndst] = *src; dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name); @@ -2058,7 +2064,7 @@ static void add_kallsyms(struct module *mod, mod->core_num_syms = ndst; mod->core_strtab = s = mod->module_core + stroffs; - for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i) + for (*s = 0, i = 1; i < info->sechdrs[info->index.str].sh_size; ++i) if (test_bit(i, strmap)) *++s = mod->strtab[i]; } @@ -2075,15 +2081,10 @@ static inline unsigned long layout_symtab(struct module *mod, return 0; } -static inline void add_kallsyms(struct module *mod, - Elf_Shdr *sechdrs, - unsigned int shnum, - unsigned int symindex, - unsigned int strindex, - unsigned long symoffs, - unsigned long stroffs, - const char *secstrings, - const unsigned long *strmap) +static void add_kallsyms(struct module *mod, struct load_info *info, + unsigned long symoffs, + unsigned long stroffs, + unsigned long *strmap) { } #endif /* CONFIG_KALLSYMS */ @@ -2148,16 +2149,6 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, } #endif -struct load_info { - Elf_Ehdr *hdr; - unsigned long len; - Elf_Shdr *sechdrs; - char *secstrings, *args, *strtab; - struct { - unsigned int sym, str, mod, vers, info, pcpu; - } index; -}; - /* Sets info->hdr and info->len. */ static int copy_and_check(struct load_info *info, const void __user *umod, unsigned long len) { @@ -2623,8 +2614,7 @@ static noinline struct module *load_module(void __user *umod, percpu_modcopy(mod, (void *)info.sechdrs[info.index.pcpu].sh_addr, info.sechdrs[info.index.pcpu].sh_size); - add_kallsyms(mod, info.sechdrs, info.hdr->e_shnum, info.index.sym, info.index.str, - symoffs, stroffs, info.secstrings, strmap); + add_kallsyms(mod, &info, symoffs, stroffs, strmap); kfree(strmap); strmap = NULL; -- cgit v1.2.3-70-g09d2 From 511ca6ae43fbe0a7c9e0b50ad275f7ef24ef3b58 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:08 -0600 Subject: module: fix crash in get_ksymbol() when oopsing in module init Andrew had the sole pleasure of tickling this bug in linux-next; when we set up "info->strtab" it's pointing into the temporary copy of the module. For most uses that is fine, but kallsyms keeps a pointer around during module load (inside mod->strtab). If we oops for some reason inside a module's init function, kallsyms will use the mod->strtab pointer into the now-freed temporary module copy. (Later oopses work fine: after init we overwrite mod->strtab to point to a compacted core-only strtab). Reported-by: Andrew "Grumpy" Morton Signed-off-by: Rusty "Buggy" Russell Tested-by: Andrew "Happy" Morton Signed-off-by: Rusty Russell --- kernel/module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 79c4d6f69dd..60cdd0459ea 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2045,7 +2045,8 @@ static void add_kallsyms(struct module *mod, struct load_info *info, mod->symtab = (void *)symsec->sh_addr; mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym); - mod->strtab = info->strtab; + /* Make sure we get permanent strtab: don't use info->strtab. */ + mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr; /* Set types up while we still have access to sections. */ for (i = 0; i < mod->num_symtab; i++) -- cgit v1.2.3-70-g09d2 From d913188c75191114051cf0bac75dad444c6080fa Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:08 -0600 Subject: module: layout_and_allocate layout_and_allocate() does everything up to and including the final struct module placement inside the allocated module memory. We have to store the symbol layout information in our struct load_info though. This avoids the nasty code we had before where 'mod' pointed first to the version inside the temporary allocation containing the entire file, then later was moved to point to the real struct module: now the main code only ever sees the final module address. (Includes fix for the Tony Luck-found Linus-diagnosed failure path error). Signed-off-by: Rusty Russell --- kernel/module.c | 224 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 125 insertions(+), 99 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 60cdd0459ea..fb11e2a8823 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -115,6 +115,8 @@ struct load_info { unsigned long len; Elf_Shdr *sechdrs; char *secstrings, *args, *strtab; + unsigned long *strmap; + unsigned long symoffs, stroffs; struct { unsigned int sym, str, mod, vers, info, pcpu; } index; @@ -402,7 +404,8 @@ static int percpu_modalloc(struct module *mod, mod->percpu = __alloc_reserved_percpu(size, align); if (!mod->percpu) { printk(KERN_WARNING - "Could not allocate %lu bytes percpu data\n", size); + "%s: Could not allocate %lu bytes percpu data\n", + mod->name, size); return -ENOMEM; } mod->percpu_size = size; @@ -2032,10 +2035,7 @@ static unsigned long layout_symtab(struct module *mod, return symoffs; } -static void add_kallsyms(struct module *mod, struct load_info *info, - unsigned long symoffs, - unsigned long stroffs, - unsigned long *strmap) +static void add_kallsyms(struct module *mod, struct load_info *info) { unsigned int i, ndst; const Elf_Sym *src; @@ -2052,21 +2052,22 @@ static void add_kallsyms(struct module *mod, struct load_info *info, for (i = 0; i < mod->num_symtab; i++) mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); - mod->core_symtab = dst = mod->module_core + symoffs; + mod->core_symtab = dst = mod->module_core + info->symoffs; src = mod->symtab; *dst = *src; for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) continue; dst[ndst] = *src; - dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name); + dst[ndst].st_name = bitmap_weight(info->strmap, + dst[ndst].st_name); ++ndst; } mod->core_num_syms = ndst; - mod->core_strtab = s = mod->module_core + stroffs; + mod->core_strtab = s = mod->module_core + info->stroffs; for (*s = 0, i = 1; i < info->sechdrs[info->index.str].sh_size; ++i) - if (test_bit(i, strmap)) + if (test_bit(i, info->strmap)) *++s = mod->strtab[i]; } #else @@ -2082,10 +2083,7 @@ static inline unsigned long layout_symtab(struct module *mod, return 0; } -static void add_kallsyms(struct module *mod, struct load_info *info, - unsigned long symoffs, - unsigned long stroffs, - unsigned long *strmap) +static void add_kallsyms(struct module *mod, struct load_info *info) { } #endif /* CONFIG_KALLSYMS */ @@ -2150,8 +2148,10 @@ static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, } #endif -/* Sets info->hdr and info->len. */ -static int copy_and_check(struct load_info *info, const void __user *umod, unsigned long len) +/* Sets info->hdr, info->len and info->args. */ +static int copy_and_check(struct load_info *info, + const void __user *umod, unsigned long len, + const char __user *uargs) { int err; Elf_Ehdr *hdr; @@ -2183,6 +2183,14 @@ static int copy_and_check(struct load_info *info, const void __user *umod, unsig err = -ENOEXEC; goto free_hdr; } + + /* Now copy in args */ + info->args = strndup_user(uargs, ~0UL >> 1); + if (IS_ERR(info->args)) { + err = PTR_ERR(info->args); + goto free_hdr; + } + info->hdr = hdr; info->len = len; return 0; @@ -2192,6 +2200,12 @@ free_hdr: return err; } +static void free_copy(struct load_info *info) +{ + kfree(info->args); + vfree(info->hdr); +} + static int rewrite_section_headers(struct load_info *info) { unsigned int i; @@ -2385,9 +2399,9 @@ static void find_module_sections(struct module *mod, Elf_Ehdr *hdr, mod->name); } -static struct module *move_module(struct module *mod, - Elf_Ehdr *hdr, Elf_Shdr *sechdrs, - const char *secstrings, unsigned modindex) +static int move_module(struct module *mod, + Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + const char *secstrings, unsigned modindex) { int i; void *ptr; @@ -2401,7 +2415,7 @@ static struct module *move_module(struct module *mod, */ kmemleak_not_leak(ptr); if (!ptr) - return ERR_PTR(-ENOMEM); + return -ENOMEM; memset(ptr, 0, mod->core_size); mod->module_core = ptr; @@ -2416,7 +2430,7 @@ static struct module *move_module(struct module *mod, kmemleak_ignore(ptr); if (!ptr && mod->init_size) { module_free(mod, mod->module_core); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } memset(ptr, 0, mod->init_size); mod->module_init = ptr; @@ -2443,10 +2457,8 @@ static struct module *move_module(struct module *mod, DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); } - /* Module has been moved. */ - mod = (void *)sechdrs[modindex].sh_addr; - kmemleak_load_module(mod, hdr, sechdrs, secstrings); - return mod; + + return 0; } static int check_module_license_and_versions(struct module *mod, @@ -2503,87 +2515,107 @@ static void flush_module_icache(const struct module *mod) set_fs(old_fs); } -/* Allocate and load the module: note that size of section 0 is always - zero, and we rely on this for optional sections. */ -static noinline struct module *load_module(void __user *umod, - unsigned long len, - const char __user *uargs) +static struct module *layout_and_allocate(struct load_info *info) { - struct load_info info = { NULL, }; + /* Module within temporary copy. */ struct module *mod; - long err; - unsigned long symoffs, stroffs, *strmap; - void __percpu *percpu; - struct _ddebug *debug = NULL; - unsigned int num_debug = 0; + int err; - DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", - umod, len, uargs); + mod = setup_load_info(info); + if (IS_ERR(mod)) + return mod; - err = copy_and_check(&info, umod, len); + err = check_modinfo(mod, info->sechdrs, info->index.info, info->index.vers); if (err) return ERR_PTR(err); - mod = setup_load_info(&info); - if (IS_ERR(mod)) { - err = PTR_ERR(mod); - goto free_hdr; - } - - err = check_modinfo(mod, info.sechdrs, info.index.info, info.index.vers); - if (err) - goto free_hdr; - - /* Now copy in args */ - info.args = strndup_user(uargs, ~0UL >> 1); - if (IS_ERR(info.args)) { - err = PTR_ERR(info.args); - goto free_hdr; - } - - strmap = kzalloc(BITS_TO_LONGS(info.sechdrs[info.index.str].sh_size) - * sizeof(long), GFP_KERNEL); - if (!strmap) { - err = -ENOMEM; - goto free_mod; - } - - mod->state = MODULE_STATE_COMING; - /* Allow arches to frob section contents and sizes. */ - err = module_frob_arch_sections(info.hdr, info.sechdrs, info.secstrings, mod); + err = module_frob_arch_sections(info->hdr, info->sechdrs, info->secstrings, mod); if (err < 0) - goto free_mod; + goto free_args; - if (info.index.pcpu) { + if (info->index.pcpu) { /* We have a special allocation for this section. */ - err = percpu_modalloc(mod, info.sechdrs[info.index.pcpu].sh_size, - info.sechdrs[info.index.pcpu].sh_addralign); + err = percpu_modalloc(mod, info->sechdrs[info->index.pcpu].sh_size, + info->sechdrs[info->index.pcpu].sh_addralign); if (err) - goto free_mod; - info.sechdrs[info.index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC; + goto free_args; + info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC; } - /* Keep this around for failure path. */ - percpu = mod_percpu(mod); /* Determine total sizes, and put offsets in sh_entsize. For now this is done generically; there doesn't appear to be any special cases for the architectures. */ - layout_sections(mod, info.hdr, info.sechdrs, info.secstrings); - symoffs = layout_symtab(mod, info.sechdrs, info.index.sym, info.index.str, info.hdr, - info.secstrings, &stroffs, strmap); + layout_sections(mod, info->hdr, info->sechdrs, info->secstrings); + + info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size) + * sizeof(long), GFP_KERNEL); + if (!info->strmap) { + err = -ENOMEM; + goto free_percpu; + } + info->symoffs = layout_symtab(mod, info->sechdrs, info->index.sym, info->index.str, info->hdr, + info->secstrings, &info->stroffs, info->strmap); /* Allocate and move to the final place */ - mod = move_module(mod, info.hdr, info.sechdrs, info.secstrings, info.index.mod); + err = move_module(mod, info->hdr, info->sechdrs, info->secstrings, info->index.mod); + if (err) + goto free_strmap; + + /* Module has been copied to its final place now: return it. */ + mod = (void *)info->sechdrs[info->index.mod].sh_addr; + kmemleak_load_module(mod, info->hdr, info->sechdrs, info->secstrings); + return mod; + +free_strmap: + kfree(info->strmap); +free_percpu: + percpu_modfree(mod); +free_args: + kfree(info->args); + return ERR_PTR(err); +} + +/* mod is no longer valid after this! */ +static void module_deallocate(struct module *mod, struct load_info *info) +{ + kfree(info->strmap); + percpu_modfree(mod); + module_free(mod, mod->module_init); + module_free(mod, mod->module_core); +} + +/* Allocate and load the module: note that size of section 0 is always + zero, and we rely on this for optional sections. */ +static noinline struct module *load_module(void __user *umod, + unsigned long len, + const char __user *uargs) +{ + struct load_info info = { NULL, }; + struct module *mod; + long err; + struct _ddebug *debug = NULL; + unsigned int num_debug = 0; + + DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", + umod, len, uargs); + + /* Copy in the blobs from userspace, check they are vaguely sane. */ + err = copy_and_check(&info, umod, len, uargs); + if (err) + return ERR_PTR(err); + + /* Figure out module layout, and allocate all the memory. */ + mod = layout_and_allocate(&info); if (IS_ERR(mod)) { err = PTR_ERR(mod); - goto free_percpu; + goto free_copy; } /* Now we've moved module, initialize linked lists, etc. */ err = module_unload_init(mod); if (err) - goto free_init; + goto free_module; /* Now we've got everything in the final locations, we can * find optional sections. */ @@ -2600,11 +2632,11 @@ static noinline struct module *load_module(void __user *umod, err = simplify_symbols(info.sechdrs, info.index.sym, info.strtab, info.index.vers, info.index.pcpu, mod); if (err < 0) - goto cleanup; + goto free_modinfo; err = apply_relocations(mod, info.hdr, info.sechdrs, info.index.sym, info.index.str); if (err < 0) - goto cleanup; + goto free_modinfo; /* Set up and sort exception table */ mod->extable = section_objs(info.hdr, info.sechdrs, info.secstrings, "__ex_table", @@ -2615,9 +2647,7 @@ static noinline struct module *load_module(void __user *umod, percpu_modcopy(mod, (void *)info.sechdrs[info.index.pcpu].sh_addr, info.sechdrs[info.index.pcpu].sh_size); - add_kallsyms(mod, &info, symoffs, stroffs, strmap); - kfree(strmap); - strmap = NULL; + add_kallsyms(mod, &info); if (!mod->taints) debug = section_objs(info.hdr, info.sechdrs, info.secstrings, "__verbose", @@ -2625,12 +2655,14 @@ static noinline struct module *load_module(void __user *umod, err = module_finalize(info.hdr, info.sechdrs, mod); if (err < 0) - goto cleanup; + goto free_modinfo; flush_module_icache(mod); mod->args = info.args; + mod->state = MODULE_STATE_COMING; + /* Now sew it into the lists so we can get lockdep and oops * info during argument parsing. Noone should access us, since * strong_try_module_get() will fail. @@ -2666,8 +2698,9 @@ static noinline struct module *load_module(void __user *umod, add_sect_attrs(mod, info.hdr->e_shnum, info.secstrings, info.sechdrs); add_notes_attrs(mod, info.hdr->e_shnum, info.secstrings, info.sechdrs); - /* Get rid of temporary copy */ - vfree(info.hdr); + /* Get rid of temporary copy and strmap. */ + kfree(info.strmap); + free_copy(&info); trace_module_load(mod); @@ -2684,21 +2717,14 @@ static noinline struct module *load_module(void __user *umod, mutex_unlock(&module_mutex); synchronize_sched(); module_arch_cleanup(mod); - cleanup: + free_modinfo: free_modinfo(mod); free_unload: module_unload_free(mod); - free_init: - module_free(mod, mod->module_init); - module_free(mod, mod->module_core); - /* mod will be freed with core. Don't access it beyond this line! */ - free_percpu: - free_percpu(percpu); - free_mod: - kfree(info.args); - kfree(strmap); - free_hdr: - vfree(info.hdr); + free_module: + module_deallocate(mod, &info); + free_copy: + free_copy(&info); return ERR_PTR(err); } -- cgit v1.2.3-70-g09d2 From 8f6d037815466cb25e7de8f00536eca71d94d4c3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:09 -0600 Subject: module: sysfs cleanup We change the sysfs functions to take struct load_info, and call them all in mod_sysfs_setup(). We also clean up the #ifdefs a little. Signed-off-by: Rusty Russell --- kernel/module.c | 77 +++++++++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 43 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index fb11e2a8823..fd8d46c6976 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1126,8 +1126,9 @@ static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs, * /sys/module/foo/sections stuff * J. Corbet */ -#if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) +#ifdef CONFIG_SYSFS +#ifdef CONFIG_KALLSYMS static inline bool sect_empty(const Elf_Shdr *sect) { return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0; @@ -1164,8 +1165,7 @@ static void free_sect_attrs(struct module_sect_attrs *sect_attrs) kfree(sect_attrs); } -static void add_sect_attrs(struct module *mod, unsigned int nsect, - char *secstrings, Elf_Shdr *sechdrs) +static void add_sect_attrs(struct module *mod, const struct load_info *info) { unsigned int nloaded = 0, i, size[2]; struct module_sect_attrs *sect_attrs; @@ -1173,8 +1173,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, struct attribute **gattr; /* Count loaded sections and allocate structures */ - for (i = 0; i < nsect; i++) - if (!sect_empty(&sechdrs[i])) + for (i = 0; i < info->hdr->e_shnum; i++) + if (!sect_empty(&info->sechdrs[i])) nloaded++; size[0] = ALIGN(sizeof(*sect_attrs) + nloaded * sizeof(sect_attrs->attrs[0]), @@ -1191,11 +1191,12 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, sect_attrs->nsections = 0; sattr = §_attrs->attrs[0]; gattr = §_attrs->grp.attrs[0]; - for (i = 0; i < nsect; i++) { - if (sect_empty(&sechdrs[i])) + for (i = 0; i < info->hdr->e_shnum; i++) { + Elf_Shdr *sec = &info->sechdrs[i]; + if (sect_empty(sec)) continue; - sattr->address = sechdrs[i].sh_addr; - sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, + sattr->address = sec->sh_addr; + sattr->name = kstrdup(info->secstrings + sec->sh_name, GFP_KERNEL); if (sattr->name == NULL) goto out; @@ -1263,8 +1264,7 @@ static void free_notes_attrs(struct module_notes_attrs *notes_attrs, kfree(notes_attrs); } -static void add_notes_attrs(struct module *mod, unsigned int nsect, - char *secstrings, Elf_Shdr *sechdrs) +static void add_notes_attrs(struct module *mod, const struct load_info *info) { unsigned int notes, loaded, i; struct module_notes_attrs *notes_attrs; @@ -1276,9 +1276,9 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, /* Count notes sections and allocate structures. */ notes = 0; - for (i = 0; i < nsect; i++) - if (!sect_empty(&sechdrs[i]) && - (sechdrs[i].sh_type == SHT_NOTE)) + for (i = 0; i < info->hdr->e_shnum; i++) + if (!sect_empty(&info->sechdrs[i]) && + (info->sechdrs[i].sh_type == SHT_NOTE)) ++notes; if (notes == 0) @@ -1292,15 +1292,15 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, notes_attrs->notes = notes; nattr = ¬es_attrs->attrs[0]; - for (loaded = i = 0; i < nsect; ++i) { - if (sect_empty(&sechdrs[i])) + for (loaded = i = 0; i < info->hdr->e_shnum; ++i) { + if (sect_empty(&info->sechdrs[i])) continue; - if (sechdrs[i].sh_type == SHT_NOTE) { + if (info->sechdrs[i].sh_type == SHT_NOTE) { sysfs_bin_attr_init(nattr); nattr->attr.name = mod->sect_attrs->attrs[loaded].name; nattr->attr.mode = S_IRUGO; - nattr->size = sechdrs[i].sh_size; - nattr->private = (void *) sechdrs[i].sh_addr; + nattr->size = info->sechdrs[i].sh_size; + nattr->private = (void *) info->sechdrs[i].sh_addr; nattr->read = module_notes_read; ++nattr; } @@ -1331,8 +1331,8 @@ static void remove_notes_attrs(struct module *mod) #else -static inline void add_sect_attrs(struct module *mod, unsigned int nsect, - char *sectstrings, Elf_Shdr *sechdrs) +static inline void add_sect_attrs(struct module *mod, + const struct load_info *info) { } @@ -1340,17 +1340,16 @@ static inline void remove_sect_attrs(struct module *mod) { } -static inline void add_notes_attrs(struct module *mod, unsigned int nsect, - char *sectstrings, Elf_Shdr *sechdrs) +static inline void add_notes_attrs(struct module *mod, + const struct load_info *info) { } static inline void remove_notes_attrs(struct module *mod) { } -#endif +#endif /* CONFIG_KALLSYMS */ -#ifdef CONFIG_SYSFS static void add_usage_links(struct module *mod) { #ifdef CONFIG_MODULE_UNLOAD @@ -1455,6 +1454,7 @@ out: } static int mod_sysfs_setup(struct module *mod, + const struct load_info *info, struct kernel_param *kparam, unsigned int num_params) { @@ -1479,6 +1479,8 @@ static int mod_sysfs_setup(struct module *mod, goto out_unreg_param; add_usage_links(mod); + add_sect_attrs(mod, info); + add_notes_attrs(mod, info); kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); return 0; @@ -1495,32 +1497,26 @@ out: static void mod_sysfs_fini(struct module *mod) { + remove_notes_attrs(mod); + remove_sect_attrs(mod); kobject_put(&mod->mkobj.kobj); } -#else /* CONFIG_SYSFS */ +#else /* !CONFIG_SYSFS */ -static inline int mod_sysfs_init(struct module *mod) +static int mod_sysfs_init(struct module *mod) { return 0; } -static inline int mod_sysfs_setup(struct module *mod, +static int mod_sysfs_setup(struct module *mod, + const struct load_info *info, struct kernel_param *kparam, unsigned int num_params) { return 0; } -static inline int module_add_modinfo_attrs(struct module *mod) -{ - return 0; -} - -static inline void module_remove_modinfo_attrs(struct module *mod) -{ -} - static void mod_sysfs_fini(struct module *mod) { } @@ -1561,8 +1557,6 @@ static void free_module(struct module *mod) mutex_lock(&module_mutex); stop_machine(__unlink_module, mod, NULL); mutex_unlock(&module_mutex); - remove_notes_attrs(mod); - remove_sect_attrs(mod); mod_kobject_remove(mod); /* Remove dynamic debug info */ @@ -2691,13 +2685,10 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto unlink; - err = mod_sysfs_setup(mod, mod->kp, mod->num_kp); + err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp); if (err < 0) goto unlink; - add_sect_attrs(mod, info.hdr->e_shnum, info.secstrings, info.sechdrs); - add_notes_attrs(mod, info.hdr->e_shnum, info.secstrings, info.sechdrs); - /* Get rid of temporary copy and strmap. */ kfree(info.strmap); free_copy(&info); -- cgit v1.2.3-70-g09d2 From 36b0360d17dc3928cc96347a18a3a1cdbb7e506d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:09 -0600 Subject: module: fix sysfs cleanup for !CONFIG_SYSFS Restore the stub module_remove_modinfo_attrs, remove the now-unused !CONFIG_SYSFS module_sysfs_init. Also, rename mod_kobject_remove() to mod_sysfs_teardown() as it is the logical counterpart to mod_sysfs_setup now. Reported-by: Randy Dunlap Signed-off-by: Rusty Russell --- kernel/module.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index fd8d46c6976..a64b26cf187 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1504,11 +1504,6 @@ static void mod_sysfs_fini(struct module *mod) #else /* !CONFIG_SYSFS */ -static int mod_sysfs_init(struct module *mod) -{ - return 0; -} - static int mod_sysfs_setup(struct module *mod, const struct load_info *info, struct kernel_param *kparam, @@ -1521,13 +1516,17 @@ static void mod_sysfs_fini(struct module *mod) { } +static void module_remove_modinfo_attrs(struct module *mod) +{ +} + static void del_usage_links(struct module *mod) { } #endif /* CONFIG_SYSFS */ -static void mod_kobject_remove(struct module *mod) +static void mod_sysfs_teardown(struct module *mod) { del_usage_links(mod); module_remove_modinfo_attrs(mod); @@ -1557,7 +1556,7 @@ static void free_module(struct module *mod) mutex_lock(&module_mutex); stop_machine(__unlink_module, mod, NULL); mutex_unlock(&module_mutex); - mod_kobject_remove(mod); + mod_sysfs_teardown(mod); /* Remove dynamic debug info */ ddebug_remove_module(mod->name); -- cgit v1.2.3-70-g09d2 From 49668688dd5a5f46c72f965835388ed16c596055 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:10 -0600 Subject: module: pass load_info into other functions Pass the struct load_info into all the other functions in module loading. This neatens things and makes them more consistent. Signed-off-by: Rusty Russell --- kernel/module.c | 372 ++++++++++++++++++++++++-------------------------------- 1 file changed, 157 insertions(+), 215 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index a64b26cf187..29dd232f818 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -152,42 +152,38 @@ void __module_put_and_exit(struct module *mod, long code) EXPORT_SYMBOL(__module_put_and_exit); /* Find a module section: 0 means not found. */ -static unsigned int find_sec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings, - const char *name) +static unsigned int find_sec(const struct load_info *info, const char *name) { unsigned int i; - for (i = 1; i < hdr->e_shnum; i++) + for (i = 1; i < info->hdr->e_shnum; i++) { + Elf_Shdr *shdr = &info->sechdrs[i]; /* Alloc bit cleared means "ignore it." */ - if ((sechdrs[i].sh_flags & SHF_ALLOC) - && strcmp(secstrings+sechdrs[i].sh_name, name) == 0) + if ((shdr->sh_flags & SHF_ALLOC) + && strcmp(info->secstrings + shdr->sh_name, name) == 0) return i; + } return 0; } /* Find a module section, or NULL. */ -static void *section_addr(Elf_Ehdr *hdr, Elf_Shdr *shdrs, - const char *secstrings, const char *name) +static void *section_addr(const struct load_info *info, const char *name) { /* Section 0 has sh_addr 0. */ - return (void *)shdrs[find_sec(hdr, shdrs, secstrings, name)].sh_addr; + return (void *)info->sechdrs[find_sec(info, name)].sh_addr; } /* Find a module section, or NULL. Fill in number of "objects" in section. */ -static void *section_objs(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings, +static void *section_objs(const struct load_info *info, const char *name, size_t object_size, unsigned int *num) { - unsigned int sec = find_sec(hdr, sechdrs, secstrings, name); + unsigned int sec = find_sec(info, name); /* Section 0 has sh_addr 0 and sh_size 0. */ - *num = sechdrs[sec].sh_size / object_size; - return (void *)sechdrs[sec].sh_addr; + *num = info->sechdrs[sec].sh_size / object_size; + return (void *)info->sechdrs[sec].sh_addr; } /* Provided by the linker */ @@ -417,11 +413,9 @@ static void percpu_modfree(struct module *mod) free_percpu(mod->percpu); } -static unsigned int find_pcpusec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) +static unsigned int find_pcpusec(struct load_info *info) { - return find_sec(hdr, sechdrs, secstrings, ".data..percpu"); + return find_sec(info, ".data..percpu"); } static void percpu_modcopy(struct module *mod, @@ -481,9 +475,7 @@ static inline int percpu_modalloc(struct module *mod, static inline void percpu_modfree(struct module *mod) { } -static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) +static unsigned int find_pcpusec(struct load_info *info) { return 0; } @@ -1067,10 +1059,9 @@ static inline int same_magic(const char *amagic, const char *bmagic, #endif /* CONFIG_MODVERSIONS */ /* Resolve a symbol for this module. I.e. if we find one, record usage. */ -static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, - unsigned int versindex, +static const struct kernel_symbol *resolve_symbol(struct module *mod, + const struct load_info *info, const char *name, - struct module *mod, char ownername[]) { struct module *owner; @@ -1084,7 +1075,8 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, if (!sym) goto unlock; - if (!check_version(sechdrs, versindex, name, mod, crc, owner)) { + if (!check_version(info->sechdrs, info->index.vers, name, mod, crc, + owner)) { sym = ERR_PTR(-EINVAL); goto getname; } @@ -1103,21 +1095,20 @@ unlock: return sym; } -static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs, - unsigned int versindex, - const char *name, - struct module *mod) +static const struct kernel_symbol * +resolve_symbol_wait(struct module *mod, + const struct load_info *info, + const char *name) { const struct kernel_symbol *ksym; - char ownername[MODULE_NAME_LEN]; + char owner[MODULE_NAME_LEN]; if (wait_event_interruptible_timeout(module_wq, - !IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name, - mod, ownername)) || - PTR_ERR(ksym) != -EBUSY, + !IS_ERR(ksym = resolve_symbol(mod, info, name, owner)) + || PTR_ERR(ksym) != -EBUSY, 30 * HZ) <= 0) { printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n", - mod->name, ownername); + mod->name, owner); } return ksym; } @@ -1640,25 +1631,23 @@ static int verify_export_symbols(struct module *mod) } /* Change all symbols so that st_value encodes the pointer directly. */ -static int simplify_symbols(Elf_Shdr *sechdrs, - unsigned int symindex, - const char *strtab, - unsigned int versindex, - unsigned int pcpuindex, - struct module *mod) -{ - Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; +static int simplify_symbols(struct module *mod, const struct load_info *info) +{ + Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; + Elf_Sym *sym = (void *)symsec->sh_addr; unsigned long secbase; - unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym); + unsigned int i; int ret = 0; const struct kernel_symbol *ksym; - for (i = 1; i < n; i++) { + for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) { + const char *name = info->strtab + sym[i].st_name; + switch (sym[i].st_shndx) { case SHN_COMMON: /* We compiled with -fno-common. These are not supposed to happen. */ - DEBUGP("Common symbol: %s\n", strtab + sym[i].st_name); + DEBUGP("Common symbol: %s\n", name); printk("%s: please compile with -fno-common\n", mod->name); ret = -ENOEXEC; @@ -1671,9 +1660,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, break; case SHN_UNDEF: - ksym = resolve_symbol_wait(sechdrs, versindex, - strtab + sym[i].st_name, - mod); + ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. */ if (ksym && !IS_ERR(ksym)) { sym[i].st_value = ksym->value; @@ -1685,17 +1672,16 @@ static int simplify_symbols(Elf_Shdr *sechdrs, break; printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n", - mod->name, strtab + sym[i].st_name, - PTR_ERR(ksym)); + mod->name, name, PTR_ERR(ksym)); ret = PTR_ERR(ksym) ?: -ENOENT; break; default: /* Divert to percpu allocation if a percpu var. */ - if (sym[i].st_shndx == pcpuindex) + if (sym[i].st_shndx == info->index.pcpu) secbase = (unsigned long)mod_percpu(mod); else - secbase = sechdrs[sym[i].st_shndx].sh_addr; + secbase = info->sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; break; } @@ -1704,33 +1690,29 @@ static int simplify_symbols(Elf_Shdr *sechdrs, return ret; } -static int apply_relocations(struct module *mod, - Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - unsigned int symindex, - unsigned int strindex) +static int apply_relocations(struct module *mod, const struct load_info *info) { unsigned int i; int err = 0; /* Now do relocations. */ - for (i = 1; i < hdr->e_shnum; i++) { - const char *strtab = (char *)sechdrs[strindex].sh_addr; - unsigned int info = sechdrs[i].sh_info; + for (i = 1; i < info->hdr->e_shnum; i++) { + unsigned int infosec = info->sechdrs[i].sh_info; /* Not a valid relocation section? */ - if (info >= hdr->e_shnum) + if (infosec >= info->hdr->e_shnum) continue; /* Don't bother with non-allocated sections */ - if (!(sechdrs[info].sh_flags & SHF_ALLOC)) + if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC)) continue; - if (sechdrs[i].sh_type == SHT_REL) - err = apply_relocate(sechdrs, strtab, symindex, i, mod); - else if (sechdrs[i].sh_type == SHT_RELA) - err = apply_relocate_add(sechdrs, strtab, symindex, i, - mod); + if (info->sechdrs[i].sh_type == SHT_REL) + err = apply_relocate(info->sechdrs, info->strtab, + info->index.sym, i, mod); + else if (info->sechdrs[i].sh_type == SHT_RELA) + err = apply_relocate_add(info->sechdrs, info->strtab, + info->index.sym, i, mod); if (err < 0) break; } @@ -1761,10 +1743,7 @@ static long get_offset(struct module *mod, unsigned int *size, might -- code, read-only data, read-write data, small data. Tally sizes, and place the offsets into sh_entsize fields: high bit means it belongs in init. */ -static void layout_sections(struct module *mod, - const Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) +static void layout_sections(struct module *mod, struct load_info *info) { static unsigned long const masks[][2] = { /* NOTE: all executable code must be the first section @@ -1777,21 +1756,22 @@ static void layout_sections(struct module *mod, }; unsigned int m, i; - for (i = 0; i < hdr->e_shnum; i++) - sechdrs[i].sh_entsize = ~0UL; + for (i = 0; i < info->hdr->e_shnum; i++) + info->sechdrs[i].sh_entsize = ~0UL; DEBUGP("Core section allocation order:\n"); for (m = 0; m < ARRAY_SIZE(masks); ++m) { - for (i = 0; i < hdr->e_shnum; ++i) { - Elf_Shdr *s = &sechdrs[i]; + for (i = 0; i < info->hdr->e_shnum; ++i) { + Elf_Shdr *s = &info->sechdrs[i]; + const char *sname = info->secstrings + s->sh_name; if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1]) || s->sh_entsize != ~0UL - || strstarts(secstrings + s->sh_name, ".init")) + || strstarts(sname, ".init")) continue; s->sh_entsize = get_offset(mod, &mod->core_size, s, i); - DEBUGP("\t%s\n", secstrings + s->sh_name); + DEBUGP("\t%s\n", name); } if (m == 0) mod->core_text_size = mod->core_size; @@ -1799,17 +1779,18 @@ static void layout_sections(struct module *mod, DEBUGP("Init section allocation order:\n"); for (m = 0; m < ARRAY_SIZE(masks); ++m) { - for (i = 0; i < hdr->e_shnum; ++i) { - Elf_Shdr *s = &sechdrs[i]; + for (i = 0; i < info->hdr->e_shnum; ++i) { + Elf_Shdr *s = &info->sechdrs[i]; + const char *sname = info->secstrings + s->sh_name; if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1]) || s->sh_entsize != ~0UL - || !strstarts(secstrings + s->sh_name, ".init")) + || !strstarts(sname, ".init")) continue; s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) | INIT_OFFSET_MASK); - DEBUGP("\t%s\n", secstrings + s->sh_name); + DEBUGP("\t%s\n", sname); } if (m == 0) mod->init_text_size = mod->init_size; @@ -1848,33 +1829,28 @@ static char *next_string(char *string, unsigned long *secsize) return string; } -static char *get_modinfo(const Elf_Shdr *sechdrs, - unsigned int info, - const char *tag) +static char *get_modinfo(struct load_info *info, const char *tag) { char *p; unsigned int taglen = strlen(tag); - unsigned long size = sechdrs[info].sh_size; + Elf_Shdr *infosec = &info->sechdrs[info->index.info]; + unsigned long size = infosec->sh_size; - for (p = (char *)sechdrs[info].sh_addr; p; p = next_string(p, &size)) { + for (p = (char *)infosec->sh_addr; p; p = next_string(p, &size)) { if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=') return p + taglen + 1; } return NULL; } -static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, - unsigned int infoindex) +static void setup_modinfo(struct module *mod, struct load_info *info) { struct module_attribute *attr; int i; for (i = 0; (attr = modinfo_attrs[i]); i++) { if (attr->setup) - attr->setup(mod, - get_modinfo(sechdrs, - infoindex, - attr->attr.name)); + attr->setup(mod, get_modinfo(info, attr->attr.name)); } } @@ -1976,56 +1952,45 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, return true; } -static unsigned long layout_symtab(struct module *mod, - Elf_Shdr *sechdrs, - unsigned int symindex, - unsigned int strindex, - const Elf_Ehdr *hdr, - const char *secstrings, - unsigned long *pstroffs, - unsigned long *strmap) +static void layout_symtab(struct module *mod, struct load_info *info) { - unsigned long symoffs; - Elf_Shdr *symsect = sechdrs + symindex; - Elf_Shdr *strsect = sechdrs + strindex; + Elf_Shdr *symsect = info->sechdrs + info->index.sym; + Elf_Shdr *strsect = info->sechdrs + info->index.str; const Elf_Sym *src; - const char *strtab; unsigned int i, nsrc, ndst; /* Put symbol section at end of init part of module. */ symsect->sh_flags |= SHF_ALLOC; symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, - symindex) | INIT_OFFSET_MASK; - DEBUGP("\t%s\n", secstrings + symsect->sh_name); + info->index.sym) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", info->secstrings + symsect->sh_name); - src = (void *)hdr + symsect->sh_offset; + src = (void *)info->hdr + symsect->sh_offset; nsrc = symsect->sh_size / sizeof(*src); - strtab = (void *)hdr + strsect->sh_offset; for (ndst = i = 1; i < nsrc; ++i, ++src) - if (is_core_symbol(src, sechdrs, hdr->e_shnum)) { + if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) { unsigned int j = src->st_name; - while(!__test_and_set_bit(j, strmap) && strtab[j]) + while (!__test_and_set_bit(j, info->strmap) + && info->strtab[j]) ++j; ++ndst; } /* Append room for core symbols at end of core part. */ - symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); - mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); + mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); /* Put string table section at end of init part of module. */ strsect->sh_flags |= SHF_ALLOC; strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, - strindex) | INIT_OFFSET_MASK; - DEBUGP("\t%s\n", secstrings + strsect->sh_name); + info->index.str) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", info->secstrings + strsect->sh_name); /* Append room for core symbols' strings at end of core part. */ - *pstroffs = mod->core_size; - __set_bit(0, strmap); - mod->core_size += bitmap_weight(strmap, strsect->sh_size); - - return symoffs; + info->stroffs = mod->core_size; + __set_bit(0, info->strmap); + mod->core_size += bitmap_weight(info->strmap, strsect->sh_size); } static void add_kallsyms(struct module *mod, struct load_info *info) @@ -2064,16 +2029,8 @@ static void add_kallsyms(struct module *mod, struct load_info *info) *++s = mod->strtab[i]; } #else -static inline unsigned long layout_symtab(struct module *mod, - Elf_Shdr *sechdrs, - unsigned int symindex, - unsigned int strindex, - const Elf_Ehdr *hdr, - const char *secstrings, - unsigned long *pstroffs, - unsigned long *strmap) +static inline void layout_symtab(struct module *mod, struct load_info *info) { - return 0; } static void add_kallsyms(struct module *mod, struct load_info *info) @@ -2113,30 +2070,28 @@ static void *module_alloc_update_bounds(unsigned long size) } #ifdef CONFIG_DEBUG_KMEMLEAK -static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - const char *secstrings) +static void kmemleak_load_module(const struct module *mod, + const struct load_info *info) { unsigned int i; /* only scan the sections containing data */ kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); - for (i = 1; i < hdr->e_shnum; i++) { - if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + for (i = 1; i < info->hdr->e_shnum; i++) { + const char *name = info->secstrings + info->sechdrs[i].sh_name; + if (!(info->sechdrs[i].sh_flags & SHF_ALLOC)) continue; - if (strncmp(secstrings + sechdrs[i].sh_name, ".data", 5) != 0 - && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) + if (!strstarts(name, ".data") && !strstarts(name, ".bss")) continue; - kmemleak_scan_area((void *)sechdrs[i].sh_addr, - sechdrs[i].sh_size, GFP_KERNEL); + kmemleak_scan_area((void *)info->sechdrs[i].sh_addr, + info->sechdrs[i].sh_size, GFP_KERNEL); } } #else -static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) +static inline void kmemleak_load_module(const struct module *mod, + const struct load_info *info) { } #endif @@ -2227,8 +2182,8 @@ static int rewrite_section_headers(struct load_info *info) } /* Track but don't keep modinfo and version sections. */ - info->index.vers = find_sec(info->hdr, info->sechdrs, info->secstrings, "__versions"); - info->index.info = find_sec(info->hdr, info->sechdrs, info->secstrings, ".modinfo"); + info->index.vers = find_sec(info, "__versions"); + info->index.info = find_sec(info, ".modinfo"); info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; return 0; @@ -2268,8 +2223,7 @@ static struct module *setup_load_info(struct load_info *info) } } - info->index.mod = find_sec(info->hdr, info->sechdrs, info->secstrings, - ".gnu.linkonce.this_module"); + info->index.mod = find_sec(info, ".gnu.linkonce.this_module"); if (!info->index.mod) { printk(KERN_WARNING "No module found in object\n"); return ERR_PTR(-ENOEXEC); @@ -2283,7 +2237,7 @@ static struct module *setup_load_info(struct load_info *info) return ERR_PTR(-ENOEXEC); } - info->index.pcpu = find_pcpusec(info->hdr, info->sechdrs, info->secstrings); + info->index.pcpu = find_pcpusec(info); /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(info->sechdrs, info->index.vers, mod)) @@ -2292,11 +2246,9 @@ static struct module *setup_load_info(struct load_info *info) return mod; } -static int check_modinfo(struct module *mod, - const Elf_Shdr *sechdrs, - unsigned int infoindex, unsigned int versindex) +static int check_modinfo(struct module *mod, struct load_info *info) { - const char *modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); + const char *modmagic = get_modinfo(info, "vermagic"); int err; /* This is allowed: modprobe --force will invalidate it. */ @@ -2304,13 +2256,13 @@ static int check_modinfo(struct module *mod, err = try_to_force_load(mod, "bad vermagic"); if (err) return err; - } else if (!same_magic(modmagic, vermagic, versindex)) { + } else if (!same_magic(modmagic, vermagic, info->index.vers)) { printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", mod->name, modmagic, vermagic); return -ENOEXEC; } - if (get_modinfo(sechdrs, infoindex, "staging")) { + if (get_modinfo(info, "staging")) { add_taint_module(mod, TAINT_CRAP); printk(KERN_WARNING "%s: module is from the staging directory," " the quality is unknown, you have been warned.\n", @@ -2318,58 +2270,51 @@ static int check_modinfo(struct module *mod, } /* Set up license info based on the info section */ - set_license(mod, get_modinfo(sechdrs, infoindex, "license")); + set_license(mod, get_modinfo(info, "license")); return 0; } -static void find_module_sections(struct module *mod, Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, const char *secstrings) +static void find_module_sections(struct module *mod, + const struct load_info *info) { - mod->kp = section_objs(hdr, sechdrs, secstrings, "__param", + mod->kp = section_objs(info, "__param", sizeof(*mod->kp), &mod->num_kp); - mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab", + mod->syms = section_objs(info, "__ksymtab", sizeof(*mod->syms), &mod->num_syms); - mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab"); - mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl", + mod->crcs = section_addr(info, "__kcrctab"); + mod->gpl_syms = section_objs(info, "__ksymtab_gpl", sizeof(*mod->gpl_syms), &mod->num_gpl_syms); - mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl"); - mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings, + mod->gpl_crcs = section_addr(info, "__kcrctab_gpl"); + mod->gpl_future_syms = section_objs(info, "__ksymtab_gpl_future", sizeof(*mod->gpl_future_syms), &mod->num_gpl_future_syms); - mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings, - "__kcrctab_gpl_future"); + mod->gpl_future_crcs = section_addr(info, "__kcrctab_gpl_future"); #ifdef CONFIG_UNUSED_SYMBOLS - mod->unused_syms = section_objs(hdr, sechdrs, secstrings, - "__ksymtab_unused", + mod->unused_syms = section_objs(info, "__ksymtab_unused", sizeof(*mod->unused_syms), &mod->num_unused_syms); - mod->unused_crcs = section_addr(hdr, sechdrs, secstrings, - "__kcrctab_unused"); - mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings, - "__ksymtab_unused_gpl", + mod->unused_crcs = section_addr(info, "__kcrctab_unused"); + mod->unused_gpl_syms = section_objs(info, "__ksymtab_unused_gpl", sizeof(*mod->unused_gpl_syms), &mod->num_unused_gpl_syms); - mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings, - "__kcrctab_unused_gpl"); + mod->unused_gpl_crcs = section_addr(info, "__kcrctab_unused_gpl"); #endif #ifdef CONFIG_CONSTRUCTORS - mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors", + mod->ctors = section_objs(info, ".ctors", sizeof(*mod->ctors), &mod->num_ctors); #endif #ifdef CONFIG_TRACEPOINTS - mod->tracepoints = section_objs(hdr, sechdrs, secstrings, - "__tracepoints", + mod->tracepoints = section_objs(info, "__tracepoints", sizeof(*mod->tracepoints), &mod->num_tracepoints); #endif #ifdef CONFIG_EVENT_TRACING - mod->trace_events = section_objs(hdr, sechdrs, secstrings, - "_ftrace_events", + mod->trace_events = section_objs(info, "_ftrace_events", sizeof(*mod->trace_events), &mod->num_trace_events); /* @@ -2381,20 +2326,17 @@ static void find_module_sections(struct module *mod, Elf_Ehdr *hdr, #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ - mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings, - "__mcount_loc", + mod->ftrace_callsites = section_objs(info, "__mcount_loc", sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif - if (section_addr(hdr, sechdrs, secstrings, "__obsparm")) + if (section_addr(info, "__obsparm")) printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", mod->name); } -static int move_module(struct module *mod, - Elf_Ehdr *hdr, Elf_Shdr *sechdrs, - const char *secstrings, unsigned modindex) +static int move_module(struct module *mod, struct load_info *info) { int i; void *ptr; @@ -2430,32 +2372,31 @@ static int move_module(struct module *mod, /* Transfer each section which specifies SHF_ALLOC */ DEBUGP("final section addresses:\n"); - for (i = 0; i < hdr->e_shnum; i++) { + for (i = 0; i < info->hdr->e_shnum; i++) { void *dest; + Elf_Shdr *shdr = &info->sechdrs[i]; - if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + if (!(shdr->sh_flags & SHF_ALLOC)) continue; - if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) + if (shdr->sh_entsize & INIT_OFFSET_MASK) dest = mod->module_init - + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); + + (shdr->sh_entsize & ~INIT_OFFSET_MASK); else - dest = mod->module_core + sechdrs[i].sh_entsize; + dest = mod->module_core + shdr->sh_entsize; - if (sechdrs[i].sh_type != SHT_NOBITS) - memcpy(dest, (void *)sechdrs[i].sh_addr, - sechdrs[i].sh_size); + if (shdr->sh_type != SHT_NOBITS) + memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); /* Update sh_addr to point to copy in image. */ - sechdrs[i].sh_addr = (unsigned long)dest; + shdr->sh_addr = (unsigned long)dest; DEBUGP("\t0x%lx %s\n", - sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); + shdr->sh_addr, info->secstrings + shdr->sh_name); } return 0; } -static int check_module_license_and_versions(struct module *mod, - Elf_Shdr *sechdrs) +static int check_module_license_and_versions(struct module *mod) { /* * ndiswrapper is under GPL by itself, but loads proprietary modules. @@ -2512,34 +2453,37 @@ static struct module *layout_and_allocate(struct load_info *info) { /* Module within temporary copy. */ struct module *mod; + Elf_Shdr *pcpusec; int err; mod = setup_load_info(info); if (IS_ERR(mod)) return mod; - err = check_modinfo(mod, info->sechdrs, info->index.info, info->index.vers); + err = check_modinfo(mod, info); if (err) return ERR_PTR(err); /* Allow arches to frob section contents and sizes. */ - err = module_frob_arch_sections(info->hdr, info->sechdrs, info->secstrings, mod); + err = module_frob_arch_sections(info->hdr, info->sechdrs, + info->secstrings, mod); if (err < 0) goto free_args; - if (info->index.pcpu) { + pcpusec = &info->sechdrs[info->index.pcpu]; + if (pcpusec->sh_size) { /* We have a special allocation for this section. */ - err = percpu_modalloc(mod, info->sechdrs[info->index.pcpu].sh_size, - info->sechdrs[info->index.pcpu].sh_addralign); + err = percpu_modalloc(mod, + pcpusec->sh_size, pcpusec->sh_addralign); if (err) goto free_args; - info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC; + pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC; } /* Determine total sizes, and put offsets in sh_entsize. For now this is done generically; there doesn't appear to be any special cases for the architectures. */ - layout_sections(mod, info->hdr, info->sechdrs, info->secstrings); + layout_sections(mod, info); info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size) * sizeof(long), GFP_KERNEL); @@ -2547,17 +2491,16 @@ static struct module *layout_and_allocate(struct load_info *info) err = -ENOMEM; goto free_percpu; } - info->symoffs = layout_symtab(mod, info->sechdrs, info->index.sym, info->index.str, info->hdr, - info->secstrings, &info->stroffs, info->strmap); + layout_symtab(mod, info); /* Allocate and move to the final place */ - err = move_module(mod, info->hdr, info->sechdrs, info->secstrings, info->index.mod); + err = move_module(mod, info); if (err) goto free_strmap; /* Module has been copied to its final place now: return it. */ mod = (void *)info->sechdrs[info->index.mod].sh_addr; - kmemleak_load_module(mod, info->hdr, info->sechdrs, info->secstrings); + kmemleak_load_module(mod, info); return mod; free_strmap: @@ -2605,34 +2548,33 @@ static noinline struct module *load_module(void __user *umod, goto free_copy; } - /* Now we've moved module, initialize linked lists, etc. */ + /* Now module is in final location, initialize linked lists, etc. */ err = module_unload_init(mod); if (err) goto free_module; /* Now we've got everything in the final locations, we can * find optional sections. */ - find_module_sections(mod, info.hdr, info.sechdrs, info.secstrings); + find_module_sections(mod, &info); - err = check_module_license_and_versions(mod, info.sechdrs); + err = check_module_license_and_versions(mod); if (err) goto free_unload; /* Set up MODINFO_ATTR fields */ - setup_modinfo(mod, info.sechdrs, info.index.info); + setup_modinfo(mod, &info); /* Fix up syms, so that st_value is a pointer to location. */ - err = simplify_symbols(info.sechdrs, info.index.sym, info.strtab, info.index.vers, info.index.pcpu, - mod); + err = simplify_symbols(mod, &info); if (err < 0) goto free_modinfo; - err = apply_relocations(mod, info.hdr, info.sechdrs, info.index.sym, info.index.str); + err = apply_relocations(mod, &info); if (err < 0) goto free_modinfo; /* Set up and sort exception table */ - mod->extable = section_objs(info.hdr, info.sechdrs, info.secstrings, "__ex_table", + mod->extable = section_objs(&info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); sort_extable(mod->extable, mod->extable + mod->num_exentries); @@ -2643,7 +2585,7 @@ static noinline struct module *load_module(void __user *umod, add_kallsyms(mod, &info); if (!mod->taints) - debug = section_objs(info.hdr, info.sechdrs, info.secstrings, "__verbose", + debug = section_objs(&info, "__verbose", sizeof(*debug), &num_debug); err = module_finalize(info.hdr, info.sechdrs, mod); -- cgit v1.2.3-70-g09d2 From 6526c534b2677ca601b7b92851437feb041d02a1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:10 -0600 Subject: module: move module args strndup_user to just before use Instead of copying and allocating the args and storing it in load_info, we can just allocate them right before we need them. Signed-off-by: Rusty Russell --- kernel/module.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 29dd232f818..cabafe22844 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -114,7 +114,7 @@ struct load_info { Elf_Ehdr *hdr; unsigned long len; Elf_Shdr *sechdrs; - char *secstrings, *args, *strtab; + char *secstrings, *strtab; unsigned long *strmap; unsigned long symoffs, stroffs; struct { @@ -2096,7 +2096,7 @@ static inline void kmemleak_load_module(const struct module *mod, } #endif -/* Sets info->hdr, info->len and info->args. */ +/* Sets info->hdr and info->len. */ static int copy_and_check(struct load_info *info, const void __user *umod, unsigned long len, const char __user *uargs) @@ -2132,13 +2132,6 @@ static int copy_and_check(struct load_info *info, goto free_hdr; } - /* Now copy in args */ - info->args = strndup_user(uargs, ~0UL >> 1); - if (IS_ERR(info->args)) { - err = PTR_ERR(info->args); - goto free_hdr; - } - info->hdr = hdr; info->len = len; return 0; @@ -2150,7 +2143,6 @@ free_hdr: static void free_copy(struct load_info *info) { - kfree(info->args); vfree(info->hdr); } @@ -2468,7 +2460,7 @@ static struct module *layout_and_allocate(struct load_info *info) err = module_frob_arch_sections(info->hdr, info->sechdrs, info->secstrings, mod); if (err < 0) - goto free_args; + goto out; pcpusec = &info->sechdrs[info->index.pcpu]; if (pcpusec->sh_size) { @@ -2476,7 +2468,7 @@ static struct module *layout_and_allocate(struct load_info *info) err = percpu_modalloc(mod, pcpusec->sh_size, pcpusec->sh_addralign); if (err) - goto free_args; + goto out; pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC; } @@ -2507,8 +2499,7 @@ free_strmap: kfree(info->strmap); free_percpu: percpu_modfree(mod); -free_args: - kfree(info->args); +out: return ERR_PTR(err); } @@ -2594,7 +2585,12 @@ static noinline struct module *load_module(void __user *umod, flush_module_icache(mod); - mod->args = info.args; + /* Now copy in args */ + mod->args = strndup_user(uargs, ~0UL >> 1); + if (IS_ERR(mod->args)) { + err = PTR_ERR(mod->args); + goto free_arch_cleanup; + } mod->state = MODULE_STATE_COMING; @@ -2648,6 +2644,8 @@ static noinline struct module *load_module(void __user *umod, unlock: mutex_unlock(&module_mutex); synchronize_sched(); + kfree(mod->args); + free_arch_cleanup: module_arch_cleanup(mod); free_modinfo: free_modinfo(mod); -- cgit v1.2.3-70-g09d2 From 811d66a0e1e99902d365497eec7884113a2665bd Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:12 -0600 Subject: module: group post-relocation functions into post_relocation() This simply hoists more code out of load_module; we also put the identification of the extable and dynamic debug table in with the others in find_module_sections(). We move the taint check to the actual add/remove of the dynamic debug info: this is certain (find_module_sections is too early). Signed-off-by: Rusty Russell Cc: Yehuda Sadeh --- kernel/module.c | 56 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 24 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index cabafe22844..f3cba93ea1e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -117,6 +117,8 @@ struct load_info { char *secstrings, *strtab; unsigned long *strmap; unsigned long symoffs, stroffs; + struct _ddebug *debug; + unsigned int num_debug; struct { unsigned int sym, str, mod, vers, info, pcpu; } index; @@ -1993,7 +1995,7 @@ static void layout_symtab(struct module *mod, struct load_info *info) mod->core_size += bitmap_weight(info->strmap, strsect->sh_size); } -static void add_kallsyms(struct module *mod, struct load_info *info) +static void add_kallsyms(struct module *mod, const struct load_info *info) { unsigned int i, ndst; const Elf_Sym *src; @@ -2040,6 +2042,8 @@ static void add_kallsyms(struct module *mod, struct load_info *info) static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num) { + if (!debug) + return; #ifdef CONFIG_DYNAMIC_DEBUG if (ddebug_add_module(debug, num, debug->modname)) printk(KERN_ERR "dynamic debug error adding module: %s\n", @@ -2267,8 +2271,7 @@ static int check_modinfo(struct module *mod, struct load_info *info) return 0; } -static void find_module_sections(struct module *mod, - const struct load_info *info) +static void find_module_sections(struct module *mod, struct load_info *info) { mod->kp = section_objs(info, "__param", sizeof(*mod->kp), &mod->num_kp); @@ -2323,9 +2326,15 @@ static void find_module_sections(struct module *mod, &mod->num_ftrace_callsites); #endif + mod->extable = section_objs(info, "__ex_table", + sizeof(*mod->extable), &mod->num_exentries); + if (section_addr(info, "__obsparm")) printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", mod->name); + + info->debug = section_objs(info, "__verbose", + sizeof(*info->debug), &info->num_debug); } static int move_module(struct module *mod, struct load_info *info) @@ -2512,6 +2521,20 @@ static void module_deallocate(struct module *mod, struct load_info *info) module_free(mod, mod->module_core); } +static int post_relocation(struct module *mod, const struct load_info *info) +{ + sort_extable(mod->extable, mod->extable + mod->num_exentries); + + /* Copy relocated percpu area over. */ + percpu_modcopy(mod, (void *)info->sechdrs[info->index.pcpu].sh_addr, + info->sechdrs[info->index.pcpu].sh_size); + + add_kallsyms(mod, info); + + /* Arch-specific module finalizing. */ + return module_finalize(info->hdr, info->sechdrs, mod); +} + /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ static noinline struct module *load_module(void __user *umod, @@ -2521,8 +2544,6 @@ static noinline struct module *load_module(void __user *umod, struct load_info info = { NULL, }; struct module *mod; long err; - struct _ddebug *debug = NULL; - unsigned int num_debug = 0; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", umod, len, uargs); @@ -2564,22 +2585,7 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto free_modinfo; - /* Set up and sort exception table */ - mod->extable = section_objs(&info, "__ex_table", - sizeof(*mod->extable), &mod->num_exentries); - sort_extable(mod->extable, mod->extable + mod->num_exentries); - - /* Finally, copy percpu area over. */ - percpu_modcopy(mod, (void *)info.sechdrs[info.index.pcpu].sh_addr, - info.sechdrs[info.index.pcpu].sh_size); - - add_kallsyms(mod, &info); - - if (!mod->taints) - debug = section_objs(&info, "__verbose", - sizeof(*debug), &num_debug); - - err = module_finalize(info.hdr, info.sechdrs, mod); + err = post_relocation(mod, &info); if (err < 0) goto free_modinfo; @@ -2607,8 +2613,9 @@ static noinline struct module *load_module(void __user *umod, goto unlock; } - if (debug) - dynamic_debug_setup(debug, num_debug); + /* This has to be done once we're sure module name is unique. */ + if (!mod->taints) + dynamic_debug_setup(info.debug, info.num_debug); /* Find duplicate symbols */ err = verify_export_symbols(mod); @@ -2640,7 +2647,8 @@ static noinline struct module *load_module(void __user *umod, /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); ddebug: - dynamic_debug_remove(debug); + if (!mod->taints) + dynamic_debug_remove(info.debug); unlock: mutex_unlock(&module_mutex); synchronize_sched(); -- cgit v1.2.3-70-g09d2 From 51f3d0f474aaebbc253100fa32a49c8256812330 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Aug 2010 12:59:13 -0600 Subject: module: cleanup comments, remove noinline On my (32-bit x86) machine, sys_init_module() uses 124 bytes of stack once load_module() is inlined. This effectively reverts ffb4ba76 which inlined it due to stack pressure. Signed-off-by: Rusty Russell --- kernel/module.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index f3cba93ea1e..d0b5f8db11b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1,6 +1,6 @@ /* Copyright (C) 2002 Richard Henderson - Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. + Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2523,12 +2523,14 @@ static void module_deallocate(struct module *mod, struct load_info *info) static int post_relocation(struct module *mod, const struct load_info *info) { + /* Sort exception table now relocations are done. */ sort_extable(mod->extable, mod->extable + mod->num_exentries); /* Copy relocated percpu area over. */ percpu_modcopy(mod, (void *)info->sechdrs[info->index.pcpu].sh_addr, info->sechdrs[info->index.pcpu].sh_size); + /* Setup kallsyms-specific fields. */ add_kallsyms(mod, info); /* Arch-specific module finalizing. */ @@ -2537,7 +2539,7 @@ static int post_relocation(struct module *mod, const struct load_info *info) /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ -static noinline struct module *load_module(void __user *umod, +static struct module *load_module(void __user *umod, unsigned long len, const char __user *uargs) { @@ -2598,6 +2600,7 @@ static noinline struct module *load_module(void __user *umod, goto free_arch_cleanup; } + /* Mark state as coming so strong_try_module_get() ignores us. */ mod->state = MODULE_STATE_COMING; /* Now sew it into the lists so we can get lockdep and oops @@ -2625,10 +2628,12 @@ static noinline struct module *load_module(void __user *umod, list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); + /* Module is ready to execute: parsing args may do that. */ err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); if (err < 0) goto unlink; + /* Link in to syfs. */ err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp); if (err < 0) goto unlink; @@ -2637,9 +2642,8 @@ static noinline struct module *load_module(void __user *umod, kfree(info.strmap); free_copy(&info); - trace_module_load(mod); - /* Done! */ + trace_module_load(mod); return mod; unlink: -- cgit v1.2.3-70-g09d2 From bf5438fca2950b03c21ad868090cc1a8fcd49536 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 17 Sep 2010 11:09:00 -0400 Subject: jump label: Base patch for jump label base patch to implement 'jump labeling'. Based on a new 'asm goto' inline assembly gcc mechanism, we can now branch to labels from an 'asm goto' statment. This allows us to create a 'no-op' fastpath, which can subsequently be patched with a jump to the slowpath code. This is useful for code which might be rarely used, but which we'd like to be able to call, if needed. Tracepoints are the current usecase that these are being implemented for. Acked-by: David S. Miller Signed-off-by: Jason Baron LKML-Reference: [ cleaned up some formating ] Signed-off-by: Steven Rostedt --- Makefile | 5 + arch/Kconfig | 3 + arch/x86/include/asm/alternative.h | 3 +- arch/x86/kernel/alternative.c | 2 +- include/asm-generic/vmlinux.lds.h | 10 ++ include/linux/jump_label.h | 58 +++++++ include/linux/module.h | 5 +- kernel/Makefile | 2 +- kernel/jump_label.c | 346 +++++++++++++++++++++++++++++++++++++ kernel/kprobes.c | 1 + kernel/module.c | 6 + scripts/gcc-goto.sh | 5 + 12 files changed, 442 insertions(+), 4 deletions(-) create mode 100644 include/linux/jump_label.h create mode 100644 kernel/jump_label.c create mode 100644 scripts/gcc-goto.sh (limited to 'kernel/module.c') diff --git a/Makefile b/Makefile index 92ab33f16cf..a906378d505 100644 --- a/Makefile +++ b/Makefile @@ -591,6 +591,11 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) # conserve stack if available KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) +# check for 'asm goto' +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) + KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO +endif + # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments # But warn user when we do so warn-assign = \ diff --git a/arch/Kconfig b/arch/Kconfig index 4877a8c8ee1..1462d8492d8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. +config HAVE_ARCH_JUMP_LABEL + bool + source "kernel/gcov/Kconfig" diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 634bf782dca..76561d20ea2 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -4,6 +4,7 @@ #include #include #include +#include #include /* @@ -182,7 +183,7 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len); -#if defined(CONFIG_DYNAMIC_FTRACE) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) #define IDEAL_NOP_SIZE_5 5 extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; extern void arch_init_ideal_nop5(void); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 083bd010d92..cb0e6d385f6 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -641,7 +641,7 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) return addr; } -#if defined(CONFIG_DYNAMIC_FTRACE) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a92a170fb7..ef2af9948ea 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -220,6 +220,8 @@ \ BUG_TABLE \ \ + JUMP_TABLE \ + \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -563,6 +565,14 @@ #define BUG_TABLE #endif +#define JUMP_TABLE \ + . = ALIGN(8); \ + __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start___jump_table) = .; \ + *(__jump_table) \ + VMLINUX_SYMBOL(__stop___jump_table) = .; \ + } + #ifdef CONFIG_PM_TRACE #define TRACEDATA \ . = ALIGN(4); \ diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h new file mode 100644 index 00000000000..de58656d28e --- /dev/null +++ b/include/linux/jump_label.h @@ -0,0 +1,58 @@ +#ifndef _LINUX_JUMP_LABEL_H +#define _LINUX_JUMP_LABEL_H + +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL) +# include +# define HAVE_JUMP_LABEL +#endif + +enum jump_label_type { + JUMP_LABEL_ENABLE, + JUMP_LABEL_DISABLE +}; + +struct module; + +#ifdef HAVE_JUMP_LABEL + +extern struct jump_entry __start___jump_table[]; +extern struct jump_entry __stop___jump_table[]; + +extern void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type); +extern void jump_label_update(unsigned long key, enum jump_label_type type); +extern void jump_label_apply_nops(struct module *mod); +extern void arch_jump_label_text_poke_early(jump_label_t addr); + +#define enable_jump_label(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); + +#define disable_jump_label(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); + +#else + +#define JUMP_LABEL(key, label) \ +do { \ + if (unlikely(*key)) \ + goto label; \ +} while (0) + +#define enable_jump_label(cond_var) \ +do { \ + *(cond_var) = 1; \ +} while (0) + +#define disable_jump_label(cond_var) \ +do { \ + *(cond_var) = 0; \ +} while (0) + +static inline int jump_label_apply_nops(struct module *mod) +{ + return 0; +} + +#endif + +#endif diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fdc7ff..403ac26023c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -350,7 +350,10 @@ struct module struct tracepoint *tracepoints; unsigned int num_tracepoints; #endif - +#ifdef HAVE_JUMP_LABEL + struct jump_entry *jump_entries; + unsigned int num_jump_entries; +#endif #ifdef CONFIG_TRACING const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; diff --git a/kernel/Makefile b/kernel/Makefile index 0b72d1a74be..d52b473c99a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ - async.o range.o + async.o range.o jump_label.o obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o diff --git a/kernel/jump_label.c b/kernel/jump_label.c new file mode 100644 index 00000000000..460fd40112b --- /dev/null +++ b/kernel/jump_label.c @@ -0,0 +1,346 @@ +/* + * jump label support + * + * Copyright (C) 2009 Jason Baron + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_JUMP_LABEL + +#define JUMP_LABEL_HASH_BITS 6 +#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS) +static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE]; + +/* mutex to protect coming/going of the the jump_label table */ +static DEFINE_MUTEX(jump_label_mutex); + +struct jump_label_entry { + struct hlist_node hlist; + struct jump_entry *table; + int nr_entries; + /* hang modules off here */ + struct hlist_head modules; + unsigned long key; +}; + +struct jump_label_module_entry { + struct hlist_node hlist; + struct jump_entry *table; + int nr_entries; + struct module *mod; +}; + +static int jump_label_cmp(const void *a, const void *b) +{ + const struct jump_entry *jea = a; + const struct jump_entry *jeb = b; + + if (jea->key < jeb->key) + return -1; + + if (jea->key > jeb->key) + return 1; + + return 0; +} + +static void +sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop) +{ + unsigned long size; + + size = (((unsigned long)stop - (unsigned long)start) + / sizeof(struct jump_entry)); + sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); +} + +static struct jump_label_entry *get_jump_label_entry(jump_label_t key) +{ + struct hlist_head *head; + struct hlist_node *node; + struct jump_label_entry *e; + u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0); + + head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (key == e->key) + return e; + } + return NULL; +} + +static struct jump_label_entry * +add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table) +{ + struct hlist_head *head; + struct jump_label_entry *e; + u32 hash; + + e = get_jump_label_entry(key); + if (e) + return ERR_PTR(-EEXIST); + + e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + hash = jhash((void *)&key, sizeof(jump_label_t), 0); + head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; + e->key = key; + e->table = table; + e->nr_entries = nr_entries; + INIT_HLIST_HEAD(&(e->modules)); + hlist_add_head(&e->hlist, head); + return e; +} + +static int +build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop) +{ + struct jump_entry *iter, *iter_begin; + struct jump_label_entry *entry; + int count; + + sort_jump_label_entries(start, stop); + iter = start; + while (iter < stop) { + entry = get_jump_label_entry(iter->key); + if (!entry) { + iter_begin = iter; + count = 0; + while ((iter < stop) && + (iter->key == iter_begin->key)) { + iter++; + count++; + } + entry = add_jump_label_entry(iter_begin->key, + count, iter_begin); + if (IS_ERR(entry)) + return PTR_ERR(entry); + } else { + WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n"); + return -1; + } + } + return 0; +} + +/*** + * jump_label_update - update jump label text + * @key - key value associated with a a jump label + * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE + * + * Will enable/disable the jump for jump label @key, depending on the + * value of @type. + * + */ + +void jump_label_update(unsigned long key, enum jump_label_type type) +{ + struct jump_entry *iter; + struct jump_label_entry *entry; + struct hlist_node *module_node; + struct jump_label_module_entry *e_module; + int count; + + mutex_lock(&jump_label_mutex); + entry = get_jump_label_entry((jump_label_t)key); + if (entry) { + count = entry->nr_entries; + iter = entry->table; + while (count--) { + if (kernel_text_address(iter->code)) + arch_jump_label_transform(iter, type); + iter++; + } + /* eanble/disable jump labels in modules */ + hlist_for_each_entry(e_module, module_node, &(entry->modules), + hlist) { + count = e_module->nr_entries; + iter = e_module->table; + while (count--) { + if (kernel_text_address(iter->code)) + arch_jump_label_transform(iter, type); + iter++; + } + } + } + mutex_unlock(&jump_label_mutex); +} + +static __init int init_jump_label(void) +{ + int ret; + struct jump_entry *iter_start = __start___jump_table; + struct jump_entry *iter_stop = __stop___jump_table; + struct jump_entry *iter; + + mutex_lock(&jump_label_mutex); + ret = build_jump_label_hashtable(__start___jump_table, + __stop___jump_table); + iter = iter_start; + while (iter < iter_stop) { + arch_jump_label_text_poke_early(iter->code); + iter++; + } + mutex_unlock(&jump_label_mutex); + return ret; +} +early_initcall(init_jump_label); + +#ifdef CONFIG_MODULES + +static struct jump_label_module_entry * +add_jump_label_module_entry(struct jump_label_entry *entry, + struct jump_entry *iter_begin, + int count, struct module *mod) +{ + struct jump_label_module_entry *e; + + e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + e->mod = mod; + e->nr_entries = count; + e->table = iter_begin; + hlist_add_head(&e->hlist, &entry->modules); + return e; +} + +static int add_jump_label_module(struct module *mod) +{ + struct jump_entry *iter, *iter_begin; + struct jump_label_entry *entry; + struct jump_label_module_entry *module_entry; + int count; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return 0; + + sort_jump_label_entries(mod->jump_entries, + mod->jump_entries + mod->num_jump_entries); + iter = mod->jump_entries; + while (iter < mod->jump_entries + mod->num_jump_entries) { + entry = get_jump_label_entry(iter->key); + iter_begin = iter; + count = 0; + while ((iter < mod->jump_entries + mod->num_jump_entries) && + (iter->key == iter_begin->key)) { + iter++; + count++; + } + if (!entry) { + entry = add_jump_label_entry(iter_begin->key, 0, NULL); + if (IS_ERR(entry)) + return PTR_ERR(entry); + } + module_entry = add_jump_label_module_entry(entry, iter_begin, + count, mod); + if (IS_ERR(module_entry)) + return PTR_ERR(module_entry); + } + return 0; +} + +static void remove_jump_label_module(struct module *mod) +{ + struct hlist_head *head; + struct hlist_node *node, *node_next, *module_node, *module_node_next; + struct jump_label_entry *e; + struct jump_label_module_entry *e_module; + int i; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return; + + for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { + head = &jump_label_table[i]; + hlist_for_each_entry_safe(e, node, node_next, head, hlist) { + hlist_for_each_entry_safe(e_module, module_node, + module_node_next, + &(e->modules), hlist) { + if (e_module->mod == mod) { + hlist_del(&e_module->hlist); + kfree(e_module); + } + } + if (hlist_empty(&e->modules) && (e->nr_entries == 0)) { + hlist_del(&e->hlist); + kfree(e); + } + } + } +} + +static int +jump_label_module_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct module *mod = data; + int ret = 0; + + switch (val) { + case MODULE_STATE_COMING: + mutex_lock(&jump_label_mutex); + ret = add_jump_label_module(mod); + if (ret) + remove_jump_label_module(mod); + mutex_unlock(&jump_label_mutex); + break; + case MODULE_STATE_GOING: + mutex_lock(&jump_label_mutex); + remove_jump_label_module(mod); + mutex_unlock(&jump_label_mutex); + break; + } + return ret; +} + +/*** + * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop() + * @mod: module to patch + * + * Allow for run-time selection of the optimal nops. Before the module + * loads patch these with arch_get_jump_label_nop(), which is specified by + * the arch specific jump label code. + */ +void jump_label_apply_nops(struct module *mod) +{ + struct jump_entry *iter; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return; + + iter = mod->jump_entries; + while (iter < mod->jump_entries + mod->num_jump_entries) { + arch_jump_label_text_poke_early(iter->code); + iter++; + } +} + +struct notifier_block jump_label_module_nb = { + .notifier_call = jump_label_module_notify, + .priority = 0, +}; + +static __init int init_jump_label_module(void) +{ + return register_module_notifier(&jump_label_module_nb); +} +early_initcall(init_jump_label_module); + +#endif /* CONFIG_MODULES */ + +#endif diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6dd5359e1f0..18904e42a91 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/kernel/module.c b/kernel/module.c index d0b5f8db11b..eba134157ef 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -55,6 +55,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -2308,6 +2309,11 @@ static void find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->tracepoints), &mod->num_tracepoints); #endif +#ifdef HAVE_JUMP_LABEL + mod->jump_entries = section_objs(info, "__jump_table", + sizeof(*mod->jump_entries), + &mod->num_jump_entries); +#endif #ifdef CONFIG_EVENT_TRACING mod->trace_events = section_objs(info, "_ftrace_events", sizeof(*mod->trace_events), diff --git a/scripts/gcc-goto.sh b/scripts/gcc-goto.sh new file mode 100644 index 00000000000..8e82424be7a --- /dev/null +++ b/scripts/gcc-goto.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# Test for gcc 'asm goto' suport +# Copyright (C) 2010, Jason Baron + +echo "int main(void) { entry: asm goto (\"\"::::entry); return 0; }" | $1 -x c - -c -o /dev/null >/dev/null 2>&1 && echo "y" -- cgit v1.2.3-70-g09d2 From 5336377d6225959624146629ce3fc88ee8ecda3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Oct 2010 11:29:27 -0700 Subject: modules: Fix module_bug_list list corruption race With all the recent module loading cleanups, we've minimized the code that sits under module_mutex, fixing various deadlocks and making it possible to do most of the module loading in parallel. However, that whole conversion totally missed the rather obscure code that adds a new module to the list for BUG() handling. That code was doubly obscure because (a) the code itself lives in lib/bugs.c (for dubious reasons) and (b) it gets called from the architecture-specific "module_finalize()" rather than from generic code. Calling it from arch-specific code makes no sense what-so-ever to begin with, and is now actively wrong since that code isn't protected by the module loading lock any more. So this commit moves the "module_bug_{finalize,cleanup}()" calls away from the arch-specific code, and into the generic code - and in the process protects it with the module_mutex so that the list operations are now safe. Future fixups: - move the module list handling code into kernel/module.c where it belongs. - get rid of 'module_bug_list' and just use the regular list of modules (called 'modules' - imagine that) that we already create and maintain for other reasons. Reported-and-tested-by: Thomas Gleixner Cc: Rusty Russell Cc: Adrian Bunk Cc: Andrew Morton Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/avr32/kernel/module.c | 3 +-- arch/h8300/kernel/module.c | 3 +-- arch/mn10300/kernel/module.c | 3 +-- arch/parisc/kernel/module.c | 3 +-- arch/powerpc/kernel/module.c | 5 ----- arch/s390/kernel/module.c | 3 +-- arch/sh/kernel/module.c | 2 -- arch/x86/kernel/module.c | 3 +-- include/linux/module.h | 5 ++--- kernel/module.c | 4 ++++ lib/bug.c | 6 ++---- 11 files changed, 14 insertions(+), 26 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 98f94d041d9..a727f54d64d 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -314,10 +314,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, vfree(module->arch.syminfo); module->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, module); + return 0; } void module_arch_cleanup(struct module *module) { - module_bug_cleanup(module); } diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c index 0865e291c20..db4953dc4e1 100644 --- a/arch/h8300/kernel/module.c +++ b/arch/h8300/kernel/module.c @@ -112,10 +112,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c index 6aea7fd7699..196a111e2e2 100644 --- a/arch/mn10300/kernel/module.c +++ b/arch/mn10300/kernel/module.c @@ -206,7 +206,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } /* @@ -214,5 +214,4 @@ int module_finalize(const Elf_Ehdr *hdr, */ void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 159a2b81e90..6e81bb596e5 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -941,11 +941,10 @@ int module_finalize(const Elf_Ehdr *hdr, nsyms = newptr - (Elf_Sym *)symhdr->sh_addr; DEBUGP("NEW num_symtab %lu\n", nsyms); symhdr->sh_size = nsyms * sizeof(Elf_Sym); - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { deregister_unwind_table(mod); - module_bug_cleanup(mod); } diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 477c663e014..4ef93ae2235 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -65,10 +65,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sect; int err; - err = module_bug_finalize(hdr, sechdrs, me); - if (err) - return err; - /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); if (sect != NULL) @@ -101,5 +97,4 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 22cfd634c35..f7167ee4604 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -407,10 +407,9 @@ int module_finalize(const Elf_Ehdr *hdr, { vfree(me->arch.syminfo); me->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index 43adddfe4c0..ae0be697a89 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -149,13 +149,11 @@ int module_finalize(const Elf_Ehdr *hdr, int ret = 0; ret |= module_dwarf_finalize(hdr, sechdrs, me); - ret |= module_bug_finalize(hdr, sechdrs, me); return ret; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); module_dwarf_cleanup(mod); } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e0bc186d750..1c355c55096 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -239,11 +239,10 @@ int module_finalize(const Elf_Ehdr *hdr, apply_paravirt(pseg, pseg + para->sh_size); } - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); - module_bug_cleanup(mod); } diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fdc7ff..aace066bad8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -686,17 +686,16 @@ extern int module_sysfs_initialized; #ifdef CONFIG_GENERIC_BUG -int module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, +void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); void module_bug_cleanup(struct module *); #else /* !CONFIG_GENERIC_BUG */ -static inline int module_bug_finalize(const Elf_Ehdr *hdr, +static inline void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - return 0; } static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ diff --git a/kernel/module.c b/kernel/module.c index d0b5f8db11b..ccd64199184 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1537,6 +1537,7 @@ static int __unlink_module(void *_mod) { struct module *mod = _mod; list_del(&mod->list); + module_bug_cleanup(mod); return 0; } @@ -2625,6 +2626,7 @@ static struct module *load_module(void __user *umod, if (err < 0) goto ddebug; + module_bug_finalize(info.hdr, info.sechdrs, mod); list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2650,6 +2652,8 @@ static struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + module_bug_cleanup(mod); + ddebug: if (!mod->taints) dynamic_debug_remove(info.debug); diff --git a/lib/bug.c b/lib/bug.c index 7cdfad88128..19552096d16 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr) return NULL; } -int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *mod) +void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { char *secstrings; unsigned int i; @@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, * could potentially lead to deadlock and thus be counter-productive. */ list_add(&mod->bug_list, &module_bug_list); - - return 0; } void module_bug_cleanup(struct module *mod) -- cgit v1.2.3-70-g09d2 From abbce906d05ec37289cd0c3b4e35b2db26eab19b Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Mon, 20 Sep 2010 01:58:08 +0200 Subject: (trivial) Fix compiler warning in kernel/modules.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building with CONFIG_KALLSYMS=n gives following warning: /mnt/src/linux-git/kernel/module.c: In function ‘post_relocation’: /mnt/src/linux-git/kernel/module.c:2534:2: warning: passing argument 2 of ‘add_kallsyms’ discards qualifiers from pointer target type /mnt/src/linux-git/kernel/module.c:2038:13: note: expected ‘struct load_info *’ but argument is of type ‘const struct load_info *’ Signed-off-by: Michał Mirosław Signed-off-by: Rusty Russell --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 2df46301a7a..437a74a7524 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2037,7 +2037,7 @@ static inline void layout_symtab(struct module *mod, struct load_info *info) { } -static void add_kallsyms(struct module *mod, struct load_info *info) +static void add_kallsyms(struct module *mod, const struct load_info *info) { } #endif /* CONFIG_KALLSYMS */ -- cgit v1.2.3-70-g09d2 From 13b9b6e746d753d43270a78dd39694912646b5d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Nov 2010 22:19:24 -0500 Subject: tracing: Fix module use of trace_bprintk() On use of trace_printk() there's a macro that determines if the format is static or a variable. If it is static, it defaults to __trace_bprintk() otherwise it uses __trace_printk(). A while ago, Lai Jiangshan added __trace_bprintk(). In that patch, we discussed a way to allow modules to use it. The difference between __trace_bprintk() and __trace_printk() is that for faster processing, just the format and args are stored in the trace instead of running it through a sprintf function. In order to do this, the format used by the __trace_bprintk() had to be persistent. See commit 1ba28e02a18cbdbea123836f6c98efb09cbf59ec The problem comes with trace_bprintk() where the module is unloaded. The pointer left in the buffer is still pointing to the format. To solve this issue, the formats in the module were copied into kernel core. If the same format was used, they would use the same copy (to prevent memory leak). This all worked well until we tried to merge everything. At the time this was written, Lai Jiangshan, Frederic Weisbecker, Ingo Molnar and myself were all touching the same code. When this was merged, we lost the part of it that was in module.c. This kept out the copying of the formats and unloading the module could cause bad pointers left in the ring buffer. This patch adds back (with updates required for current kernel) the module code that sets up the necessary pointers. Cc: Lai Jiangshan Cc: Rusty Russell Signed-off-by: Steven Rostedt --- kernel/module.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 437a74a7524..d190664f25f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info) kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * mod->num_trace_events, GFP_KERNEL); #endif +#ifdef CONFIG_TRACING + mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", + sizeof(*mod->trace_bprintk_fmt_start), + &mod->num_trace_bprintk_fmt); + /* + * This section contains pointers to allocated objects in the trace + * code and not scanning it leads to false positives. + */ + kmemleak_scan_area(mod->trace_bprintk_fmt_start, + sizeof(*mod->trace_bprintk_fmt_start) * + mod->num_trace_bprintk_fmt, GFP_KERNEL); +#endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ mod->ftrace_callsites = section_objs(info, "__mcount_loc", -- cgit v1.2.3-70-g09d2 From 84e1c6bb38eb318e456558b610396d9f1afaabf0 Mon Sep 17 00:00:00 2001 From: matthieu castet Date: Tue, 16 Nov 2010 22:35:16 +0100 Subject: x86: Add RO/NX protection for loadable kernel modules This patch is a logical extension of the protection provided by CONFIG_DEBUG_RODATA to LKMs. The protection is provided by splitting module_core and module_init into three logical parts each and setting appropriate page access permissions for each individual section: 1. Code: RO+X 2. RO data: RO+NX 3. RW data: RW+NX In order to achieve proper protection, layout_sections() have been modified to align each of the three parts mentioned above onto page boundary. Next, the corresponding page access permissions are set right before successful exit from load_module(). Further, free_module() and sys_init_module have been modified to set module_core and module_init as RW+NX right before calling module_free(). By default, the original section layout and access flags are preserved. When compiled with CONFIG_DEBUG_SET_MODULE_RONX=y, the patch will page-align each group of sections to ensure that each page contains only one type of content and will enforce RO/NX for each group of pages. -v1: Initial proof-of-concept patch. -v2: The patch have been re-written to reduce the number of #ifdefs and to make it architecture-agnostic. Code formatting has also been corrected. -v3: Opportunistic RO/NX protection is now unconditional. Section page-alignment is enabled when CONFIG_DEBUG_RODATA=y. -v4: Removed most macros and improved coding style. -v5: Changed page-alignment and RO/NX section size calculation -v6: Fixed comments. Restricted RO/NX enforcement to x86 only -v7: Introduced CONFIG_DEBUG_SET_MODULE_RONX, added calls to set_all_modules_text_rw() and set_all_modules_text_ro() in ftrace -v8: updated for compatibility with linux 2.6.33-rc5 -v9: coding style fixes -v10: more coding style fixes -v11: minor adjustments for -tip -v12: minor adjustments for v2.6.35-rc2-tip -v13: minor adjustments for v2.6.37-rc1-tip Signed-off-by: Siarhei Liakh Signed-off-by: Xuxian Jiang Acked-by: Arjan van de Ven Reviewed-by: James Morris Signed-off-by: H. Peter Anvin Cc: Andi Kleen Cc: Rusty Russell Cc: Stephen Rothwell Cc: Dave Jones Cc: Kees Cook Cc: Linus Torvalds LKML-Reference: <4CE2F914.9070106@free.fr> [ minor cleanliness edits, -v14: build failure fix ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 11 +++ arch/x86/kernel/ftrace.c | 3 + include/linux/module.h | 11 ++- kernel/module.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 193 insertions(+), 3 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index b59ee765414..45143bbcfe5 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST feature as well as for the change_page_attr() infrastructure. If in doubt, say "N" +config DEBUG_SET_MODULE_RONX + bool "Set loadable kernel module data as NX and text as RO" + depends on MODULES + ---help--- + This option helps catch unintended modifications to loadable + kernel module's text and read-only data. It also prevents execution + of module data. Such protection may interfere with run-time code + patching and dynamic kernel tracing - and they might also protect + against certain classes of kernel exploits. + If in doubt, say "N". + config DEBUG_NX_TEST tristate "Testcase for the NX non-executable stack feature" depends on DEBUG_KERNEL && m diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3afb33f14d2..298448656b6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code); int ftrace_arch_code_modify_prepare(void) { set_kernel_text_rw(); + set_all_modules_text_rw(); modifying_code = 1; return 0; } @@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void) int ftrace_arch_code_modify_post_process(void) { modifying_code = 0; + set_all_modules_text_ro(); set_kernel_text_ro(); return 0; } diff --git a/include/linux/module.h b/include/linux/module.h index b29e7458b96..ddaa689d71b 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -308,6 +308,9 @@ struct module /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; + /* Size of RO sections of the module (text+rodata) */ + unsigned int init_ro_size, core_ro_size; + /* Arch-specific module values */ struct mod_arch_specific arch; @@ -672,7 +675,6 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter) { return 0; } - #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS @@ -687,6 +689,13 @@ extern int module_sysfs_initialized; #define __MODULE_STRING(x) __stringify(x) +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +extern void set_all_modules_text_rw(void); +extern void set_all_modules_text_ro(void); +#else +static inline void set_all_modules_text_rw(void) { } +static inline void set_all_modules_text_ro(void) { } +#endif #ifdef CONFIG_GENERIC_BUG void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, diff --git a/kernel/module.c b/kernel/module.c index 437a74a7524..ba421e6b4ad 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -56,6 +56,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -70,6 +71,26 @@ #define ARCH_SHF_SMALL 0 #endif +/* + * Modules' sections will be aligned on page boundaries + * to ensure complete separation of code and data, but + * only when CONFIG_DEBUG_SET_MODULE_RONX=y + */ +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +# define debug_align(X) ALIGN(X, PAGE_SIZE) +#else +# define debug_align(X) (X) +#endif + +/* + * Given BASE and SIZE this macro calculates the number of pages the + * memory regions occupies + */ +#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \ + (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \ + PFN_DOWN((unsigned long)BASE) + 1) \ + : (0UL)) + /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) @@ -1542,6 +1563,115 @@ static int __unlink_module(void *_mod) return 0; } +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +/* + * LKM RO/NX protection: protect module's text/ro-data + * from modification and any data from execution. + */ +void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages)) +{ + unsigned long begin_pfn = PFN_DOWN((unsigned long)start); + unsigned long end_pfn = PFN_DOWN((unsigned long)end); + + if (end_pfn > begin_pfn) + set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); +} + +static void set_section_ro_nx(void *base, + unsigned long text_size, + unsigned long ro_size, + unsigned long total_size) +{ + /* begin and end PFNs of the current subsection */ + unsigned long begin_pfn; + unsigned long end_pfn; + + /* + * Set RO for module text and RO-data: + * - Always protect first page. + * - Do not protect last partial page. + */ + if (ro_size > 0) + set_page_attributes(base, base + ro_size, set_memory_ro); + + /* + * Set NX permissions for module data: + * - Do not protect first partial page. + * - Always protect last page. + */ + if (total_size > text_size) { + begin_pfn = PFN_UP((unsigned long)base + text_size); + end_pfn = PFN_UP((unsigned long)base + total_size); + if (end_pfn > begin_pfn) + set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); + } +} + +/* Setting memory back to RW+NX before releasing it */ +void unset_section_ro_nx(struct module *mod, void *module_region) +{ + unsigned long total_pages; + + if (mod->module_core == module_region) { + /* Set core as NX+RW */ + total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size); + set_memory_nx((unsigned long)mod->module_core, total_pages); + set_memory_rw((unsigned long)mod->module_core, total_pages); + + } else if (mod->module_init == module_region) { + /* Set init as NX+RW */ + total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size); + set_memory_nx((unsigned long)mod->module_init, total_pages); + set_memory_rw((unsigned long)mod->module_init, total_pages); + } +} + +/* Iterate through all modules and set each module's text as RW */ +void set_all_modules_text_rw() +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry_rcu(mod, &modules, list) { + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_rw); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_rw); + } + } + mutex_unlock(&module_mutex); +} + +/* Iterate through all modules and set each module's text as RO */ +void set_all_modules_text_ro() +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry_rcu(mod, &modules, list) { + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_ro); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_ro); + } + } + mutex_unlock(&module_mutex); +} +#else +static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } +static inline void unset_section_ro_nx(struct module *mod, void *module_region) { } +#endif + /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { @@ -1566,6 +1696,7 @@ static void free_module(struct module *mod) destroy_params(mod->kp, mod->num_kp); /* This may be NULL, but that's OK */ + unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -1574,6 +1705,7 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ + unset_section_ro_nx(mod, mod->module_core); module_free(mod, mod->module_core); #ifdef CONFIG_MPU @@ -1777,8 +1909,19 @@ static void layout_sections(struct module *mod, struct load_info *info) s->sh_entsize = get_offset(mod, &mod->core_size, s, i); DEBUGP("\t%s\n", name); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->core_size = debug_align(mod->core_size); mod->core_text_size = mod->core_size; + break; + case 1: /* RO: text and ro-data */ + mod->core_size = debug_align(mod->core_size); + mod->core_ro_size = mod->core_size; + break; + case 3: /* whole core */ + mod->core_size = debug_align(mod->core_size); + break; + } } DEBUGP("Init section allocation order:\n"); @@ -1796,8 +1939,19 @@ static void layout_sections(struct module *mod, struct load_info *info) | INIT_OFFSET_MASK); DEBUGP("\t%s\n", sname); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->init_size = debug_align(mod->init_size); mod->init_text_size = mod->init_size; + break; + case 1: /* RO: text and ro-data */ + mod->init_size = debug_align(mod->init_size); + mod->init_ro_size = mod->init_size; + break; + case 3: /* whole init */ + mod->init_size = debug_align(mod->init_size); + break; + } } } @@ -2650,6 +2804,18 @@ static struct module *load_module(void __user *umod, kfree(info.strmap); free_copy(&info); + /* Set RO and NX regions for core */ + set_section_ro_nx(mod->module_core, + mod->core_text_size, + mod->core_ro_size, + mod->core_size); + + /* Set RO and NX regions for init */ + set_section_ro_nx(mod->module_init, + mod->init_text_size, + mod->init_ro_size, + mod->init_size); + /* Done! */ trace_module_load(mod); return mod; @@ -2753,6 +2919,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif + unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3-70-g09d2 From 94462ad3b14739d158a1ab87bb30008c1e5a6bc1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Nov 2010 13:15:42 -0500 Subject: module: Move RO/NX module protection to after ftrace module update The commit: 84e1c6bb38eb318e456558b610396d9f1afaabf0 x86: Add RO/NX protection for loadable kernel modules Broke the function tracer with this output: ------------[ cut here ]------------ WARNING: at kernel/trace/ftrace.c:1014 ftrace_bug+0x114/0x171() Hardware name: Precision WorkStation 470 Modules linked in: i2c_core(+) Pid: 86, comm: modprobe Not tainted 2.6.37-rc2+ #68 Call Trace: [] warn_slowpath_common+0x85/0x9d [] ? __process_new_adapter+0x7/0x34 [i2c_core] [] ? __process_new_adapter+0x7/0x34 [i2c_core] [] warn_slowpath_null+0x1a/0x1c [] ftrace_bug+0x114/0x171 [] ? __process_new_adapter+0x7/0x34 [i2c_core] [] ftrace_process_locs+0x1ae/0x274 [] ? __process_new_adapter+0x7/0x34 [i2c_core] [] ftrace_module_notify+0x39/0x44 [] notifier_call_chain+0x37/0x63 [] __blocking_notifier_call_chain+0x46/0x5b [] blocking_notifier_call_chain+0x14/0x16 [] sys_init_module+0x73/0x1f3 [] system_call_fastpath+0x16/0x1b ---[ end trace 2aff4f4ca53ec746 ]--- ftrace faulted on writing [] __process_new_adapter+0x7/0x34 [i2c_core] The cause was that the module text was set to read only before ftrace could convert the calls to mcount to nops. Thus, the conversions failed due to not being able to write to the text locations. The simple fix is to move setting the module to read only after the module notifiers are called (where ftrace sets the module mcounts to nops). Reported-by: Peter Zijlstra Acked-by: Rusty Russell Signed-off-by: Steven Rostedt --- kernel/module.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 562f665c721..34e00b708fa 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2816,18 +2816,6 @@ static struct module *load_module(void __user *umod, kfree(info.strmap); free_copy(&info); - /* Set RO and NX regions for core */ - set_section_ro_nx(mod->module_core, - mod->core_text_size, - mod->core_ro_size, - mod->core_size); - - /* Set RO and NX regions for init */ - set_section_ro_nx(mod->module_init, - mod->init_text_size, - mod->init_ro_size, - mod->init_size); - /* Done! */ trace_module_load(mod); return mod; @@ -2888,6 +2876,18 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); + /* Set RO and NX regions for core */ + set_section_ro_nx(mod->module_core, + mod->core_text_size, + mod->core_ro_size, + mod->core_size); + + /* Set RO and NX regions for init */ + set_section_ro_nx(mod->module_init, + mod->init_text_size, + mod->init_ro_size, + mod->init_size); + do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) -- cgit v1.2.3-70-g09d2