diff options
Diffstat (limited to 'kernel/module.c')
| -rw-r--r-- | kernel/module.c | 3456 |
1 files changed, 2355 insertions, 1101 deletions
diff --git a/kernel/module.c b/kernel/module.c index 5d437bffd8d..81e727cf6df 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1,6 +1,6 @@ /* Copyright (C) 2002 Richard Henderson - Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. + Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,15 +16,20 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/moduleloader.h> +#include <linux/ftrace_event.h> #include <linux/init.h> #include <linux/kallsyms.h> +#include <linux/file.h> +#include <linux/fs.h> #include <linux/sysfs.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/elf.h> +#include <linux/proc_fs.h> +#include <linux/security.h> #include <linux/seq_file.h> #include <linux/syscalls.h> #include <linux/fcntl.h> @@ -41,36 +46,119 @@ #include <linux/device.h> #include <linux/string.h> #include <linux/mutex.h> -#include <linux/unwind.h> +#include <linux/rculist.h> #include <asm/uaccess.h> -#include <asm/semaphore.h> #include <asm/cacheflush.h> +#include <asm/mmu_context.h> #include <linux/license.h> #include <asm/sections.h> - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt , a...) -#endif +#include <linux/tracepoint.h> +#include <linux/ftrace.h> +#include <linux/async.h> +#include <linux/percpu.h> +#include <linux/kmemleak.h> +#include <linux/jump_label.h> +#include <linux/pfn.h> +#include <linux/bsearch.h> +#include <linux/fips.h> +#include <uapi/linux/module.h> +#include "module-internal.h" + +#define CREATE_TRACE_POINTS +#include <trace/events/module.h> #ifndef ARCH_SHF_SMALL #define ARCH_SHF_SMALL 0 #endif +/* + * Modules' sections will be aligned on page boundaries + * to ensure complete separation of code and data, but + * only when CONFIG_DEBUG_SET_MODULE_RONX=y + */ +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +# define debug_align(X) ALIGN(X, PAGE_SIZE) +#else +# define debug_align(X) (X) +#endif + +/* + * Given BASE and SIZE this macro calculates the number of pages the + * memory regions occupies + */ +#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \ + (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \ + PFN_DOWN((unsigned long)BASE) + 1) \ + : (0UL)) + /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) -/* List of modules, protected by module_mutex or preempt_disable - * (add/delete uses stop_machine). */ -static DEFINE_MUTEX(module_mutex); +/* + * Mutex protects: + * 1) List of modules (also safely readable with preempt_disable), + * 2) module_use links, + * 3) module_addr_min/module_addr_max. + * (delete uses stop_machine/add uses RCU list operations). */ +DEFINE_MUTEX(module_mutex); +EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); +#ifdef CONFIG_KGDB_KDB +struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ +#endif /* CONFIG_KGDB_KDB */ + +#ifdef CONFIG_MODULE_SIG +#ifdef CONFIG_MODULE_SIG_FORCE +static bool sig_enforce = true; +#else +static bool sig_enforce = false; + +static int param_set_bool_enable_only(const char *val, + const struct kernel_param *kp) +{ + int err; + bool test; + struct kernel_param dummy_kp = *kp; + + dummy_kp.arg = &test; + + err = param_set_bool(val, &dummy_kp); + if (err) + return err; + + /* Don't let them unset it once it's set! */ + if (!test && sig_enforce) + return -EROFS; + + if (test) + sig_enforce = true; + return 0; +} + +static const struct kernel_param_ops param_ops_bool_enable_only = { + .flags = KERNEL_PARAM_FL_NOARG, + .set = param_set_bool_enable_only, + .get = param_get_bool, +}; +#define param_check_bool_enable_only param_check_bool + +module_param(sig_enforce, bool_enable_only, 0644); +#endif /* !CONFIG_MODULE_SIG_FORCE */ +#endif /* CONFIG_MODULE_SIG */ + +/* Block module loading/unloading? */ +int modules_disabled = 0; +core_param(nomodule, modules_disabled, bint, 0); /* Waiting for a module to finish initializing? */ static DECLARE_WAIT_QUEUE_HEAD(module_wq); static BLOCKING_NOTIFIER_HEAD(module_notify_list); +/* Bounds of module allocation, for speeding __module_address. + * Protected by module_mutex. */ +static unsigned long module_addr_min = -1UL, module_addr_max = 0; + int register_module_notifier(struct notifier_block * nb) { return blocking_notifier_chain_register(&module_notify_list, nb); @@ -83,10 +171,25 @@ int unregister_module_notifier(struct notifier_block * nb) } EXPORT_SYMBOL(unregister_module_notifier); +struct load_info { + Elf_Ehdr *hdr; + unsigned long len; + Elf_Shdr *sechdrs; + char *secstrings, *strtab; + unsigned long symoffs, stroffs; + struct _ddebug *debug; + unsigned int num_debug; + bool sig_ok; + struct { + unsigned int sym, str, mod, vers, info, pcpu; + } index; +}; + /* We require a truly strong try_module_get(): 0 means failure due to ongoing or failed initialization etc. */ static inline int strong_try_module_get(struct module *mod) { + BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED); if (mod && mod->state == MODULE_STATE_COMING) return -EBUSY; if (try_module_get(mod)) @@ -95,10 +198,11 @@ static inline int strong_try_module_get(struct module *mod) return -ENOENT; } -static inline void add_taint_module(struct module *mod, unsigned flag) +static inline void add_taint_module(struct module *mod, unsigned flag, + enum lockdep_ok lockdep_ok) { - add_taint(flag); - mod->taints |= flag; + add_taint(flag, lockdep_ok); + mod->taints |= (1U << flag); } /* @@ -113,21 +217,40 @@ void __module_put_and_exit(struct module *mod, long code) EXPORT_SYMBOL(__module_put_and_exit); /* Find a module section: 0 means not found. */ -static unsigned int find_sec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings, - const char *name) +static unsigned int find_sec(const struct load_info *info, const char *name) { unsigned int i; - for (i = 1; i < hdr->e_shnum; i++) + for (i = 1; i < info->hdr->e_shnum; i++) { + Elf_Shdr *shdr = &info->sechdrs[i]; /* Alloc bit cleared means "ignore it." */ - if ((sechdrs[i].sh_flags & SHF_ALLOC) - && strcmp(secstrings+sechdrs[i].sh_name, name) == 0) + if ((shdr->sh_flags & SHF_ALLOC) + && strcmp(info->secstrings + shdr->sh_name, name) == 0) return i; + } return 0; } +/* Find a module section, or NULL. */ +static void *section_addr(const struct load_info *info, const char *name) +{ + /* Section 0 has sh_addr 0. */ + return (void *)info->sechdrs[find_sec(info, name)].sh_addr; +} + +/* Find a module section, or NULL. Fill in number of "objects" in section. */ +static void *section_objs(const struct load_info *info, + const char *name, + size_t object_size, + unsigned int *num) +{ + unsigned int sec = find_sec(info, name); + + /* Section 0 has sh_addr 0 and sh_size 0. */ + *num = info->sechdrs[sec].sh_size / object_size; + return (void *)info->sechdrs[sec].sh_addr; +} + /* Provided by the linker */ extern const struct kernel_symbol __start___ksymtab[]; extern const struct kernel_symbol __stop___ksymtab[]; @@ -135,17 +258,17 @@ extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; extern const struct kernel_symbol __start___ksymtab_gpl_future[]; extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; +extern const unsigned long __start___kcrctab[]; +extern const unsigned long __start___kcrctab_gpl[]; +extern const unsigned long __start___kcrctab_gpl_future[]; +#ifdef CONFIG_UNUSED_SYMBOLS extern const struct kernel_symbol __start___ksymtab_unused[]; extern const struct kernel_symbol __stop___ksymtab_unused[]; extern const struct kernel_symbol __start___ksymtab_unused_gpl[]; extern const struct kernel_symbol __stop___ksymtab_unused_gpl[]; -extern const struct kernel_symbol __start___ksymtab_gpl_future[]; -extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; -extern const unsigned long __start___kcrctab[]; -extern const unsigned long __start___kcrctab_gpl[]; -extern const unsigned long __start___kcrctab_gpl_future[]; extern const unsigned long __start___kcrctab_unused[]; extern const unsigned long __start___kcrctab_unused_gpl[]; +#endif #ifndef CONFIG_MODVERSIONS #define symversion(base, idx) NULL @@ -153,329 +276,321 @@ extern const unsigned long __start___kcrctab_unused_gpl[]; #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL) #endif -/* lookup symbol in given range of kernel_symbols */ -static const struct kernel_symbol *lookup_symbol(const char *name, - const struct kernel_symbol *start, - const struct kernel_symbol *stop) +static bool each_symbol_in_section(const struct symsearch *arr, + unsigned int arrsize, + struct module *owner, + bool (*fn)(const struct symsearch *syms, + struct module *owner, + void *data), + void *data) { - const struct kernel_symbol *ks = start; - for (; ks < stop; ks++) - if (strcmp(ks->name, name) == 0) - return ks; - return NULL; -} + unsigned int j; -static void printk_unused_warning(const char *name) -{ - printk(KERN_WARNING "Symbol %s is marked as UNUSED, " - "however this module is using it.\n", name); - printk(KERN_WARNING "This symbol will go away in the future.\n"); - printk(KERN_WARNING "Please evalute if this is the right api to use, " - "and if it really is, submit a report the linux kernel " - "mailinglist together with submitting your code for " - "inclusion.\n"); + for (j = 0; j < arrsize; j++) { + if (fn(&arr[j], owner, data)) + return true; + } + + return false; } -/* Find a symbol, return value, crc and module which owns it */ -static unsigned long __find_symbol(const char *name, - struct module **owner, - const unsigned long **crc, - int gplok) +/* Returns true as soon as fn returns true, otherwise false. */ +bool each_symbol_section(bool (*fn)(const struct symsearch *arr, + struct module *owner, + void *data), + void *data) { struct module *mod; - const struct kernel_symbol *ks; + static const struct symsearch arr[] = { + { __start___ksymtab, __stop___ksymtab, __start___kcrctab, + NOT_GPL_ONLY, false }, + { __start___ksymtab_gpl, __stop___ksymtab_gpl, + __start___kcrctab_gpl, + GPL_ONLY, false }, + { __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future, + __start___kcrctab_gpl_future, + WILL_BE_GPL_ONLY, false }, +#ifdef CONFIG_UNUSED_SYMBOLS + { __start___ksymtab_unused, __stop___ksymtab_unused, + __start___kcrctab_unused, + NOT_GPL_ONLY, true }, + { __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl, + __start___kcrctab_unused_gpl, + GPL_ONLY, true }, +#endif + }; - /* Core kernel first. */ - *owner = NULL; - ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); - if (ks) { - *crc = symversion(__start___kcrctab, (ks - __start___ksymtab)); - return ks->value; - } - if (gplok) { - ks = lookup_symbol(name, __start___ksymtab_gpl, - __stop___ksymtab_gpl); - if (ks) { - *crc = symversion(__start___kcrctab_gpl, - (ks - __start___ksymtab_gpl)); - return ks->value; - } + if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data)) + return true; + + list_for_each_entry_rcu(mod, &modules, list) { + struct symsearch arr[] = { + { mod->syms, mod->syms + mod->num_syms, mod->crcs, + NOT_GPL_ONLY, false }, + { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms, + mod->gpl_crcs, + GPL_ONLY, false }, + { mod->gpl_future_syms, + mod->gpl_future_syms + mod->num_gpl_future_syms, + mod->gpl_future_crcs, + WILL_BE_GPL_ONLY, false }, +#ifdef CONFIG_UNUSED_SYMBOLS + { mod->unused_syms, + mod->unused_syms + mod->num_unused_syms, + mod->unused_crcs, + NOT_GPL_ONLY, true }, + { mod->unused_gpl_syms, + mod->unused_gpl_syms + mod->num_unused_gpl_syms, + mod->unused_gpl_crcs, + GPL_ONLY, true }, +#endif + }; + + if (mod->state == MODULE_STATE_UNFORMED) + continue; + + if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data)) + return true; } - ks = lookup_symbol(name, __start___ksymtab_gpl_future, - __stop___ksymtab_gpl_future); - if (ks) { - if (!gplok) { - printk(KERN_WARNING "Symbol %s is being used " - "by a non-GPL module, which will not " - "be allowed in the future\n", name); - printk(KERN_WARNING "Please see the file " - "Documentation/feature-removal-schedule.txt " - "in the kernel source tree for more " - "details.\n"); + return false; +} +EXPORT_SYMBOL_GPL(each_symbol_section); + +struct find_symbol_arg { + /* Input */ + const char *name; + bool gplok; + bool warn; + + /* Output */ + struct module *owner; + const unsigned long *crc; + const struct kernel_symbol *sym; +}; + +static bool check_symbol(const struct symsearch *syms, + struct module *owner, + unsigned int symnum, void *data) +{ + struct find_symbol_arg *fsa = data; + + if (!fsa->gplok) { + if (syms->licence == GPL_ONLY) + return false; + if (syms->licence == WILL_BE_GPL_ONLY && fsa->warn) { + pr_warn("Symbol %s is being used by a non-GPL module, " + "which will not be allowed in the future\n", + fsa->name); } - *crc = symversion(__start___kcrctab_gpl_future, - (ks - __start___ksymtab_gpl_future)); - return ks->value; } - ks = lookup_symbol(name, __start___ksymtab_unused, - __stop___ksymtab_unused); - if (ks) { - printk_unused_warning(name); - *crc = symversion(__start___kcrctab_unused, - (ks - __start___ksymtab_unused)); - return ks->value; +#ifdef CONFIG_UNUSED_SYMBOLS + if (syms->unused && fsa->warn) { + pr_warn("Symbol %s is marked as UNUSED, however this module is " + "using it.\n", fsa->name); + pr_warn("This symbol will go away in the future.\n"); + pr_warn("Please evalute if this is the right api to use and if " + "it really is, submit a report the linux kernel " + "mailinglist together with submitting your code for " + "inclusion.\n"); } +#endif - if (gplok) - ks = lookup_symbol(name, __start___ksymtab_unused_gpl, - __stop___ksymtab_unused_gpl); - if (ks) { - printk_unused_warning(name); - *crc = symversion(__start___kcrctab_unused_gpl, - (ks - __start___ksymtab_unused_gpl)); - return ks->value; - } + fsa->owner = owner; + fsa->crc = symversion(syms->crcs, symnum); + fsa->sym = &syms->start[symnum]; + return true; +} - /* Now try modules. */ - list_for_each_entry(mod, &modules, list) { - *owner = mod; - ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); - if (ks) { - *crc = symversion(mod->crcs, (ks - mod->syms)); - return ks->value; - } +static int cmp_name(const void *va, const void *vb) +{ + const char *a; + const struct kernel_symbol *b; + a = va; b = vb; + return strcmp(a, b->name); +} - if (gplok) { - ks = lookup_symbol(name, mod->gpl_syms, - mod->gpl_syms + mod->num_gpl_syms); - if (ks) { - *crc = symversion(mod->gpl_crcs, - (ks - mod->gpl_syms)); - return ks->value; - } - } - ks = lookup_symbol(name, mod->unused_syms, mod->unused_syms + mod->num_unused_syms); - if (ks) { - printk_unused_warning(name); - *crc = symversion(mod->unused_crcs, (ks - mod->unused_syms)); - return ks->value; - } +static bool find_symbol_in_section(const struct symsearch *syms, + struct module *owner, + void *data) +{ + struct find_symbol_arg *fsa = data; + struct kernel_symbol *sym; - if (gplok) { - ks = lookup_symbol(name, mod->unused_gpl_syms, - mod->unused_gpl_syms + mod->num_unused_gpl_syms); - if (ks) { - printk_unused_warning(name); - *crc = symversion(mod->unused_gpl_crcs, - (ks - mod->unused_gpl_syms)); - return ks->value; - } - } - ks = lookup_symbol(name, mod->gpl_future_syms, - (mod->gpl_future_syms + - mod->num_gpl_future_syms)); - if (ks) { - if (!gplok) { - printk(KERN_WARNING "Symbol %s is being used " - "by a non-GPL module, which will not " - "be allowed in the future\n", name); - printk(KERN_WARNING "Please see the file " - "Documentation/feature-removal-schedule.txt " - "in the kernel source tree for more " - "details.\n"); - } - *crc = symversion(mod->gpl_future_crcs, - (ks - mod->gpl_future_syms)); - return ks->value; - } + sym = bsearch(fsa->name, syms->start, syms->stop - syms->start, + sizeof(struct kernel_symbol), cmp_name); + + if (sym != NULL && check_symbol(syms, owner, sym - syms->start, data)) + return true; + + return false; +} + +/* Find a symbol and return it, along with, (optional) crc and + * (optional) module which owns it. Needs preempt disabled or module_mutex. */ +const struct kernel_symbol *find_symbol(const char *name, + struct module **owner, + const unsigned long **crc, + bool gplok, + bool warn) +{ + struct find_symbol_arg fsa; + + fsa.name = name; + fsa.gplok = gplok; + fsa.warn = warn; + + if (each_symbol_section(find_symbol_in_section, &fsa)) { + if (owner) + *owner = fsa.owner; + if (crc) + *crc = fsa.crc; + return fsa.sym; } - DEBUGP("Failed to find symbol %s\n", name); - return -ENOENT; + + pr_debug("Failed to find symbol %s\n", name); + return NULL; } +EXPORT_SYMBOL_GPL(find_symbol); /* Search for module by name: must hold module_mutex. */ -static struct module *find_module(const char *name) +static struct module *find_module_all(const char *name, size_t len, + bool even_unformed) { struct module *mod; list_for_each_entry(mod, &modules, list) { - if (strcmp(mod->name, name) == 0) + if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) + continue; + if (strlen(mod->name) == len && !memcmp(mod->name, name, len)) return mod; } return NULL; } -#ifdef CONFIG_SMP -/* Number of blocks used and allocated. */ -static unsigned int pcpu_num_used, pcpu_num_allocated; -/* Size of each block. -ve means used. */ -static int *pcpu_size; - -static int split_block(unsigned int i, unsigned short size) +struct module *find_module(const char *name) { - /* Reallocation required? */ - if (pcpu_num_used + 1 > pcpu_num_allocated) { - int *new; - - new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2, - GFP_KERNEL); - if (!new) - return 0; - - pcpu_num_allocated *= 2; - pcpu_size = new; - } - - /* Insert a new subblock */ - memmove(&pcpu_size[i+1], &pcpu_size[i], - sizeof(pcpu_size[0]) * (pcpu_num_used - i)); - pcpu_num_used++; - - pcpu_size[i+1] -= size; - pcpu_size[i] = size; - return 1; + return find_module_all(name, strlen(name), false); } +EXPORT_SYMBOL_GPL(find_module); + +#ifdef CONFIG_SMP -static inline unsigned int block_size(int val) +static inline void __percpu *mod_percpu(struct module *mod) { - if (val < 0) - return -val; - return val; + return mod->percpu; } -static void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static int percpu_modalloc(struct module *mod, struct load_info *info) { - unsigned long extra; - unsigned int i; - void *ptr; + Elf_Shdr *pcpusec = &info->sechdrs[info->index.pcpu]; + unsigned long align = pcpusec->sh_addralign; + + if (!pcpusec->sh_size) + return 0; if (align > PAGE_SIZE) { - printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", - name, align, PAGE_SIZE); + pr_warn("%s: per-cpu alignment %li > %li\n", + mod->name, align, PAGE_SIZE); align = PAGE_SIZE; } - ptr = __per_cpu_start; - for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { - /* Extra for alignment requirement. */ - extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; - BUG_ON(i == 0 && extra != 0); - - if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size) - continue; - - /* Transfer extra to previous block. */ - if (pcpu_size[i-1] < 0) - pcpu_size[i-1] -= extra; - else - pcpu_size[i-1] += extra; - pcpu_size[i] -= extra; - ptr += extra; - - /* Split block if warranted */ - if (pcpu_size[i] - size > sizeof(unsigned long)) - if (!split_block(i, size)) - return NULL; - - /* Mark allocated */ - pcpu_size[i] = -pcpu_size[i]; - return ptr; + mod->percpu = __alloc_reserved_percpu(pcpusec->sh_size, align); + if (!mod->percpu) { + pr_warn("%s: Could not allocate %lu bytes percpu data\n", + mod->name, (unsigned long)pcpusec->sh_size); + return -ENOMEM; } - - printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", - size); - return NULL; + mod->percpu_size = pcpusec->sh_size; + return 0; } -static void percpu_modfree(void *freeme) +static void percpu_modfree(struct module *mod) { - unsigned int i; - void *ptr = __per_cpu_start + block_size(pcpu_size[0]); - - /* First entry is core kernel percpu data. */ - for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { - if (ptr == freeme) { - pcpu_size[i] = -pcpu_size[i]; - goto free; - } - } - BUG(); - - free: - /* Merge with previous? */ - if (pcpu_size[i-1] >= 0) { - pcpu_size[i-1] += pcpu_size[i]; - pcpu_num_used--; - memmove(&pcpu_size[i], &pcpu_size[i+1], - (pcpu_num_used - i) * sizeof(pcpu_size[0])); - i--; - } - /* Merge with next? */ - if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) { - pcpu_size[i] += pcpu_size[i+1]; - pcpu_num_used--; - memmove(&pcpu_size[i+1], &pcpu_size[i+2], - (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0])); - } + free_percpu(mod->percpu); } -static unsigned int find_pcpusec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) +static unsigned int find_pcpusec(struct load_info *info) { - return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); + return find_sec(info, ".data..percpu"); } -static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +static void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { int cpu; for_each_possible_cpu(cpu) - memcpy(pcpudest + per_cpu_offset(cpu), from, size); + memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } -static int percpu_modinit(void) +/** + * is_module_percpu_address - test whether address is from module static percpu + * @addr: address to test + * + * Test whether @addr belongs to module static percpu area. + * + * RETURNS: + * %true if @addr is from module static percpu area + */ +bool is_module_percpu_address(unsigned long addr) { - pcpu_num_used = 2; - pcpu_num_allocated = 2; - pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, - GFP_KERNEL); - /* Static in-kernel percpu data (used). */ - pcpu_size[0] = -(__per_cpu_end-__per_cpu_start); - /* Free room. */ - pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; - if (pcpu_size[1] < 0) { - printk(KERN_ERR "No per-cpu room for modules.\n"); - pcpu_num_used = 1; + struct module *mod; + unsigned int cpu; + + preempt_disable(); + + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + if (!mod->percpu_size) + continue; + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(mod->percpu, cpu); + + if ((void *)addr >= start && + (void *)addr < start + mod->percpu_size) { + preempt_enable(); + return true; + } + } } - return 0; + preempt_enable(); + return false; } -__initcall(percpu_modinit); + #else /* ... !CONFIG_SMP */ -static inline void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) + +static inline void __percpu *mod_percpu(struct module *mod) { return NULL; } -static inline void percpu_modfree(void *pcpuptr) +static int percpu_modalloc(struct module *mod, struct load_info *info) { - BUG(); + /* UP modules shouldn't have this section: ENOMEM isn't quite right */ + if (info->sechdrs[info->index.pcpu].sh_size != 0) + return -ENOMEM; + return 0; } -static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) +static inline void percpu_modfree(struct module *mod) +{ +} +static unsigned int find_pcpusec(struct load_info *info) { return 0; } -static inline void percpu_modcopy(void *pcpudst, const void *src, - unsigned long size) +static inline void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); } +bool is_module_percpu_address(unsigned long addr) +{ + return false; +} + #endif /* CONFIG_SMP */ #define MODINFO_ATTR(field) \ @@ -484,9 +599,9 @@ static void setup_modinfo_##field(struct module *mod, const char *s) \ mod->field = kstrdup(s, GFP_KERNEL); \ } \ static ssize_t show_modinfo_##field(struct module_attribute *mattr, \ - struct module *mod, char *buffer) \ + struct module_kobject *mk, char *buffer) \ { \ - return sprintf(buffer, "%s\n", mod->field); \ + return scnprintf(buffer, PAGE_SIZE, "%s\n", mk->mod->field); \ } \ static int modinfo_##field##_exists(struct module *mod) \ { \ @@ -511,97 +626,104 @@ MODINFO_ATTR(srcversion); static char last_unloaded_module[MODULE_NAME_LEN+1]; #ifdef CONFIG_MODULE_UNLOAD + +EXPORT_TRACEPOINT_SYMBOL(module_get); + /* Init the unload section of the module. */ -static void module_unload_init(struct module *mod) +static int module_unload_init(struct module *mod) { - unsigned int i; + mod->refptr = alloc_percpu(struct module_ref); + if (!mod->refptr) + return -ENOMEM; + + INIT_LIST_HEAD(&mod->source_list); + INIT_LIST_HEAD(&mod->target_list); - INIT_LIST_HEAD(&mod->modules_which_use_me); - for (i = 0; i < NR_CPUS; i++) - local_set(&mod->ref[i].count, 0); /* Hold reference count during initialization. */ - local_set(&mod->ref[raw_smp_processor_id()].count, 1); - /* Backwards compatibility macros put refcount during init. */ - mod->waiter = current; -} + raw_cpu_write(mod->refptr->incs, 1); -/* modules using other modules */ -struct module_use -{ - struct list_head list; - struct module *module_which_uses; -}; + return 0; +} /* Does a already use b? */ static int already_uses(struct module *a, struct module *b) { struct module_use *use; - list_for_each_entry(use, &b->modules_which_use_me, list) { - if (use->module_which_uses == a) { - DEBUGP("%s uses %s!\n", a->name, b->name); + list_for_each_entry(use, &b->source_list, source_list) { + if (use->source == a) { + pr_debug("%s uses %s!\n", a->name, b->name); return 1; } } - DEBUGP("%s does not use %s!\n", a->name, b->name); + pr_debug("%s does not use %s!\n", a->name, b->name); return 0; } -/* Module a uses b */ -static int use_module(struct module *a, struct module *b) +/* + * Module a uses b + * - we add 'a' as a "source", 'b' as a "target" of module use + * - the module_use is added to the list of 'b' sources (so + * 'b' can walk the list to see who sourced them), and of 'a' + * targets (so 'a' can see what modules it targets). + */ +static int add_module_usage(struct module *a, struct module *b) { struct module_use *use; - int no_warn, err; - if (b == NULL || already_uses(a, b)) return 1; + pr_debug("Allocating new usage for %s.\n", a->name); + use = kmalloc(sizeof(*use), GFP_ATOMIC); + if (!use) { + pr_warn("%s: out of memory loading\n", a->name); + return -ENOMEM; + } - /* If we're interrupted or time out, we fail. */ - if (wait_event_interruptible_timeout( - module_wq, (err = strong_try_module_get(b)) != -EBUSY, - 30 * HZ) <= 0) { - printk("%s: gave up waiting for init of module %s.\n", - a->name, b->name); + use->source = a; + use->target = b; + list_add(&use->source_list, &b->source_list); + list_add(&use->target_list, &a->target_list); + return 0; +} + +/* Module a uses b: caller needs module_mutex() */ +int ref_module(struct module *a, struct module *b) +{ + int err; + + if (b == NULL || already_uses(a, b)) return 0; - } - /* If strong_try_module_get() returned a different error, we fail. */ + /* If module isn't available, we fail. */ + err = strong_try_module_get(b); if (err) - return 0; + return err; - DEBUGP("Allocating new usage for %s.\n", a->name); - use = kmalloc(sizeof(*use), GFP_ATOMIC); - if (!use) { - printk("%s: out of memory loading\n", a->name); + err = add_module_usage(a, b); + if (err) { module_put(b); - return 0; + return err; } - - use->module_which_uses = a; - list_add(&use->list, &b->modules_which_use_me); - no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); - return 1; + return 0; } +EXPORT_SYMBOL_GPL(ref_module); /* Clear the unload stuff of the module. */ static void module_unload_free(struct module *mod) { - struct module *i; + struct module_use *use, *tmp; - list_for_each_entry(i, &modules, list) { - struct module_use *use; - - list_for_each_entry(use, &i->modules_which_use_me, list) { - if (use->module_which_uses == mod) { - DEBUGP("%s unusing %s\n", mod->name, i->name); - module_put(i); - list_del(&use->list); - kfree(use); - sysfs_remove_link(i->holders_dir, mod->name); - /* There can be at most one match. */ - break; - } - } + mutex_lock(&module_mutex); + list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) { + struct module *i = use->target; + pr_debug("%s unusing %s\n", mod->name, i->name); + module_put(i); + list_del(&use->source_list); + list_del(&use->target_list); + kfree(use); } + mutex_unlock(&module_mutex); + + free_percpu(mod->refptr); } #ifdef CONFIG_MODULE_FORCE_UNLOAD @@ -609,7 +731,7 @@ static inline int try_force_unload(unsigned int flags) { int ret = (flags & O_TRUNC); if (ret) - add_taint(TAINT_FORCED_RMMOD); + add_taint(TAINT_FORCED_RMMOD, LOCKDEP_NOW_UNRELIABLE); return ret; } #else @@ -631,8 +753,8 @@ static int __try_stop_module(void *_sref) { struct stopref *sref = _sref; - /* If it's not unused, quit unless we are told to block. */ - if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) { + /* If it's not unused, quit unless we're forcing. */ + if (module_refcount(sref->mod) != 0) { if (!(*sref->forced = try_force_unload(sref->flags))) return -EWOULDBLOCK; } @@ -646,45 +768,47 @@ static int try_stop_module(struct module *mod, int flags, int *forced) { struct stopref sref = { mod, flags, forced }; - return stop_machine_run(__try_stop_module, &sref, NR_CPUS); + return stop_machine(__try_stop_module, &sref, NULL); } -unsigned int module_refcount(struct module *mod) +unsigned long module_refcount(struct module *mod) { - unsigned int i, total = 0; + unsigned long incs = 0, decs = 0; + int cpu; - for (i = 0; i < NR_CPUS; i++) - total += local_read(&mod->ref[i].count); - return total; + for_each_possible_cpu(cpu) + decs += per_cpu_ptr(mod->refptr, cpu)->decs; + /* + * ensure the incs are added up after the decs. + * module_put ensures incs are visible before decs with smp_wmb. + * + * This 2-count scheme avoids the situation where the refcount + * for CPU0 is read, then CPU0 increments the module refcount, + * then CPU1 drops that refcount, then the refcount for CPU1 is + * read. We would record a decrement but not its corresponding + * increment so we would see a low count (disaster). + * + * Rare situation? But module_refcount can be preempted, and we + * might be tallying up 4096+ CPUs. So it is not impossible. + */ + smp_rmb(); + for_each_possible_cpu(cpu) + incs += per_cpu_ptr(mod->refptr, cpu)->incs; + return incs - decs; } EXPORT_SYMBOL(module_refcount); /* This exists whether we can unload or not */ static void free_module(struct module *mod); -static void wait_for_zero_refcount(struct module *mod) -{ - /* Since we might sleep for some time, drop the semaphore first */ - mutex_unlock(&module_mutex); - for (;;) { - DEBUGP("Looking at refcount...\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - if (module_refcount(mod) == 0) - break; - schedule(); - } - current->state = TASK_RUNNING; - mutex_lock(&module_mutex); -} - -asmlinkage long -sys_delete_module(const char __user *name_user, unsigned int flags) +SYSCALL_DEFINE2(delete_module, const char __user *, name_user, + unsigned int, flags) { struct module *mod; char name[MODULE_NAME_LEN]; int ret, forced = 0; - if (!capable(CAP_SYS_MODULE)) + if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) @@ -700,7 +824,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags) goto out; } - if (!list_empty(&mod->modules_which_use_me)) { + if (!list_empty(&mod->source_list)) { /* Other modules depend on us: get rid of them first. */ ret = -EWOULDBLOCK; goto out; @@ -708,9 +832,8 @@ sys_delete_module(const char __user *name_user, unsigned int flags) /* Doing init or already dying? */ if (mod->state != MODULE_STATE_LIVE) { - /* FIXME: if (force), slam module count and wake up - waiter --RR */ - DEBUGP("%s already dying\n", mod->name); + /* FIXME: if (force), slam module count damn the torpedoes */ + pr_debug("%s already dying\n", mod->name); ret = -EBUSY; goto out; } @@ -725,45 +848,41 @@ sys_delete_module(const char __user *name_user, unsigned int flags) } } - /* Set this up before setting mod->state */ - mod->waiter = current; - /* Stop the machine so refcounts can't move and disable module. */ ret = try_stop_module(mod, flags, &forced); if (ret != 0) goto out; - /* Never wait if forced. */ - if (!forced && module_refcount(mod) != 0) - wait_for_zero_refcount(mod); - - /* Final destruction now noone is using it. */ - if (mod->exit != NULL) { - mutex_unlock(&module_mutex); + mutex_unlock(&module_mutex); + /* Final destruction now no one is using it. */ + if (mod->exit != NULL) mod->exit(); - mutex_lock(&module_mutex); - } + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_GOING, mod); + async_synchronize_full(); + /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); - free_module(mod); - out: + free_module(mod); + return 0; +out: mutex_unlock(&module_mutex); return ret; } -static void print_unload_info(struct seq_file *m, struct module *mod) +static inline void print_unload_info(struct seq_file *m, struct module *mod) { struct module_use *use; int printed_something = 0; - seq_printf(m, " %u ", module_refcount(mod)); + seq_printf(m, " %lu ", module_refcount(mod)); /* Always include a trailing , so userspace can differentiate between this and the old multi-field proc format. */ - list_for_each_entry(use, &mod->modules_which_use_me, list) { + list_for_each_entry(use, &mod->source_list, source_list) { printed_something = 1; - seq_printf(m, "%s,", use->module_which_uses->name); + seq_printf(m, "%s,", use->source->name); } if (mod->init != NULL && mod->exit == NULL) { @@ -778,55 +897,86 @@ static void print_unload_info(struct seq_file *m, struct module *mod) void __symbol_put(const char *symbol) { struct module *owner; - const unsigned long *crc; preempt_disable(); - if (IS_ERR_VALUE(__find_symbol(symbol, &owner, &crc, 1))) + if (!find_symbol(symbol, &owner, NULL, true, false)) BUG(); module_put(owner); preempt_enable(); } EXPORT_SYMBOL(__symbol_put); +/* Note this assumes addr is a function, which it currently always is. */ void symbol_put_addr(void *addr) { struct module *modaddr; + unsigned long a = (unsigned long)dereference_function_descriptor(addr); - if (core_kernel_text((unsigned long)addr)) + if (core_kernel_text(a)) return; - if (!(modaddr = module_text_address((unsigned long)addr))) - BUG(); + /* module_text_address is safe here: we're supposed to have reference + * to module from symbol_get, so it can't go away. */ + modaddr = __module_text_address(a); + BUG_ON(!modaddr); module_put(modaddr); } EXPORT_SYMBOL_GPL(symbol_put_addr); static ssize_t show_refcnt(struct module_attribute *mattr, - struct module *mod, char *buffer) + struct module_kobject *mk, char *buffer) { - return sprintf(buffer, "%u\n", module_refcount(mod)); + return sprintf(buffer, "%lu\n", module_refcount(mk->mod)); } -static struct module_attribute refcnt = { - .attr = { .name = "refcnt", .mode = 0444 }, - .show = show_refcnt, -}; +static struct module_attribute modinfo_refcnt = + __ATTR(refcnt, 0444, show_refcnt, NULL); + +void __module_get(struct module *module) +{ + if (module) { + preempt_disable(); + __this_cpu_inc(module->refptr->incs); + trace_module_get(module, _RET_IP_); + preempt_enable(); + } +} +EXPORT_SYMBOL(__module_get); + +bool try_module_get(struct module *module) +{ + bool ret = true; + + if (module) { + preempt_disable(); + + if (likely(module_is_live(module))) { + __this_cpu_inc(module->refptr->incs); + trace_module_get(module, _RET_IP_); + } else + ret = false; + + preempt_enable(); + } + return ret; +} +EXPORT_SYMBOL(try_module_get); void module_put(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_dec(&module->ref[cpu].count); - /* Maybe they're waiting for us to drop reference? */ - if (unlikely(!module_is_live(module))) - wake_up_process(module->waiter); - put_cpu(); + preempt_disable(); + smp_wmb(); /* see comment in module_refcount */ + __this_cpu_inc(module->refptr->decs); + + trace_module_put(module, _RET_IP_); + preempt_enable(); } } EXPORT_SYMBOL(module_put); #else /* !CONFIG_MODULE_UNLOAD */ -static void print_unload_info(struct seq_file *m, struct module *mod) +static inline void print_unload_info(struct seq_file *m, struct module *mod) { /* We don't know the usage count, or what modules are using. */ seq_printf(m, " - -"); @@ -836,22 +986,46 @@ static inline void module_unload_free(struct module *mod) { } -static inline int use_module(struct module *a, struct module *b) +int ref_module(struct module *a, struct module *b) { - return strong_try_module_get(b) == 0; + return strong_try_module_get(b); } +EXPORT_SYMBOL_GPL(ref_module); -static inline void module_unload_init(struct module *mod) +static inline int module_unload_init(struct module *mod) { + return 0; } #endif /* CONFIG_MODULE_UNLOAD */ +static size_t module_flags_taint(struct module *mod, char *buf) +{ + size_t l = 0; + + if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE)) + buf[l++] = 'P'; + if (mod->taints & (1 << TAINT_OOT_MODULE)) + buf[l++] = 'O'; + if (mod->taints & (1 << TAINT_FORCED_MODULE)) + buf[l++] = 'F'; + if (mod->taints & (1 << TAINT_CRAP)) + buf[l++] = 'C'; + if (mod->taints & (1 << TAINT_UNSIGNED_MODULE)) + buf[l++] = 'E'; + /* + * TAINT_FORCED_RMMOD: could be added. + * TAINT_CPU_OUT_OF_SPEC, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't + * apply to modules. + */ + return l; +} + static ssize_t show_initstate(struct module_attribute *mattr, - struct module *mod, char *buffer) + struct module_kobject *mk, char *buffer) { const char *state = "unknown"; - switch (mod->state) { + switch (mk->mod->state) { case MODULE_STATE_LIVE: state = "live"; break; @@ -861,33 +1035,106 @@ static ssize_t show_initstate(struct module_attribute *mattr, case MODULE_STATE_GOING: state = "going"; break; + default: + BUG(); } return sprintf(buffer, "%s\n", state); } -static struct module_attribute initstate = { - .attr = { .name = "initstate", .mode = 0444 }, - .show = show_initstate, -}; +static struct module_attribute modinfo_initstate = + __ATTR(initstate, 0444, show_initstate, NULL); + +static ssize_t store_uevent(struct module_attribute *mattr, + struct module_kobject *mk, + const char *buffer, size_t count) +{ + enum kobject_action action; + + if (kobject_action_type(buffer, count, &action) == 0) + kobject_uevent(&mk->kobj, action); + return count; +} + +struct module_attribute module_uevent = + __ATTR(uevent, 0200, NULL, store_uevent); + +static ssize_t show_coresize(struct module_attribute *mattr, + struct module_kobject *mk, char *buffer) +{ + return sprintf(buffer, "%u\n", mk->mod->core_size); +} + +static struct module_attribute modinfo_coresize = + __ATTR(coresize, 0444, show_coresize, NULL); + +static ssize_t show_initsize(struct module_attribute *mattr, + struct module_kobject *mk, char *buffer) +{ + return sprintf(buffer, "%u\n", mk->mod->init_size); +} + +static struct module_attribute modinfo_initsize = + __ATTR(initsize, 0444, show_initsize, NULL); + +static ssize_t show_taint(struct module_attribute *mattr, + struct module_kobject *mk, char *buffer) +{ + size_t l; + + l = module_flags_taint(mk->mod, buffer); + buffer[l++] = '\n'; + return l; +} + +static struct module_attribute modinfo_taint = + __ATTR(taint, 0444, show_taint, NULL); static struct module_attribute *modinfo_attrs[] = { + &module_uevent, &modinfo_version, &modinfo_srcversion, - &initstate, + &modinfo_initstate, + &modinfo_coresize, + &modinfo_initsize, + &modinfo_taint, #ifdef CONFIG_MODULE_UNLOAD - &refcnt, + &modinfo_refcnt, #endif NULL, }; static const char vermagic[] = VERMAGIC_STRING; +static int try_to_force_load(struct module *mod, const char *reason) +{ +#ifdef CONFIG_MODULE_FORCE_LOAD + if (!test_taint(TAINT_FORCED_MODULE)) + pr_warn("%s: %s: kernel tainted.\n", mod->name, reason); + add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_NOW_UNRELIABLE); + return 0; +#else + return -ENOEXEC; +#endif +} + #ifdef CONFIG_MODVERSIONS +/* If the arch applies (non-zero) relocations to kernel kcrctab, unapply it. */ +static unsigned long maybe_relocated(unsigned long crc, + const struct module *crc_owner) +{ +#ifdef ARCH_RELOCATES_KCRCTAB + if (crc_owner == NULL) + return crc - (unsigned long)reloc_start; +#endif + return crc; +} + static int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, struct module *mod, - const unsigned long *crc) + const unsigned long *crc, + const struct module *crc_owner) { unsigned int i, num_versions; struct modversion_info *versions; @@ -896,6 +1143,10 @@ static int check_version(Elf_Shdr *sechdrs, if (!crc) return 1; + /* No versions at all? modprobe --force does this. */ + if (versindex == 0) + return try_to_force_load(mod, symname) == 0; + versions = (void *) sechdrs[versindex].sh_addr; num_versions = sechdrs[versindex].sh_size / sizeof(struct modversion_info); @@ -904,20 +1155,20 @@ static int check_version(Elf_Shdr *sechdrs, if (strcmp(versions[i].name, symname) != 0) continue; - if (versions[i].crc == *crc) + if (versions[i].crc == maybe_relocated(*crc, crc_owner)) return 1; - printk("%s: disagrees about version of symbol %s\n", - mod->name, symname); - DEBUGP("Found checksum %lX vs module %lX\n", - *crc, versions[i].crc); - return 0; + pr_debug("Found checksum %lX vs module %lX\n", + maybe_relocated(*crc, crc_owner), versions[i].crc); + goto bad_version; } - /* Not in module's version table. OK, but that taints the kernel. */ - if (!(tainted & TAINT_FORCED_MODULE)) - printk("%s: no version for \"%s\" found: kernel tainted.\n", - mod->name, symname); - add_taint_module(mod, TAINT_FORCED_MODULE); - return 1; + + pr_warn("%s: no symbol version for %s\n", mod->name, symname); + return 0; + +bad_version: + printk("%s: disagrees about version of symbol %s\n", + mod->name, symname); + return 0; } static inline int check_modstruct_version(Elf_Shdr *sechdrs, @@ -925,20 +1176,25 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, struct module *mod) { const unsigned long *crc; - struct module *owner; - if (IS_ERR_VALUE(__find_symbol("struct_module", - &owner, &crc, 1))) + /* Since this should be found in kernel (which can't be removed), + * no locking is necessary. */ + if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL, + &crc, true, false)) BUG(); - return check_version(sechdrs, versindex, "struct_module", mod, - crc); + return check_version(sechdrs, versindex, + VMLINUX_SYMBOL_STR(module_layout), mod, crc, + NULL); } -/* First part is kernel version, which we ignore. */ -static inline int same_magic(const char *amagic, const char *bmagic) +/* First part is kernel version, which we ignore if module has crcs. */ +static inline int same_magic(const char *amagic, const char *bmagic, + bool has_crcs) { - amagic += strcspn(amagic, " "); - bmagic += strcspn(bmagic, " "); + if (has_crcs) { + amagic += strcspn(amagic, " "); + bmagic += strcspn(bmagic, " "); + } return strcmp(amagic, bmagic) == 0; } #else @@ -946,7 +1202,8 @@ static inline int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, struct module *mod, - const unsigned long *crc) + const unsigned long *crc, + const struct module *crc_owner) { return 1; } @@ -958,59 +1215,112 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, return 1; } -static inline int same_magic(const char *amagic, const char *bmagic) +static inline int same_magic(const char *amagic, const char *bmagic, + bool has_crcs) { return strcmp(amagic, bmagic) == 0; } #endif /* CONFIG_MODVERSIONS */ -/* Resolve a symbol for this module. I.e. if we find one, record usage. - Must be holding module_mutex. */ -static unsigned long resolve_symbol(Elf_Shdr *sechdrs, - unsigned int versindex, - const char *name, - struct module *mod) +/* Resolve a symbol for this module. I.e. if we find one, record usage. */ +static const struct kernel_symbol *resolve_symbol(struct module *mod, + const struct load_info *info, + const char *name, + char ownername[]) { struct module *owner; - unsigned long ret; + const struct kernel_symbol *sym; const unsigned long *crc; + int err; - ret = __find_symbol(name, &owner, &crc, - !(mod->taints & TAINT_PROPRIETARY_MODULE)); - if (!IS_ERR_VALUE(ret)) { - /* use_module can fail due to OOM, - or module initialization or unloading */ - if (!check_version(sechdrs, versindex, name, mod, crc) || - !use_module(mod, owner)) - ret = -EINVAL; + mutex_lock(&module_mutex); + sym = find_symbol(name, &owner, &crc, + !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); + if (!sym) + goto unlock; + + if (!check_version(info->sechdrs, info->index.vers, name, mod, crc, + owner)) { + sym = ERR_PTR(-EINVAL); + goto getname; } - return ret; + + err = ref_module(mod, owner); + if (err) { + sym = ERR_PTR(err); + goto getname; + } + +getname: + /* We must make copy under the lock if we failed to get ref. */ + strncpy(ownername, module_name(owner), MODULE_NAME_LEN); +unlock: + mutex_unlock(&module_mutex); + return sym; +} + +static const struct kernel_symbol * +resolve_symbol_wait(struct module *mod, + const struct load_info *info, + const char *name) +{ + const struct kernel_symbol *ksym; + char owner[MODULE_NAME_LEN]; + + if (wait_event_interruptible_timeout(module_wq, + !IS_ERR(ksym = resolve_symbol(mod, info, name, owner)) + || PTR_ERR(ksym) != -EBUSY, + 30 * HZ) <= 0) { + pr_warn("%s: gave up waiting for init of module %s.\n", + mod->name, owner); + } + return ksym; } /* * /sys/module/foo/sections stuff * J. Corbet <corbet@lwn.net> */ -#if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) +#ifdef CONFIG_SYSFS + +#ifdef CONFIG_KALLSYMS +static inline bool sect_empty(const Elf_Shdr *sect) +{ + return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0; +} + +struct module_sect_attr +{ + struct module_attribute mattr; + char *name; + unsigned long address; +}; + +struct module_sect_attrs +{ + struct attribute_group grp; + unsigned int nsections; + struct module_sect_attr attrs[0]; +}; + static ssize_t module_sect_show(struct module_attribute *mattr, - struct module *mod, char *buf) + struct module_kobject *mk, char *buf) { struct module_sect_attr *sattr = container_of(mattr, struct module_sect_attr, mattr); - return sprintf(buf, "0x%lx\n", sattr->address); + return sprintf(buf, "0x%pK\n", (void *)sattr->address); } static void free_sect_attrs(struct module_sect_attrs *sect_attrs) { - int section; + unsigned int section; for (section = 0; section < sect_attrs->nsections; section++) kfree(sect_attrs->attrs[section].name); kfree(sect_attrs); } -static void add_sect_attrs(struct module *mod, unsigned int nsect, - char *secstrings, Elf_Shdr *sechdrs) +static void add_sect_attrs(struct module *mod, const struct load_info *info) { unsigned int nloaded = 0, i, size[2]; struct module_sect_attrs *sect_attrs; @@ -1018,8 +1328,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, struct attribute **gattr; /* Count loaded sections and allocate structures */ - for (i = 0; i < nsect; i++) - if (sechdrs[i].sh_flags & SHF_ALLOC) + for (i = 0; i < info->hdr->e_shnum; i++) + if (!sect_empty(&info->sechdrs[i])) nloaded++; size[0] = ALIGN(sizeof(*sect_attrs) + nloaded * sizeof(sect_attrs->attrs[0]), @@ -1036,15 +1346,17 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, sect_attrs->nsections = 0; sattr = §_attrs->attrs[0]; gattr = §_attrs->grp.attrs[0]; - for (i = 0; i < nsect; i++) { - if (! (sechdrs[i].sh_flags & SHF_ALLOC)) + for (i = 0; i < info->hdr->e_shnum; i++) { + Elf_Shdr *sec = &info->sechdrs[i]; + if (sect_empty(sec)) continue; - sattr->address = sechdrs[i].sh_addr; - sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, + sattr->address = sec->sh_addr; + sattr->name = kstrdup(info->secstrings + sec->sh_name, GFP_KERNEL); if (sattr->name == NULL) goto out; sect_attrs->nsections++; + sysfs_attr_init(&sattr->mattr.attr); sattr->mattr.show = module_sect_show; sattr->mattr.store = NULL; sattr->mattr.attr.name = sattr->name; @@ -1084,7 +1396,7 @@ struct module_notes_attrs { struct bin_attribute attrs[0]; }; -static ssize_t module_notes_read(struct kobject *kobj, +static ssize_t module_notes_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { @@ -1102,23 +1414,26 @@ static void free_notes_attrs(struct module_notes_attrs *notes_attrs, while (i-- > 0) sysfs_remove_bin_file(notes_attrs->dir, ¬es_attrs->attrs[i]); - kobject_del(notes_attrs->dir); + kobject_put(notes_attrs->dir); } kfree(notes_attrs); } -static void add_notes_attrs(struct module *mod, unsigned int nsect, - char *secstrings, Elf_Shdr *sechdrs) +static void add_notes_attrs(struct module *mod, const struct load_info *info) { unsigned int notes, loaded, i; struct module_notes_attrs *notes_attrs; struct bin_attribute *nattr; + /* failed to create section attributes, so can't create notes */ + if (!mod->sect_attrs) + return; + /* Count notes sections and allocate structures. */ notes = 0; - for (i = 0; i < nsect; i++) - if ((sechdrs[i].sh_flags & SHF_ALLOC) && - (sechdrs[i].sh_type == SHT_NOTE)) + for (i = 0; i < info->hdr->e_shnum; i++) + if (!sect_empty(&info->sechdrs[i]) && + (info->sechdrs[i].sh_type == SHT_NOTE)) ++notes; if (notes == 0) @@ -1132,14 +1447,15 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, notes_attrs->notes = notes; nattr = ¬es_attrs->attrs[0]; - for (loaded = i = 0; i < nsect; ++i) { - if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + for (loaded = i = 0; i < info->hdr->e_shnum; ++i) { + if (sect_empty(&info->sechdrs[i])) continue; - if (sechdrs[i].sh_type == SHT_NOTE) { + if (info->sechdrs[i].sh_type == SHT_NOTE) { + sysfs_bin_attr_init(nattr); nattr->attr.name = mod->sect_attrs->attrs[loaded].name; nattr->attr.mode = S_IRUGO; - nattr->size = sechdrs[i].sh_size; - nattr->private = (void *) sechdrs[i].sh_addr; + nattr->size = info->sechdrs[i].sh_size; + nattr->private = (void *) info->sechdrs[i].sh_addr; nattr->read = module_notes_read; ++nattr; } @@ -1170,8 +1486,8 @@ static void remove_notes_attrs(struct module *mod) #else -static inline void add_sect_attrs(struct module *mod, unsigned int nsect, - char *sectstrings, Elf_Shdr *sechdrs) +static inline void add_sect_attrs(struct module *mod, + const struct load_info *info) { } @@ -1179,18 +1495,44 @@ static inline void remove_sect_attrs(struct module *mod) { } -static inline void add_notes_attrs(struct module *mod, unsigned int nsect, - char *sectstrings, Elf_Shdr *sechdrs) +static inline void add_notes_attrs(struct module *mod, + const struct load_info *info) { } static inline void remove_notes_attrs(struct module *mod) { } +#endif /* CONFIG_KALLSYMS */ + +static void add_usage_links(struct module *mod) +{ +#ifdef CONFIG_MODULE_UNLOAD + struct module_use *use; + int nowarn; + + mutex_lock(&module_mutex); + list_for_each_entry(use, &mod->target_list, target_list) { + nowarn = sysfs_create_link(use->target->holders_dir, + &mod->mkobj.kobj, mod->name); + } + mutex_unlock(&module_mutex); #endif +} -#ifdef CONFIG_SYSFS -int module_add_modinfo_attrs(struct module *mod) +static void del_usage_links(struct module *mod) +{ +#ifdef CONFIG_MODULE_UNLOAD + struct module_use *use; + + mutex_lock(&module_mutex); + list_for_each_entry(use, &mod->target_list, target_list) + sysfs_remove_link(use->target->holders_dir, mod->name); + mutex_unlock(&module_mutex); +#endif +} + +static int module_add_modinfo_attrs(struct module *mod) { struct module_attribute *attr; struct module_attribute *temp_attr; @@ -1208,6 +1550,7 @@ int module_add_modinfo_attrs(struct module *mod) if (!attr->test || (attr->test && attr->test(mod))) { memcpy(temp_attr, attr, sizeof(*temp_attr)); + sysfs_attr_init(&temp_attr->attr); error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); ++temp_attr; } @@ -1215,7 +1558,7 @@ int module_add_modinfo_attrs(struct module *mod) return error; } -void module_remove_modinfo_attrs(struct module *mod) +static void module_remove_modinfo_attrs(struct module *mod) { struct module_attribute *attr; int i; @@ -1231,21 +1574,28 @@ void module_remove_modinfo_attrs(struct module *mod) kfree(mod->modinfo_attrs); } -int mod_sysfs_init(struct module *mod) +static void mod_kobject_put(struct module *mod) +{ + DECLARE_COMPLETION_ONSTACK(c); + mod->mkobj.kobj_completion = &c; + kobject_put(&mod->mkobj.kobj); + wait_for_completion(&c); +} + +static int mod_sysfs_init(struct module *mod) { int err; struct kobject *kobj; if (!module_sysfs_initialized) { - printk(KERN_ERR "%s: module sysfs not initialized\n", - mod->name); + pr_err("%s: module sysfs not initialized\n", mod->name); err = -EINVAL; goto out; } kobj = kset_find_obj(module_kset, mod->name); if (kobj) { - printk(KERN_ERR "%s: module is already loaded\n", mod->name); + pr_err("%s: module is already loaded\n", mod->name); kobject_put(kobj); err = -EINVAL; goto out; @@ -1258,19 +1608,24 @@ int mod_sysfs_init(struct module *mod) err = kobject_init_and_add(&mod->mkobj.kobj, &module_ktype, NULL, "%s", mod->name); if (err) - kobject_put(&mod->mkobj.kobj); + mod_kobject_put(mod); /* delay uevent until full sysfs population */ out: return err; } -int mod_sysfs_setup(struct module *mod, +static int mod_sysfs_setup(struct module *mod, + const struct load_info *info, struct kernel_param *kparam, unsigned int num_params) { int err; + err = mod_sysfs_init(mod); + if (err) + goto out; + mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj); if (!mod->holders_dir) { err = -ENOMEM; @@ -1285,6 +1640,10 @@ int mod_sysfs_setup(struct module *mod, if (err) goto out_unreg_param; + add_usage_links(mod); + add_sect_attrs(mod, info); + add_notes_attrs(mod, info); + kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); return 0; @@ -1293,52 +1652,201 @@ out_unreg_param: out_unreg_holders: kobject_put(mod->holders_dir); out_unreg: - kobject_put(&mod->mkobj.kobj); + mod_kobject_put(mod); +out: return err; } -#endif -static void mod_kobject_remove(struct module *mod) +static void mod_sysfs_fini(struct module *mod) { + remove_notes_attrs(mod); + remove_sect_attrs(mod); + mod_kobject_put(mod); +} + +#else /* !CONFIG_SYSFS */ + +static int mod_sysfs_setup(struct module *mod, + const struct load_info *info, + struct kernel_param *kparam, + unsigned int num_params) +{ + return 0; +} + +static void mod_sysfs_fini(struct module *mod) +{ +} + +static void module_remove_modinfo_attrs(struct module *mod) +{ +} + +static void del_usage_links(struct module *mod) +{ +} + +#endif /* CONFIG_SYSFS */ + +static void mod_sysfs_teardown(struct module *mod) +{ + del_usage_links(mod); module_remove_modinfo_attrs(mod); module_param_sysfs_remove(mod); kobject_put(mod->mkobj.drivers_dir); kobject_put(mod->holders_dir); - kobject_put(&mod->mkobj.kobj); + mod_sysfs_fini(mod); } /* - * link the module with the whole machine is stopped with interrupts off + * unlink the module with the whole machine is stopped with interrupts off * - this defends against kallsyms not taking locks */ -static int __link_module(void *_mod) +static int __unlink_module(void *_mod) { struct module *mod = _mod; - list_add(&mod->list, &modules); + list_del(&mod->list); + module_bug_cleanup(mod); return 0; } +#ifdef CONFIG_DEBUG_SET_MODULE_RONX /* - * unlink the module with the whole machine is stopped with interrupts off - * - this defends against kallsyms not taking locks + * LKM RO/NX protection: protect module's text/ro-data + * from modification and any data from execution. */ -static int __unlink_module(void *_mod) +void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages)) { - struct module *mod = _mod; - list_del(&mod->list); - return 0; + unsigned long begin_pfn = PFN_DOWN((unsigned long)start); + unsigned long end_pfn = PFN_DOWN((unsigned long)end); + + if (end_pfn > begin_pfn) + set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); +} + +static void set_section_ro_nx(void *base, + unsigned long text_size, + unsigned long ro_size, + unsigned long total_size) +{ + /* begin and end PFNs of the current subsection */ + unsigned long begin_pfn; + unsigned long end_pfn; + + /* + * Set RO for module text and RO-data: + * - Always protect first page. + * - Do not protect last partial page. + */ + if (ro_size > 0) + set_page_attributes(base, base + ro_size, set_memory_ro); + + /* + * Set NX permissions for module data: + * - Do not protect first partial page. + * - Always protect last page. + */ + if (total_size > text_size) { + begin_pfn = PFN_UP((unsigned long)base + text_size); + end_pfn = PFN_UP((unsigned long)base + total_size); + if (end_pfn > begin_pfn) + set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); + } +} + +static void unset_module_core_ro_nx(struct module *mod) +{ + set_page_attributes(mod->module_core + mod->core_text_size, + mod->module_core + mod->core_size, + set_memory_x); + set_page_attributes(mod->module_core, + mod->module_core + mod->core_ro_size, + set_memory_rw); +} + +static void unset_module_init_ro_nx(struct module *mod) +{ + set_page_attributes(mod->module_init + mod->init_text_size, + mod->module_init + mod->init_size, + set_memory_x); + set_page_attributes(mod->module_init, + mod->module_init + mod->init_ro_size, + set_memory_rw); +} + +/* Iterate through all modules and set each module's text as RW */ +void set_all_modules_text_rw(void) +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_rw); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_rw); + } + } + mutex_unlock(&module_mutex); } -/* Free a module, remove from lists, etc (must hold module_mutex). */ +/* Iterate through all modules and set each module's text as RO */ +void set_all_modules_text_ro(void) +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_ro); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_ro); + } + } + mutex_unlock(&module_mutex); +} +#else +static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } +static void unset_module_core_ro_nx(struct module *mod) { } +static void unset_module_init_ro_nx(struct module *mod) { } +#endif + +void __weak module_free(struct module *mod, void *module_region) +{ + vfree(module_region); +} + +void __weak module_arch_cleanup(struct module *mod) +{ +} + +/* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { - /* Delete from various lists */ - stop_machine_run(__unlink_module, mod, NR_CPUS); - remove_notes_attrs(mod); - remove_sect_attrs(mod); - mod_kobject_remove(mod); + trace_module_free(mod); - unwind_remove_table(mod->unwind_info, 0); + mod_sysfs_teardown(mod); + + /* We leave it in list to prevent duplicate loads, but make sure + * that noone uses it while it's being deconstructed. */ + mod->state = MODULE_STATE_UNFORMED; + + /* Remove dynamic debug info */ + ddebug_remove_module(mod->name); /* Arch-specific cleanup. */ module_arch_cleanup(mod); @@ -1346,91 +1854,106 @@ static void free_module(struct module *mod) /* Module unload stuff */ module_unload_free(mod); + /* Free any allocated parameters. */ + destroy_params(mod->kp, mod->num_kp); + + /* Now we can delete it from the lists */ + mutex_lock(&module_mutex); + stop_machine(__unlink_module, mod, NULL); + mutex_unlock(&module_mutex); + /* This may be NULL, but that's OK */ + unset_module_init_ro_nx(mod); module_free(mod, mod->module_init); kfree(mod->args); - if (mod->percpu) - percpu_modfree(mod->percpu); + percpu_modfree(mod); /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ + unset_module_core_ro_nx(mod); module_free(mod, mod->module_core); + +#ifdef CONFIG_MPU + update_protections(current->mm); +#endif } void *__symbol_get(const char *symbol) { struct module *owner; - unsigned long value; - const unsigned long *crc; + const struct kernel_symbol *sym; preempt_disable(); - value = __find_symbol(symbol, &owner, &crc, 1); - if (IS_ERR_VALUE(value)) - value = 0; - else if (strong_try_module_get(owner)) - value = 0; + sym = find_symbol(symbol, &owner, NULL, true, true); + if (sym && strong_try_module_get(owner)) + sym = NULL; preempt_enable(); - return (void *)value; + return sym ? (void *)sym->value : NULL; } EXPORT_SYMBOL_GPL(__symbol_get); /* * Ensure that an exported symbol [global namespace] does not already exist * in the kernel or in some other module's exported symbol table. + * + * You must hold the module_mutex. */ static int verify_export_symbols(struct module *mod) { - const char *name = NULL; - unsigned long i, ret = 0; + unsigned int i; struct module *owner; - const unsigned long *crc; - - for (i = 0; i < mod->num_syms; i++) - if (!IS_ERR_VALUE(__find_symbol(mod->syms[i].name, - &owner, &crc, 1))) { - name = mod->syms[i].name; - ret = -ENOEXEC; - goto dup; - } + const struct kernel_symbol *s; + struct { + const struct kernel_symbol *sym; + unsigned int num; + } arr[] = { + { mod->syms, mod->num_syms }, + { mod->gpl_syms, mod->num_gpl_syms }, + { mod->gpl_future_syms, mod->num_gpl_future_syms }, +#ifdef CONFIG_UNUSED_SYMBOLS + { mod->unused_syms, mod->num_unused_syms }, + { mod->unused_gpl_syms, mod->num_unused_gpl_syms }, +#endif + }; - for (i = 0; i < mod->num_gpl_syms; i++) - if (!IS_ERR_VALUE(__find_symbol(mod->gpl_syms[i].name, - &owner, &crc, 1))) { - name = mod->gpl_syms[i].name; - ret = -ENOEXEC; - goto dup; + for (i = 0; i < ARRAY_SIZE(arr); i++) { + for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { + if (find_symbol(s->name, &owner, NULL, true, false)) { + pr_err("%s: exports duplicate symbol %s" + " (owned by %s)\n", + mod->name, s->name, module_name(owner)); + return -ENOEXEC; + } } - -dup: - if (ret) - printk(KERN_ERR "%s: exports duplicate symbol %s (owned by %s)\n", - mod->name, name, module_name(owner)); - - return ret; + } + return 0; } /* Change all symbols so that st_value encodes the pointer directly. */ -static int simplify_symbols(Elf_Shdr *sechdrs, - unsigned int symindex, - const char *strtab, - unsigned int versindex, - unsigned int pcpuindex, - struct module *mod) -{ - Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; +static int simplify_symbols(struct module *mod, const struct load_info *info) +{ + Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; + Elf_Sym *sym = (void *)symsec->sh_addr; unsigned long secbase; - unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym); + unsigned int i; int ret = 0; + const struct kernel_symbol *ksym; + + for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) { + const char *name = info->strtab + sym[i].st_name; - for (i = 1; i < n; i++) { switch (sym[i].st_shndx) { case SHN_COMMON: + /* Ignore common symbols */ + if (!strncmp(name, "__gnu_lto", 9)) + break; + /* We compiled with -fno-common. These are not supposed to happen. */ - DEBUGP("Common symbol: %s\n", strtab + sym[i].st_name); + pr_debug("Common symbol: %s\n", name); printk("%s: please compile with -fno-common\n", mod->name); ret = -ENOEXEC; @@ -1438,33 +1961,33 @@ static int simplify_symbols(Elf_Shdr *sechdrs, case SHN_ABS: /* Don't need to do anything */ - DEBUGP("Absolute symbol: 0x%08lx\n", + pr_debug("Absolute symbol: 0x%08lx\n", (long)sym[i].st_value); break; case SHN_UNDEF: - sym[i].st_value - = resolve_symbol(sechdrs, versindex, - strtab + sym[i].st_name, mod); - + ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. */ - if (!IS_ERR_VALUE(sym[i].st_value)) + if (ksym && !IS_ERR(ksym)) { + sym[i].st_value = ksym->value; break; + } + /* Ok if weak. */ - if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) + if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK) break; - printk(KERN_WARNING "%s: Unknown symbol %s\n", - mod->name, strtab + sym[i].st_name); - ret = -ENOENT; + pr_warn("%s: Unknown symbol %s (err %li)\n", + mod->name, name, PTR_ERR(ksym)); + ret = PTR_ERR(ksym) ?: -ENOENT; break; default: /* Divert to percpu allocation if a percpu var. */ - if (sym[i].st_shndx == pcpuindex) - secbase = (unsigned long)mod->percpu; + if (sym[i].st_shndx == info->index.pcpu) + secbase = (unsigned long)mod_percpu(mod); else - secbase = sechdrs[sym[i].st_shndx].sh_addr; + secbase = info->sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; break; } @@ -1473,11 +1996,50 @@ static int simplify_symbols(Elf_Shdr *sechdrs, return ret; } +static int apply_relocations(struct module *mod, const struct load_info *info) +{ + unsigned int i; + int err = 0; + + /* Now do relocations. */ + for (i = 1; i < info->hdr->e_shnum; i++) { + unsigned int infosec = info->sechdrs[i].sh_info; + + /* Not a valid relocation section? */ + if (infosec >= info->hdr->e_shnum) + continue; + + /* Don't bother with non-allocated sections */ + if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC)) + continue; + + if (info->sechdrs[i].sh_type == SHT_REL) + err = apply_relocate(info->sechdrs, info->strtab, + info->index.sym, i, mod); + else if (info->sechdrs[i].sh_type == SHT_RELA) + err = apply_relocate_add(info->sechdrs, info->strtab, + info->index.sym, i, mod); + if (err < 0) + break; + } + return err; +} + +/* Additional bytes needed by arch in front of individual sections */ +unsigned int __weak arch_mod_section_prepend(struct module *mod, + unsigned int section) +{ + /* default implementation just returns zero */ + return 0; +} + /* Update size with this section: return offset. */ -static long get_offset(unsigned long *size, Elf_Shdr *sechdr) +static long get_offset(struct module *mod, unsigned int *size, + Elf_Shdr *sechdr, unsigned int section) { long ret; + *size += arch_mod_section_prepend(mod, section); ret = ALIGN(*size, sechdr->sh_addralign ?: 1); *size = ret + sechdr->sh_size; return ret; @@ -1487,10 +2049,7 @@ static long get_offset(unsigned long *size, Elf_Shdr *sechdr) might -- code, read-only data, read-write data, small data. Tally sizes, and place the offsets into sh_entsize fields: high bit means it belongs in init. */ -static void layout_sections(struct module *mod, - const Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) +static void layout_sections(struct module *mod, struct load_info *info) { static unsigned long const masks[][2] = { /* NOTE: all executable code must be the first section @@ -1503,44 +2062,66 @@ static void layout_sections(struct module *mod, }; unsigned int m, i; - for (i = 0; i < hdr->e_shnum; i++) - sechdrs[i].sh_entsize = ~0UL; + for (i = 0; i < info->hdr->e_shnum; i++) + info->sechdrs[i].sh_entsize = ~0UL; - DEBUGP("Core section allocation order:\n"); + pr_debug("Core section allocation order:\n"); for (m = 0; m < ARRAY_SIZE(masks); ++m) { - for (i = 0; i < hdr->e_shnum; ++i) { - Elf_Shdr *s = &sechdrs[i]; + for (i = 0; i < info->hdr->e_shnum; ++i) { + Elf_Shdr *s = &info->sechdrs[i]; + const char *sname = info->secstrings + s->sh_name; if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1]) || s->sh_entsize != ~0UL - || strncmp(secstrings + s->sh_name, - ".init", 5) == 0) + || strstarts(sname, ".init")) continue; - s->sh_entsize = get_offset(&mod->core_size, s); - DEBUGP("\t%s\n", secstrings + s->sh_name); + s->sh_entsize = get_offset(mod, &mod->core_size, s, i); + pr_debug("\t%s\n", sname); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->core_size = debug_align(mod->core_size); mod->core_text_size = mod->core_size; + break; + case 1: /* RO: text and ro-data */ + mod->core_size = debug_align(mod->core_size); + mod->core_ro_size = mod->core_size; + break; + case 3: /* whole core */ + mod->core_size = debug_align(mod->core_size); + break; + } } - DEBUGP("Init section allocation order:\n"); + pr_debug("Init section allocation order:\n"); for (m = 0; m < ARRAY_SIZE(masks); ++m) { - for (i = 0; i < hdr->e_shnum; ++i) { - Elf_Shdr *s = &sechdrs[i]; + for (i = 0; i < info->hdr->e_shnum; ++i) { + Elf_Shdr *s = &info->sechdrs[i]; + const char *sname = info->secstrings + s->sh_name; if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1]) || s->sh_entsize != ~0UL - || strncmp(secstrings + s->sh_name, - ".init", 5) != 0) + || !strstarts(sname, ".init")) continue; - s->sh_entsize = (get_offset(&mod->init_size, s) + s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) | INIT_OFFSET_MASK); - DEBUGP("\t%s\n", secstrings + s->sh_name); + pr_debug("\t%s\n", sname); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->init_size = debug_align(mod->init_size); mod->init_text_size = mod->init_size; + break; + case 1: /* RO: text and ro-data */ + mod->init_size = debug_align(mod->init_size); + mod->init_ro_size = mod->init_size; + break; + case 3: /* whole init */ + mod->init_size = debug_align(mod->init_size); + break; + } } } @@ -1550,10 +2131,11 @@ static void set_license(struct module *mod, const char *license) license = "unspecified"; if (!license_is_gpl_compatible(license)) { - if (!(tainted & TAINT_PROPRIETARY_MODULE)) - printk(KERN_WARNING "%s: module license '%s' taints " - "kernel.\n", mod->name, license); - add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + if (!test_taint(TAINT_PROPRIETARY_MODULE)) + pr_warn("%s: module license '%s' taints kernel.\n", + mod->name, license); + add_taint_module(mod, TAINT_PROPRIETARY_MODULE, + LOCKDEP_NOW_UNRELIABLE); } } @@ -1576,54 +2158,69 @@ static char *next_string(char *string, unsigned long *secsize) return string; } -static char *get_modinfo(Elf_Shdr *sechdrs, - unsigned int info, - const char *tag) +static char *get_modinfo(struct load_info *info, const char *tag) { char *p; unsigned int taglen = strlen(tag); - unsigned long size = sechdrs[info].sh_size; + Elf_Shdr *infosec = &info->sechdrs[info->index.info]; + unsigned long size = infosec->sh_size; - for (p = (char *)sechdrs[info].sh_addr; p; p = next_string(p, &size)) { + for (p = (char *)infosec->sh_addr; p; p = next_string(p, &size)) { if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=') return p + taglen + 1; } return NULL; } -static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, - unsigned int infoindex) +static void setup_modinfo(struct module *mod, struct load_info *info) { struct module_attribute *attr; int i; for (i = 0; (attr = modinfo_attrs[i]); i++) { if (attr->setup) - attr->setup(mod, - get_modinfo(sechdrs, - infoindex, - attr->attr.name)); + attr->setup(mod, get_modinfo(info, attr->attr.name)); + } +} + +static void free_modinfo(struct module *mod) +{ + struct module_attribute *attr; + int i; + + for (i = 0; (attr = modinfo_attrs[i]); i++) { + if (attr->free) + attr->free(mod); } } #ifdef CONFIG_KALLSYMS -static int is_exported(const char *name, const struct module *mod) + +/* lookup symbol in given range of kernel_symbols */ +static const struct kernel_symbol *lookup_symbol(const char *name, + const struct kernel_symbol *start, + const struct kernel_symbol *stop) { - if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab)) - return 1; + return bsearch(name, start, stop - start, + sizeof(struct kernel_symbol), cmp_name); +} + +static int is_exported(const char *name, unsigned long value, + const struct module *mod) +{ + const struct kernel_symbol *ks; + if (!mod) + ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); else - if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms)) - return 1; - else - return 0; + ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); + return ks != NULL && ks->value == value; } /* As per nm */ -static char elf_type(const Elf_Sym *sym, - Elf_Shdr *sechdrs, - const char *secstrings, - struct module *mod) +static char elf_type(const Elf_Sym *sym, const struct load_info *info) { + const Elf_Shdr *sechdrs = info->sechdrs; + if (ELF_ST_BIND(sym->st_info) == STB_WEAK) { if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT) return 'v'; @@ -1653,400 +2250,640 @@ static char elf_type(const Elf_Sym *sym, else return 'b'; } - if (strncmp(secstrings + sechdrs[sym->st_shndx].sh_name, - ".debug", strlen(".debug")) == 0) + if (strstarts(info->secstrings + sechdrs[sym->st_shndx].sh_name, + ".debug")) { return 'n'; + } return '?'; } -static void add_kallsyms(struct module *mod, - Elf_Shdr *sechdrs, - unsigned int symindex, - unsigned int strindex, - const char *secstrings) +static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, + unsigned int shnum) { - unsigned int i; + const Elf_Shdr *sec; + + if (src->st_shndx == SHN_UNDEF + || src->st_shndx >= shnum + || !src->st_name) + return false; - mod->symtab = (void *)sechdrs[symindex].sh_addr; - mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); - mod->strtab = (void *)sechdrs[strindex].sh_addr; + sec = sechdrs + src->st_shndx; + if (!(sec->sh_flags & SHF_ALLOC) +#ifndef CONFIG_KALLSYMS_ALL + || !(sec->sh_flags & SHF_EXECINSTR) +#endif + || (sec->sh_entsize & INIT_OFFSET_MASK)) + return false; + + return true; +} + +/* + * We only allocate and copy the strings needed by the parts of symtab + * we keep. This is simple, but has the effect of making multiple + * copies of duplicates. We could be more sophisticated, see + * linux-kernel thread starting with + * <73defb5e4bca04a6431392cc341112b1@localhost>. + */ +static void layout_symtab(struct module *mod, struct load_info *info) +{ + Elf_Shdr *symsect = info->sechdrs + info->index.sym; + Elf_Shdr *strsect = info->sechdrs + info->index.str; + const Elf_Sym *src; + unsigned int i, nsrc, ndst, strtab_size = 0; + + /* Put symbol section at end of init part of module. */ + symsect->sh_flags |= SHF_ALLOC; + symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, + info->index.sym) | INIT_OFFSET_MASK; + pr_debug("\t%s\n", info->secstrings + symsect->sh_name); + + src = (void *)info->hdr + symsect->sh_offset; + nsrc = symsect->sh_size / sizeof(*src); + + /* Compute total space required for the core symbols' strtab. */ + for (ndst = i = 0; i < nsrc; i++) { + if (i == 0 || + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + strtab_size += strlen(&info->strtab[src[i].st_name])+1; + ndst++; + } + } + + /* Append room for core symbols at end of core part. */ + info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); + info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); + mod->core_size += strtab_size; + + /* Put string table section at end of init part of module. */ + strsect->sh_flags |= SHF_ALLOC; + strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, + info->index.str) | INIT_OFFSET_MASK; + pr_debug("\t%s\n", info->secstrings + strsect->sh_name); +} + +static void add_kallsyms(struct module *mod, const struct load_info *info) +{ + unsigned int i, ndst; + const Elf_Sym *src; + Elf_Sym *dst; + char *s; + Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; + + mod->symtab = (void *)symsec->sh_addr; + mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym); + /* Make sure we get permanent strtab: don't use info->strtab. */ + mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr; /* Set types up while we still have access to sections. */ for (i = 0; i < mod->num_symtab; i++) - mod->symtab[i].st_info - = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); + mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); + + mod->core_symtab = dst = mod->module_core + info->symoffs; + mod->core_strtab = s = mod->module_core + info->stroffs; + src = mod->symtab; + for (ndst = i = 0; i < mod->num_symtab; i++) { + if (i == 0 || + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + dst[ndst] = src[i]; + dst[ndst++].st_name = s - mod->core_strtab; + s += strlcpy(s, &mod->strtab[src[i].st_name], + KSYM_NAME_LEN) + 1; + } + } + mod->core_num_syms = ndst; } #else -static inline void add_kallsyms(struct module *mod, - Elf_Shdr *sechdrs, - unsigned int symindex, - unsigned int strindex, - const char *secstrings) +static inline void layout_symtab(struct module *mod, struct load_info *info) +{ +} + +static void add_kallsyms(struct module *mod, const struct load_info *info) { } #endif /* CONFIG_KALLSYMS */ -/* Allocate and load the module: note that size of section 0 is always - zero, and we rely on this for optional sections. */ -static struct module *load_module(void __user *umod, - unsigned long len, - const char __user *uargs) +static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num) +{ + if (!debug) + return; +#ifdef CONFIG_DYNAMIC_DEBUG + if (ddebug_add_module(debug, num, debug->modname)) + pr_err("dynamic debug error adding module: %s\n", + debug->modname); +#endif +} + +static void dynamic_debug_remove(struct _ddebug *debug) +{ + if (debug) + ddebug_remove_module(debug->modname); +} + +void * __weak module_alloc(unsigned long size) +{ + return vmalloc_exec(size); +} + +static void *module_alloc_update_bounds(unsigned long size) +{ + void *ret = module_alloc(size); + + if (ret) { + mutex_lock(&module_mutex); + /* Update module bounds. */ + if ((unsigned long)ret < module_addr_min) + module_addr_min = (unsigned long)ret; + if ((unsigned long)ret + size > module_addr_max) + module_addr_max = (unsigned long)ret + size; + mutex_unlock(&module_mutex); + } + return ret; +} + +#ifdef CONFIG_DEBUG_KMEMLEAK +static void kmemleak_load_module(const struct module *mod, + const struct load_info *info) { - Elf_Ehdr *hdr; - Elf_Shdr *sechdrs; - char *secstrings, *args, *modmagic, *strtab = NULL; unsigned int i; - unsigned int symindex = 0; - unsigned int strindex = 0; - unsigned int setupindex; - unsigned int exindex; - unsigned int exportindex; - unsigned int modindex; - unsigned int obsparmindex; - unsigned int infoindex; - unsigned int gplindex; - unsigned int crcindex; - unsigned int gplcrcindex; - unsigned int versindex; - unsigned int pcpuindex; - unsigned int gplfutureindex; - unsigned int gplfuturecrcindex; - unsigned int unwindex = 0; - unsigned int unusedindex; - unsigned int unusedcrcindex; - unsigned int unusedgplindex; - unsigned int unusedgplcrcindex; - unsigned int markersindex; - unsigned int markersstringsindex; - struct module *mod; - long err = 0; - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ - struct exception_table_entry *extable; - mm_segment_t old_fs; - DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", - umod, len, uargs); - if (len < sizeof(*hdr)) - return ERR_PTR(-ENOEXEC); + /* only scan the sections containing data */ + kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); + + for (i = 1; i < info->hdr->e_shnum; i++) { + /* Scan all writable sections that's not executable */ + if (!(info->sechdrs[i].sh_flags & SHF_ALLOC) || + !(info->sechdrs[i].sh_flags & SHF_WRITE) || + (info->sechdrs[i].sh_flags & SHF_EXECINSTR)) + continue; + + kmemleak_scan_area((void *)info->sechdrs[i].sh_addr, + info->sechdrs[i].sh_size, GFP_KERNEL); + } +} +#else +static inline void kmemleak_load_module(const struct module *mod, + const struct load_info *info) +{ +} +#endif + +#ifdef CONFIG_MODULE_SIG +static int module_sig_check(struct load_info *info) +{ + int err = -ENOKEY; + const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; + const void *mod = info->hdr; + + if (info->len > markerlen && + memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { + /* We truncate the module to discard the signature */ + info->len -= markerlen; + err = mod_verify_sig(mod, &info->len); + } + + if (!err) { + info->sig_ok = true; + return 0; + } + + /* Not having a signature is only an error if we're strict. */ + if (err < 0 && fips_enabled) + panic("Module verification failed with error %d in FIPS mode\n", + err); + if (err == -ENOKEY && !sig_enforce) + err = 0; + + return err; +} +#else /* !CONFIG_MODULE_SIG */ +static int module_sig_check(struct load_info *info) +{ + return 0; +} +#endif /* !CONFIG_MODULE_SIG */ + +/* Sanity checks against invalid binaries, wrong arch, weird elf version. */ +static int elf_header_check(struct load_info *info) +{ + if (info->len < sizeof(*(info->hdr))) + return -ENOEXEC; + + if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0 + || info->hdr->e_type != ET_REL + || !elf_check_arch(info->hdr) + || info->hdr->e_shentsize != sizeof(Elf_Shdr)) + return -ENOEXEC; + + if (info->hdr->e_shoff >= info->len + || (info->hdr->e_shnum * sizeof(Elf_Shdr) > + info->len - info->hdr->e_shoff)) + return -ENOEXEC; + + return 0; +} + +/* Sets info->hdr and info->len. */ +static int copy_module_from_user(const void __user *umod, unsigned long len, + struct load_info *info) +{ + int err; + + info->len = len; + if (info->len < sizeof(*(info->hdr))) + return -ENOEXEC; + + err = security_kernel_module_from_file(NULL); + if (err) + return err; /* Suck in entire file: we'll want most of it. */ - /* vmalloc barfs on "unusual" numbers. Check here */ - if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) - return ERR_PTR(-ENOMEM); - if (copy_from_user(hdr, umod, len) != 0) { - err = -EFAULT; - goto free_hdr; + info->hdr = vmalloc(info->len); + if (!info->hdr) + return -ENOMEM; + + if (copy_from_user(info->hdr, umod, info->len) != 0) { + vfree(info->hdr); + return -EFAULT; } - /* Sanity checks against insmoding binaries or wrong arch, - weird elf version */ - if (memcmp(hdr->e_ident, ELFMAG, 4) != 0 - || hdr->e_type != ET_REL - || !elf_check_arch(hdr) - || hdr->e_shentsize != sizeof(*sechdrs)) { - err = -ENOEXEC; - goto free_hdr; + return 0; +} + +/* Sets info->hdr and info->len. */ +static int copy_module_from_fd(int fd, struct load_info *info) +{ + struct fd f = fdget(fd); + int err; + struct kstat stat; + loff_t pos; + ssize_t bytes = 0; + + if (!f.file) + return -ENOEXEC; + + err = security_kernel_module_from_file(f.file); + if (err) + goto out; + + err = vfs_getattr(&f.file->f_path, &stat); + if (err) + goto out; + + if (stat.size > INT_MAX) { + err = -EFBIG; + goto out; } - if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) - goto truncated; + /* Don't hand 0 to vmalloc, it whines. */ + if (stat.size == 0) { + err = -EINVAL; + goto out; + } + + info->hdr = vmalloc(stat.size); + if (!info->hdr) { + err = -ENOMEM; + goto out; + } + + pos = 0; + while (pos < stat.size) { + bytes = kernel_read(f.file, pos, (char *)(info->hdr) + pos, + stat.size - pos); + if (bytes < 0) { + vfree(info->hdr); + err = bytes; + goto out; + } + if (bytes == 0) + break; + pos += bytes; + } + info->len = pos; + +out: + fdput(f); + return err; +} + +static void free_copy(struct load_info *info) +{ + vfree(info->hdr); +} - /* Convenience variables */ - sechdrs = (void *)hdr + hdr->e_shoff; - secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - sechdrs[0].sh_addr = 0; +static int rewrite_section_headers(struct load_info *info, int flags) +{ + unsigned int i; - for (i = 1; i < hdr->e_shnum; i++) { - if (sechdrs[i].sh_type != SHT_NOBITS - && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) - goto truncated; + /* This should always be true, but let's be sure. */ + info->sechdrs[0].sh_addr = 0; + + for (i = 1; i < info->hdr->e_shnum; i++) { + Elf_Shdr *shdr = &info->sechdrs[i]; + if (shdr->sh_type != SHT_NOBITS + && info->len < shdr->sh_offset + shdr->sh_size) { + pr_err("Module len %lu truncated\n", info->len); + return -ENOEXEC; + } /* Mark all sections sh_addr with their address in the temporary image. */ - sechdrs[i].sh_addr = (size_t)hdr + sechdrs[i].sh_offset; + shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset; - /* Internal symbols and strings. */ - if (sechdrs[i].sh_type == SHT_SYMTAB) { - symindex = i; - strindex = sechdrs[i].sh_link; - strtab = (char *)hdr + sechdrs[strindex].sh_offset; - } #ifndef CONFIG_MODULE_UNLOAD /* Don't load .exit sections */ - if (strncmp(secstrings+sechdrs[i].sh_name, ".exit", 5) == 0) - sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; + if (strstarts(info->secstrings+shdr->sh_name, ".exit")) + shdr->sh_flags &= ~(unsigned long)SHF_ALLOC; #endif } - modindex = find_sec(hdr, sechdrs, secstrings, - ".gnu.linkonce.this_module"); - if (!modindex) { - printk(KERN_WARNING "No module found in object\n"); - err = -ENOEXEC; - goto free_hdr; + /* Track but don't keep modinfo and version sections. */ + if (flags & MODULE_INIT_IGNORE_MODVERSIONS) + info->index.vers = 0; /* Pretend no __versions section! */ + else + info->index.vers = find_sec(info, "__versions"); + info->index.info = find_sec(info, ".modinfo"); + info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; + info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; + return 0; +} + +/* + * Set up our basic convenience variables (pointers to section headers, + * search for module section index etc), and do some basic section + * verification. + * + * Return the temporary module pointer (we'll replace it with the final + * one when we move the module sections around). + */ +static struct module *setup_load_info(struct load_info *info, int flags) +{ + unsigned int i; + int err; + struct module *mod; + + /* Set up the convenience variables */ + info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; + info->secstrings = (void *)info->hdr + + info->sechdrs[info->hdr->e_shstrndx].sh_offset; + + err = rewrite_section_headers(info, flags); + if (err) + return ERR_PTR(err); + + /* Find internal symbols and strings. */ + for (i = 1; i < info->hdr->e_shnum; i++) { + if (info->sechdrs[i].sh_type == SHT_SYMTAB) { + info->index.sym = i; + info->index.str = info->sechdrs[i].sh_link; + info->strtab = (char *)info->hdr + + info->sechdrs[info->index.str].sh_offset; + break; + } } - mod = (void *)sechdrs[modindex].sh_addr; - if (symindex == 0) { - printk(KERN_WARNING "%s: module has no symbols (stripped?)\n", - mod->name); - err = -ENOEXEC; - goto free_hdr; - } - - /* Optional sections */ - exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab"); - gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl"); - gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future"); - unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused"); - unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl"); - crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab"); - gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl"); - gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future"); - unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused"); - unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl"); - setupindex = find_sec(hdr, sechdrs, secstrings, "__param"); - exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table"); - obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); - versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); - infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); - pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); -#ifdef ARCH_UNWIND_SECTION_NAME - unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); -#endif + info->index.mod = find_sec(info, ".gnu.linkonce.this_module"); + if (!info->index.mod) { + pr_warn("No module found in object\n"); + return ERR_PTR(-ENOEXEC); + } + /* This is temporary: point mod into copy of data. */ + mod = (void *)info->sechdrs[info->index.mod].sh_addr; - /* Don't keep modinfo section */ - sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; -#ifdef CONFIG_KALLSYMS - /* Keep symbol and string tables for decoding later. */ - sechdrs[symindex].sh_flags |= SHF_ALLOC; - sechdrs[strindex].sh_flags |= SHF_ALLOC; -#endif - if (unwindex) - sechdrs[unwindex].sh_flags |= SHF_ALLOC; + if (info->index.sym == 0) { + pr_warn("%s: module has no symbols (stripped?)\n", mod->name); + return ERR_PTR(-ENOEXEC); + } + + info->index.pcpu = find_pcpusec(info); /* Check module struct version now, before we try to use module. */ - if (!check_modstruct_version(sechdrs, versindex, mod)) { - err = -ENOEXEC; - goto free_hdr; - } + if (!check_modstruct_version(info->sechdrs, info->index.vers, mod)) + return ERR_PTR(-ENOEXEC); + + return mod; +} + +static int check_modinfo(struct module *mod, struct load_info *info, int flags) +{ + const char *modmagic = get_modinfo(info, "vermagic"); + int err; + + if (flags & MODULE_INIT_IGNORE_VERMAGIC) + modmagic = NULL; - modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); /* This is allowed: modprobe --force will invalidate it. */ if (!modmagic) { - add_taint_module(mod, TAINT_FORCED_MODULE); - printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", - mod->name); - } else if (!same_magic(modmagic, vermagic)) { - printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", + err = try_to_force_load(mod, "bad vermagic"); + if (err) + return err; + } else if (!same_magic(modmagic, vermagic, info->index.vers)) { + pr_err("%s: version magic '%s' should be '%s'\n", mod->name, modmagic, vermagic); - err = -ENOEXEC; - goto free_hdr; + return -ENOEXEC; } - /* Now copy in args */ - args = strndup_user(uargs, ~0UL >> 1); - if (IS_ERR(args)) { - err = PTR_ERR(args); - goto free_hdr; - } + if (!get_modinfo(info, "intree")) + add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); - if (find_module(mod->name)) { - err = -EEXIST; - goto free_mod; + if (get_modinfo(info, "staging")) { + add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); + pr_warn("%s: module is from the staging directory, the quality " + "is unknown, you have been warned.\n", mod->name); } - mod->state = MODULE_STATE_COMING; + /* Set up license info based on the info section */ + set_license(mod, get_modinfo(info, "license")); - /* Allow arches to frob section contents and sizes. */ - err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod); - if (err < 0) - goto free_mod; - - if (pcpuindex) { - /* We have a special allocation for this section. */ - percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, - sechdrs[pcpuindex].sh_addralign, - mod->name); - if (!percpu) { - err = -ENOMEM; - goto free_mod; - } - sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; - mod->percpu = percpu; + return 0; +} + +static int find_module_sections(struct module *mod, struct load_info *info) +{ + mod->kp = section_objs(info, "__param", + sizeof(*mod->kp), &mod->num_kp); + mod->syms = section_objs(info, "__ksymtab", + sizeof(*mod->syms), &mod->num_syms); + mod->crcs = section_addr(info, "__kcrctab"); + mod->gpl_syms = section_objs(info, "__ksymtab_gpl", + sizeof(*mod->gpl_syms), + &mod->num_gpl_syms); + mod->gpl_crcs = section_addr(info, "__kcrctab_gpl"); + mod->gpl_future_syms = section_objs(info, + "__ksymtab_gpl_future", + sizeof(*mod->gpl_future_syms), + &mod->num_gpl_future_syms); + mod->gpl_future_crcs = section_addr(info, "__kcrctab_gpl_future"); + +#ifdef CONFIG_UNUSED_SYMBOLS + mod->unused_syms = section_objs(info, "__ksymtab_unused", + sizeof(*mod->unused_syms), + &mod->num_unused_syms); + mod->unused_crcs = section_addr(info, "__kcrctab_unused"); + mod->unused_gpl_syms = section_objs(info, "__ksymtab_unused_gpl", + sizeof(*mod->unused_gpl_syms), + &mod->num_unused_gpl_syms); + mod->unused_gpl_crcs = section_addr(info, "__kcrctab_unused_gpl"); +#endif +#ifdef CONFIG_CONSTRUCTORS + mod->ctors = section_objs(info, ".ctors", + sizeof(*mod->ctors), &mod->num_ctors); + if (!mod->ctors) + mod->ctors = section_objs(info, ".init_array", + sizeof(*mod->ctors), &mod->num_ctors); + else if (find_sec(info, ".init_array")) { + /* + * This shouldn't happen with same compiler and binutils + * building all parts of the module. + */ + printk(KERN_WARNING "%s: has both .ctors and .init_array.\n", + mod->name); + return -EINVAL; } +#endif - /* Determine total sizes, and put offsets in sh_entsize. For now - this is done generically; there doesn't appear to be any - special cases for the architectures. */ - layout_sections(mod, hdr, sechdrs, secstrings); +#ifdef CONFIG_TRACEPOINTS + mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", + sizeof(*mod->tracepoints_ptrs), + &mod->num_tracepoints); +#endif +#ifdef HAVE_JUMP_LABEL + mod->jump_entries = section_objs(info, "__jump_table", + sizeof(*mod->jump_entries), + &mod->num_jump_entries); +#endif +#ifdef CONFIG_EVENT_TRACING + mod->trace_events = section_objs(info, "_ftrace_events", + sizeof(*mod->trace_events), + &mod->num_trace_events); +#endif +#ifdef CONFIG_TRACING + mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", + sizeof(*mod->trace_bprintk_fmt_start), + &mod->num_trace_bprintk_fmt); +#endif +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + /* sechdrs[0].sh_size is always zero */ + mod->ftrace_callsites = section_objs(info, "__mcount_loc", + sizeof(*mod->ftrace_callsites), + &mod->num_ftrace_callsites); +#endif + + mod->extable = section_objs(info, "__ex_table", + sizeof(*mod->extable), &mod->num_exentries); + + if (section_addr(info, "__obsparm")) + pr_warn("%s: Ignoring obsolete parameters\n", mod->name); + + info->debug = section_objs(info, "__verbose", + sizeof(*info->debug), &info->num_debug); + + return 0; +} + +static int move_module(struct module *mod, struct load_info *info) +{ + int i; + void *ptr; /* Do the allocs. */ - ptr = module_alloc(mod->core_size); - if (!ptr) { - err = -ENOMEM; - goto free_percpu; - } + ptr = module_alloc_update_bounds(mod->core_size); + /* + * The pointer to this block is stored in the module structure + * which is inside the block. Just mark it as not being a + * leak. + */ + kmemleak_not_leak(ptr); + if (!ptr) + return -ENOMEM; + memset(ptr, 0, mod->core_size); mod->module_core = ptr; - ptr = module_alloc(mod->init_size); - if (!ptr && mod->init_size) { - err = -ENOMEM; - goto free_core; - } - memset(ptr, 0, mod->init_size); - mod->module_init = ptr; + if (mod->init_size) { + ptr = module_alloc_update_bounds(mod->init_size); + /* + * The pointer to this block is stored in the module structure + * which is inside the block. This block doesn't need to be + * scanned as it contains data and code that will be freed + * after the module is initialized. + */ + kmemleak_ignore(ptr); + if (!ptr) { + module_free(mod, mod->module_core); + return -ENOMEM; + } + memset(ptr, 0, mod->init_size); + mod->module_init = ptr; + } else + mod->module_init = NULL; /* Transfer each section which specifies SHF_ALLOC */ - DEBUGP("final section addresses:\n"); - for (i = 0; i < hdr->e_shnum; i++) { + pr_debug("final section addresses:\n"); + for (i = 0; i < info->hdr->e_shnum; i++) { void *dest; + Elf_Shdr *shdr = &info->sechdrs[i]; - if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + if (!(shdr->sh_flags & SHF_ALLOC)) continue; - if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) + if (shdr->sh_entsize & INIT_OFFSET_MASK) dest = mod->module_init - + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); + + (shdr->sh_entsize & ~INIT_OFFSET_MASK); else - dest = mod->module_core + sechdrs[i].sh_entsize; + dest = mod->module_core + shdr->sh_entsize; - if (sechdrs[i].sh_type != SHT_NOBITS) - memcpy(dest, (void *)sechdrs[i].sh_addr, - sechdrs[i].sh_size); + if (shdr->sh_type != SHT_NOBITS) + memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); /* Update sh_addr to point to copy in image. */ - sechdrs[i].sh_addr = (unsigned long)dest; - DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); + shdr->sh_addr = (unsigned long)dest; + pr_debug("\t0x%lx %s\n", + (long)shdr->sh_addr, info->secstrings + shdr->sh_name); } - /* Module has been moved. */ - mod = (void *)sechdrs[modindex].sh_addr; - - /* Now we've moved module, initialize linked lists, etc. */ - module_unload_init(mod); - - /* add kobject, so we can reference it. */ - err = mod_sysfs_init(mod); - if (err) - goto free_unload; - /* Set up license info based on the info section */ - set_license(mod, get_modinfo(sechdrs, infoindex, "license")); + return 0; +} +static int check_module_license_and_versions(struct module *mod) +{ /* * ndiswrapper is under GPL by itself, but loads proprietary modules. * Don't use add_taint_module(), as it would prevent ndiswrapper from * using GPL-only symbols it needs. */ if (strcmp(mod->name, "ndiswrapper") == 0) - add_taint(TAINT_PROPRIETARY_MODULE); + add_taint(TAINT_PROPRIETARY_MODULE, LOCKDEP_NOW_UNRELIABLE); /* driverloader was caught wrongly pretending to be under GPL */ if (strcmp(mod->name, "driverloader") == 0) - add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + add_taint_module(mod, TAINT_PROPRIETARY_MODULE, + LOCKDEP_NOW_UNRELIABLE); - /* Set up MODINFO_ATTR fields */ - setup_modinfo(mod, sechdrs, infoindex); - - /* Fix up syms, so that st_value is a pointer to location. */ - err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, - mod); - if (err < 0) - goto cleanup; - - /* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */ - mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms); - mod->syms = (void *)sechdrs[exportindex].sh_addr; - if (crcindex) - mod->crcs = (void *)sechdrs[crcindex].sh_addr; - mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms); - mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr; - if (gplcrcindex) - mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr; - mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size / - sizeof(*mod->gpl_future_syms); - mod->num_unused_syms = sechdrs[unusedindex].sh_size / - sizeof(*mod->unused_syms); - mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size / - sizeof(*mod->unused_gpl_syms); - mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr; - if (gplfuturecrcindex) - mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr; - - mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr; - if (unusedcrcindex) - mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr; - mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr; - if (unusedgplcrcindex) - mod->unused_crcs = (void *)sechdrs[unusedgplcrcindex].sh_addr; + /* lve claims to be GPL but upstream won't provide source */ + if (strcmp(mod->name, "lve") == 0) + add_taint_module(mod, TAINT_PROPRIETARY_MODULE, + LOCKDEP_NOW_UNRELIABLE); #ifdef CONFIG_MODVERSIONS - if ((mod->num_syms && !crcindex) || - (mod->num_gpl_syms && !gplcrcindex) || - (mod->num_gpl_future_syms && !gplfuturecrcindex) || - (mod->num_unused_syms && !unusedcrcindex) || - (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { - printk(KERN_WARNING "%s: No versions for exported symbols." - " Tainting kernel.\n", mod->name); - add_taint_module(mod, TAINT_FORCED_MODULE); - } + if ((mod->num_syms && !mod->crcs) + || (mod->num_gpl_syms && !mod->gpl_crcs) + || (mod->num_gpl_future_syms && !mod->gpl_future_crcs) +#ifdef CONFIG_UNUSED_SYMBOLS + || (mod->num_unused_syms && !mod->unused_crcs) + || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs) #endif - markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); - markersstringsindex = find_sec(hdr, sechdrs, secstrings, - "__markers_strings"); - - /* Now do relocations. */ - for (i = 1; i < hdr->e_shnum; i++) { - const char *strtab = (char *)sechdrs[strindex].sh_addr; - unsigned int info = sechdrs[i].sh_info; - - /* Not a valid relocation section? */ - if (info >= hdr->e_shnum) - continue; - - /* Don't bother with non-allocated sections */ - if (!(sechdrs[info].sh_flags & SHF_ALLOC)) - continue; - - if (sechdrs[i].sh_type == SHT_REL) - err = apply_relocate(sechdrs, strtab, symindex, i,mod); - else if (sechdrs[i].sh_type == SHT_RELA) - err = apply_relocate_add(sechdrs, strtab, symindex, i, - mod); - if (err < 0) - goto cleanup; + ) { + return try_to_force_load(mod, + "no versions for exported symbols"); } -#ifdef CONFIG_MARKERS - mod->markers = (void *)sechdrs[markersindex].sh_addr; - mod->num_markers = - sechdrs[markersindex].sh_size / sizeof(*mod->markers); #endif + return 0; +} - /* Find duplicate symbols */ - err = verify_export_symbols(mod); - - if (err < 0) - goto cleanup; - - /* Set up and sort exception table */ - mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); - mod->extable = extable = (void *)sechdrs[exindex].sh_addr; - sort_extable(extable, extable + mod->num_exentries); - - /* Finally, copy percpu area over. */ - percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, - sechdrs[pcpuindex].sh_size); - - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); - -#ifdef CONFIG_MARKERS - if (!mod->taints) - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); -#endif - err = module_finalize(hdr, sechdrs, mod); - if (err < 0) - goto cleanup; +static void flush_module_icache(const struct module *mod) +{ + mm_segment_t old_fs; /* flush the icache in correct context */ old_fs = get_fs(); @@ -2065,145 +2902,477 @@ static struct module *load_module(void __user *umod, (unsigned long)mod->module_core + mod->core_size); set_fs(old_fs); +} - mod->args = args; - if (obsparmindex) - printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", - mod->name); +int __weak module_frob_arch_sections(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + char *secstrings, + struct module *mod) +{ + return 0; +} - /* Now sew it into the lists so we can get lockdep and oops - * info during argument parsing. Noone should access us, since - * strong_try_module_get() will fail. */ - stop_machine_run(__link_module, mod, NR_CPUS); - - /* Size of section 0 is 0, so this works well if no params */ - err = parse_args(mod->name, mod->args, - (struct kernel_param *) - sechdrs[setupindex].sh_addr, - sechdrs[setupindex].sh_size - / sizeof(struct kernel_param), - NULL); - if (err < 0) - goto unlink; +static struct module *layout_and_allocate(struct load_info *info, int flags) +{ + /* Module within temporary copy. */ + struct module *mod; + int err; - err = mod_sysfs_setup(mod, - (struct kernel_param *) - sechdrs[setupindex].sh_addr, - sechdrs[setupindex].sh_size - / sizeof(struct kernel_param)); + mod = setup_load_info(info, flags); + if (IS_ERR(mod)) + return mod; + + err = check_modinfo(mod, info, flags); + if (err) + return ERR_PTR(err); + + /* Allow arches to frob section contents and sizes. */ + err = module_frob_arch_sections(info->hdr, info->sechdrs, + info->secstrings, mod); if (err < 0) - goto unlink; - add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); - add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); + return ERR_PTR(err); - /* Size of section 0 is 0, so this works well if no unwind info. */ - mod->unwind_info = unwind_add_table(mod, - (void *)sechdrs[unwindex].sh_addr, - sechdrs[unwindex].sh_size); + /* We will do a special allocation for per-cpu sections later. */ + info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC; - /* Get rid of temporary copy */ - vfree(hdr); + /* Determine total sizes, and put offsets in sh_entsize. For now + this is done generically; there doesn't appear to be any + special cases for the architectures. */ + layout_sections(mod, info); + layout_symtab(mod, info); - /* Done! */ + /* Allocate and move to the final place */ + err = move_module(mod, info); + if (err) + return ERR_PTR(err); + + /* Module has been copied to its final place now: return it. */ + mod = (void *)info->sechdrs[info->index.mod].sh_addr; + kmemleak_load_module(mod, info); return mod; +} - unlink: - stop_machine_run(__unlink_module, mod, NR_CPUS); - module_arch_cleanup(mod); - cleanup: - kobject_del(&mod->mkobj.kobj); - kobject_put(&mod->mkobj.kobj); - free_unload: - module_unload_free(mod); +/* mod is no longer valid after this! */ +static void module_deallocate(struct module *mod, struct load_info *info) +{ + percpu_modfree(mod); module_free(mod, mod->module_init); - free_core: module_free(mod, mod->module_core); - free_percpu: - if (percpu) - percpu_modfree(percpu); - free_mod: - kfree(args); - free_hdr: - vfree(hdr); - return ERR_PTR(err); +} - truncated: - printk(KERN_ERR "Module len %lu truncated\n", len); - err = -ENOEXEC; - goto free_hdr; +int __weak module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + return 0; } -/* This is where the real work happens */ -asmlinkage long -sys_init_module(void __user *umod, - unsigned long len, - const char __user *uargs) +static int post_relocation(struct module *mod, const struct load_info *info) { - struct module *mod; - int ret = 0; + /* Sort exception table now relocations are done. */ + sort_extable(mod->extable, mod->extable + mod->num_exentries); - /* Must have permission */ - if (!capable(CAP_SYS_MODULE)) - return -EPERM; + /* Copy relocated percpu area over. */ + percpu_modcopy(mod, (void *)info->sechdrs[info->index.pcpu].sh_addr, + info->sechdrs[info->index.pcpu].sh_size); - /* Only one module load at a time, please */ - if (mutex_lock_interruptible(&module_mutex) != 0) - return -EINTR; + /* Setup kallsyms-specific fields. */ + add_kallsyms(mod, info); - /* Do all the hard work */ - mod = load_module(umod, len, uargs); - if (IS_ERR(mod)) { - mutex_unlock(&module_mutex); - return PTR_ERR(mod); - } + /* Arch-specific module finalizing. */ + return module_finalize(info->hdr, info->sechdrs, mod); +} - /* Drop lock so they can recurse */ +/* Is this module of this name done loading? No locks held. */ +static bool finished_loading(const char *name) +{ + struct module *mod; + bool ret; + + mutex_lock(&module_mutex); + mod = find_module_all(name, strlen(name), true); + ret = !mod || mod->state == MODULE_STATE_LIVE + || mod->state == MODULE_STATE_GOING; mutex_unlock(&module_mutex); - blocking_notifier_call_chain(&module_notify_list, - MODULE_STATE_COMING, mod); + return ret; +} + +/* Call module constructors. */ +static void do_mod_ctors(struct module *mod) +{ +#ifdef CONFIG_CONSTRUCTORS + unsigned long i; + for (i = 0; i < mod->num_ctors; i++) + mod->ctors[i](); +#endif +} + +/* This is where the real work happens */ +static int do_init_module(struct module *mod) +{ + int ret = 0; + + /* + * We want to find out whether @mod uses async during init. Clear + * PF_USED_ASYNC. async_schedule*() will set it. + */ + current->flags &= ~PF_USED_ASYNC; + + do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) - ret = mod->init(); + ret = do_one_initcall(mod->init); if (ret < 0) { /* Init routine failed: abort. Try to protect us from buggy refcounters. */ mod->state = MODULE_STATE_GOING; synchronize_sched(); module_put(mod); - mutex_lock(&module_mutex); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_GOING, mod); free_module(mod); - mutex_unlock(&module_mutex); - wake_up(&module_wq); + wake_up_all(&module_wq); return ret; } if (ret > 0) { - printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, " - "it should follow 0/-E convention\n" - KERN_WARNING "%s: loading module anyway...\n", - __func__, mod->name, ret, - __func__); + pr_warn("%s: '%s'->init suspiciously returned %d, it should " + "follow 0/-E convention\n" + "%s: loading module anyway...\n", + __func__, mod->name, ret, __func__); dump_stack(); } - /* Now it's a first class citizen! Wake up anyone waiting for it. */ + /* Now it's a first class citizen! */ mod->state = MODULE_STATE_LIVE; - wake_up(&module_wq); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_LIVE, mod); + + /* + * We need to finish all async code before the module init sequence + * is done. This has potential to deadlock. For example, a newly + * detected block device can trigger request_module() of the + * default iosched from async probing task. Once userland helper + * reaches here, async_synchronize_full() will wait on the async + * task waiting on request_module() and deadlock. + * + * This deadlock is avoided by perfomring async_synchronize_full() + * iff module init queued any async jobs. This isn't a full + * solution as it will deadlock the same if module loading from + * async jobs nests more than once; however, due to the various + * constraints, this hack seems to be the best option for now. + * Please refer to the following thread for details. + * + * http://thread.gmane.org/gmane.linux.kernel/1420814 + */ + if (current->flags & PF_USED_ASYNC) + async_synchronize_full(); mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); - unwind_remove_table(mod->unwind_info, 1); + trim_init_extable(mod); +#ifdef CONFIG_KALLSYMS + mod->num_symtab = mod->core_num_syms; + mod->symtab = mod->core_symtab; + mod->strtab = mod->core_strtab; +#endif + unset_module_init_ro_nx(mod); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; + mod->init_ro_size = 0; mod->init_text_size = 0; mutex_unlock(&module_mutex); + wake_up_all(&module_wq); + + return 0; +} + +static int may_init_module(void) +{ + if (!capable(CAP_SYS_MODULE) || modules_disabled) + return -EPERM; + + return 0; +} + +/* + * We try to place it in the list now to make sure it's unique before + * we dedicate too many resources. In particular, temporary percpu + * memory exhaustion. + */ +static int add_unformed_module(struct module *mod) +{ + int err; + struct module *old; + + mod->state = MODULE_STATE_UNFORMED; + +again: + mutex_lock(&module_mutex); + old = find_module_all(mod->name, strlen(mod->name), true); + if (old != NULL) { + if (old->state == MODULE_STATE_COMING + || old->state == MODULE_STATE_UNFORMED) { + /* Wait in case it fails to load. */ + mutex_unlock(&module_mutex); + err = wait_event_interruptible(module_wq, + finished_loading(mod->name)); + if (err) + goto out_unlocked; + goto again; + } + err = -EEXIST; + goto out; + } + list_add_rcu(&mod->list, &modules); + err = 0; + +out: + mutex_unlock(&module_mutex); +out_unlocked: + return err; +} + +static int complete_formation(struct module *mod, struct load_info *info) +{ + int err; + + mutex_lock(&module_mutex); + + /* Find duplicate symbols (must be called under lock). */ + err = verify_export_symbols(mod); + if (err < 0) + goto out; + + /* This relies on module_mutex for list integrity. */ + module_bug_finalize(info->hdr, info->sechdrs, mod); + + /* Set RO and NX regions for core */ + set_section_ro_nx(mod->module_core, + mod->core_text_size, + mod->core_ro_size, + mod->core_size); + + /* Set RO and NX regions for init */ + set_section_ro_nx(mod->module_init, + mod->init_text_size, + mod->init_ro_size, + mod->init_size); + + /* Mark state as coming so strong_try_module_get() ignores us, + * but kallsyms etc. can see us. */ + mod->state = MODULE_STATE_COMING; + mutex_unlock(&module_mutex); + + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_COMING, mod); + return 0; + +out: + mutex_unlock(&module_mutex); + return err; +} +static int unknown_module_param_cb(char *param, char *val, const char *modname) +{ + /* Check for magic 'dyndbg' arg */ + int ret = ddebug_dyndbg_module_param_cb(param, val, modname); + if (ret != 0) + pr_warn("%s: unknown parameter '%s' ignored\n", modname, param); return 0; } +/* Allocate and load the module: note that size of section 0 is always + zero, and we rely on this for optional sections. */ +static int load_module(struct load_info *info, const char __user *uargs, + int flags) +{ + struct module *mod; + long err; + char *after_dashes; + + err = module_sig_check(info); + if (err) + goto free_copy; + + err = elf_header_check(info); + if (err) + goto free_copy; + + /* Figure out module layout, and allocate all the memory. */ + mod = layout_and_allocate(info, flags); + if (IS_ERR(mod)) { + err = PTR_ERR(mod); + goto free_copy; + } + + /* Reserve our place in the list. */ + err = add_unformed_module(mod); + if (err) + goto free_module; + +#ifdef CONFIG_MODULE_SIG + mod->sig_ok = info->sig_ok; + if (!mod->sig_ok) { + pr_notice_once("%s: module verification failed: signature " + "and/or required key missing - tainting " + "kernel\n", mod->name); + add_taint_module(mod, TAINT_UNSIGNED_MODULE, LOCKDEP_STILL_OK); + } +#endif + + /* To avoid stressing percpu allocator, do this once we're unique. */ + err = percpu_modalloc(mod, info); + if (err) + goto unlink_mod; + + /* Now module is in final location, initialize linked lists, etc. */ + err = module_unload_init(mod); + if (err) + goto unlink_mod; + + /* Now we've got everything in the final locations, we can + * find optional sections. */ + err = find_module_sections(mod, info); + if (err) + goto free_unload; + + err = check_module_license_and_versions(mod); + if (err) + goto free_unload; + + /* Set up MODINFO_ATTR fields */ + setup_modinfo(mod, info); + + /* Fix up syms, so that st_value is a pointer to location. */ + err = simplify_symbols(mod, info); + if (err < 0) + goto free_modinfo; + + err = apply_relocations(mod, info); + if (err < 0) + goto free_modinfo; + + err = post_relocation(mod, info); + if (err < 0) + goto free_modinfo; + + flush_module_icache(mod); + + /* Now copy in args */ + mod->args = strndup_user(uargs, ~0UL >> 1); + if (IS_ERR(mod->args)) { + err = PTR_ERR(mod->args); + goto free_arch_cleanup; + } + + dynamic_debug_setup(info->debug, info->num_debug); + + /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ + ftrace_module_init(mod); + + /* Finally it's fully formed, ready to start executing. */ + err = complete_formation(mod, info); + if (err) + goto ddebug_cleanup; + + /* Module is ready to execute: parsing args may do that. */ + after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, + -32768, 32767, unknown_module_param_cb); + if (IS_ERR(after_dashes)) { + err = PTR_ERR(after_dashes); + goto bug_cleanup; + } else if (after_dashes) { + pr_warn("%s: parameters '%s' after `--' ignored\n", + mod->name, after_dashes); + } + + /* Link in to syfs. */ + err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp); + if (err < 0) + goto bug_cleanup; + + /* Get rid of temporary copy. */ + free_copy(info); + + /* Done! */ + trace_module_load(mod); + + return do_init_module(mod); + + bug_cleanup: + /* module_bug_cleanup needs module_mutex protection */ + mutex_lock(&module_mutex); + module_bug_cleanup(mod); + mutex_unlock(&module_mutex); + ddebug_cleanup: + dynamic_debug_remove(info->debug); + synchronize_sched(); + kfree(mod->args); + free_arch_cleanup: + module_arch_cleanup(mod); + free_modinfo: + free_modinfo(mod); + free_unload: + module_unload_free(mod); + unlink_mod: + mutex_lock(&module_mutex); + /* Unlink carefully: kallsyms could be walking list. */ + list_del_rcu(&mod->list); + wake_up_all(&module_wq); + mutex_unlock(&module_mutex); + free_module: + module_deallocate(mod, info); + free_copy: + free_copy(info); + return err; +} + +SYSCALL_DEFINE3(init_module, void __user *, umod, + unsigned long, len, const char __user *, uargs) +{ + int err; + struct load_info info = { }; + + err = may_init_module(); + if (err) + return err; + + pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n", + umod, len, uargs); + + err = copy_module_from_user(umod, len, &info); + if (err) + return err; + + return load_module(&info, uargs, 0); +} + +SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) +{ + int err; + struct load_info info = { }; + + err = may_init_module(); + if (err) + return err; + + pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); + + if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS + |MODULE_INIT_IGNORE_VERMAGIC)) + return -EINVAL; + + err = copy_module_from_fd(fd, &info); + if (err) + return err; + + return load_module(&info, uargs, flags); +} + static inline int within(unsigned long addr, void *start, unsigned long size) { return ((void *)addr >= start && (void *)addr < start + size); @@ -2229,12 +3398,12 @@ static const char *get_ksymbol(struct module *mod, unsigned long nextval; /* At worse, next value is at end of module */ - if (within(addr, mod->module_init, mod->init_size)) + if (within_module_init(addr, mod)) nextval = (unsigned long)mod->module_init+mod->init_text_size; else nextval = (unsigned long)mod->module_core+mod->core_text_size; - /* Scan for closest preceeding symbol, and next symbol. (ELF + /* Scan for closest preceding symbol, and next symbol. (ELF starts real symbols at 1). */ for (i = 1; i < mod->num_symtab; i++) { if (mod->symtab[i].st_shndx == SHN_UNDEF) @@ -2276,9 +3445,11 @@ const char *module_address_lookup(unsigned long addr, const char *ret = NULL; preempt_disable(); - list_for_each_entry(mod, &modules, list) { - if (within(addr, mod->module_init, mod->init_size) - || within(addr, mod->module_core, mod->core_size)) { + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + if (within_module_init(addr, mod) || + within_module_core(addr, mod)) { if (modname) *modname = mod->name; ret = get_ksymbol(mod, addr, size, offset); @@ -2299,9 +3470,11 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) struct module *mod; preempt_disable(); - list_for_each_entry(mod, &modules, list) { - if (within(addr, mod->module_init, mod->init_size) || - within(addr, mod->module_core, mod->core_size)) { + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + if (within_module_init(addr, mod) || + within_module_core(addr, mod)) { const char *sym; sym = get_ksymbol(mod, addr, NULL, NULL); @@ -2323,9 +3496,11 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, struct module *mod; preempt_disable(); - list_for_each_entry(mod, &modules, list) { - if (within(addr, mod->module_init, mod->init_size) || - within(addr, mod->module_core, mod->core_size)) { + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + if (within_module_init(addr, mod) || + within_module_core(addr, mod)) { const char *sym; sym = get_ksymbol(mod, addr, size, offset); @@ -2350,14 +3525,16 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, struct module *mod; preempt_disable(); - list_for_each_entry(mod, &modules, list) { + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (symnum < mod->num_symtab) { *value = mod->symtab[symnum].st_value; *type = mod->symtab[symnum].st_info; strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, KSYM_NAME_LEN); strlcpy(module_name, mod->name, MODULE_NAME_LEN); - *exported = is_exported(name, mod); + *exported = is_exported(name, *value, mod); preempt_enable(); return 0; } @@ -2388,55 +3565,52 @@ unsigned long module_kallsyms_lookup_name(const char *name) /* Don't lock: we're in enough trouble already. */ preempt_disable(); if ((colon = strchr(name, ':')) != NULL) { - *colon = '\0'; - if ((mod = find_module(name)) != NULL) + if ((mod = find_module_all(name, colon - name, false)) != NULL) ret = mod_find_symname(mod, colon+1); - *colon = ':'; } else { - list_for_each_entry(mod, &modules, list) + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if ((ret = mod_find_symname(mod, name)) != 0) break; + } } preempt_enable(); return ret; } -#endif /* CONFIG_KALLSYMS */ -/* Called by the /proc file system to return a list of modules. */ -static void *m_start(struct seq_file *m, loff_t *pos) +int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, unsigned long), + void *data) { - mutex_lock(&module_mutex); - return seq_list_start(&modules, *pos); -} - -static void *m_next(struct seq_file *m, void *p, loff_t *pos) -{ - return seq_list_next(p, &modules, pos); -} + struct module *mod; + unsigned int i; + int ret; -static void m_stop(struct seq_file *m, void *p) -{ - mutex_unlock(&module_mutex); + list_for_each_entry(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + for (i = 0; i < mod->num_symtab; i++) { + ret = fn(data, mod->strtab + mod->symtab[i].st_name, + mod, mod->symtab[i].st_value); + if (ret != 0) + return ret; + } + } + return 0; } +#endif /* CONFIG_KALLSYMS */ static char *module_flags(struct module *mod, char *buf) { int bx = 0; + BUG_ON(mod->state == MODULE_STATE_UNFORMED); if (mod->taints || mod->state == MODULE_STATE_GOING || mod->state == MODULE_STATE_COMING) { buf[bx++] = '('; - if (mod->taints & TAINT_PROPRIETARY_MODULE) - buf[bx++] = 'P'; - if (mod->taints & TAINT_FORCED_MODULE) - buf[bx++] = 'F'; - /* - * TAINT_FORCED_RMMOD: could be added. - * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't - * apply to modules. - */ - + bx += module_flags_taint(mod, buf + bx); /* Show a - for module-is-being-unloaded */ if (mod->state == MODULE_STATE_GOING) buf[bx++] = '-'; @@ -2450,12 +3624,34 @@ static char *module_flags(struct module *mod, char *buf) return buf; } +#ifdef CONFIG_PROC_FS +/* Called by the /proc file system to return a list of modules. */ +static void *m_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&module_mutex); + return seq_list_start(&modules, *pos); +} + +static void *m_next(struct seq_file *m, void *p, loff_t *pos) +{ + return seq_list_next(p, &modules, pos); +} + +static void m_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&module_mutex); +} + static int m_show(struct seq_file *m, void *p) { struct module *mod = list_entry(p, struct module, list); char buf[8]; - seq_printf(m, "%s %lu", + /* We always ignore unformed modules. */ + if (mod->state == MODULE_STATE_UNFORMED) + return 0; + + seq_printf(m, "%s %u", mod->name, mod->init_size + mod->core_size); print_unload_info(m, mod); @@ -2465,7 +3661,7 @@ static int m_show(struct seq_file *m, void *p) mod->state == MODULE_STATE_COMING ? "Loading": "Live"); /* Used by oprofile and other similar tools. */ - seq_printf(m, " 0x%p", mod->module_core); + seq_printf(m, " 0x%pK", mod->module_core); /* Taints info */ if (mod->taints) @@ -2480,13 +3676,33 @@ static int m_show(struct seq_file *m, void *p) Where refcount is a number or -, and deps is a comma-separated list of depends or -. */ -const struct seq_operations modules_op = { +static const struct seq_operations modules_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = m_show }; +static int modules_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &modules_op); +} + +static const struct file_operations proc_modules_operations = { + .open = modules_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proc_modules_init(void) +{ + proc_create("modules", 0, NULL, &proc_modules_operations); + return 0; +} +module_init(proc_modules_init); +#endif + /* Given an address, look for it in the module exception tables. */ const struct exception_table_entry *search_module_extables(unsigned long addr) { @@ -2494,7 +3710,9 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) struct module *mod; preempt_disable(); - list_for_each_entry(mod, &modules, list) { + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; if (mod->num_exentries == 0) continue; @@ -2512,49 +3730,86 @@ const struct exception_table_entry *search_module_extables(unsigned long addr) } /* - * Is this a valid module address? + * is_module_address - is this address inside a module? + * @addr: the address to check. + * + * See is_module_text_address() if you simply want to see if the address + * is code (not data). */ -int is_module_address(unsigned long addr) +bool is_module_address(unsigned long addr) { - struct module *mod; + bool ret; preempt_disable(); - - list_for_each_entry(mod, &modules, list) { - if (within(addr, mod->module_core, mod->core_size)) { - preempt_enable(); - return 1; - } - } - + ret = __module_address(addr) != NULL; preempt_enable(); - return 0; + return ret; } - -/* Is this a valid kernel address? */ -struct module *__module_text_address(unsigned long addr) +/* + * __module_address - get the module which contains an address. + * @addr: the address. + * + * Must be called with preempt disabled or module mutex held so that + * module doesn't get freed during this. + */ +struct module *__module_address(unsigned long addr) { struct module *mod; - list_for_each_entry(mod, &modules, list) - if (within(addr, mod->module_init, mod->init_text_size) - || within(addr, mod->module_core, mod->core_text_size)) + if (addr < module_addr_min || addr > module_addr_max) + return NULL; + + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + if (within_module_core(addr, mod) + || within_module_init(addr, mod)) return mod; + } return NULL; } +EXPORT_SYMBOL_GPL(__module_address); -struct module *module_text_address(unsigned long addr) +/* + * is_module_text_address - is this address inside module code? + * @addr: the address to check. + * + * See is_module_address() if you simply want to see if the address is + * anywhere in a module. See kernel_text_address() for testing if an + * address corresponds to kernel or module code. + */ +bool is_module_text_address(unsigned long addr) { - struct module *mod; + bool ret; preempt_disable(); - mod = __module_text_address(addr); + ret = __module_text_address(addr) != NULL; preempt_enable(); + return ret; +} + +/* + * __module_text_address - get the module whose code contains an address. + * @addr: the address. + * + * Must be called with preempt disabled or module mutex held so that + * module doesn't get freed during this. + */ +struct module *__module_text_address(unsigned long addr) +{ + struct module *mod = __module_address(addr); + if (mod) { + /* Make sure it's within the text section. */ + if (!within(addr, mod->module_init, mod->init_text_size) + && !within(addr, mod->module_core, mod->core_text_size)) + mod = NULL; + } return mod; } +EXPORT_SYMBOL_GPL(__module_text_address); /* Don't grab lock, we're oopsing. */ void print_modules(void) @@ -2562,30 +3817,29 @@ void print_modules(void) struct module *mod; char buf[8]; - printk("Modules linked in:"); - list_for_each_entry(mod, &modules, list) - printk(" %s%s", mod->name, module_flags(mod, buf)); + printk(KERN_DEFAULT "Modules linked in:"); + /* Most callers should already have preempt disabled, but make sure */ + preempt_disable(); + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + pr_cont(" %s%s", mod->name, module_flags(mod, buf)); + } + preempt_enable(); if (last_unloaded_module[0]) - printk(" [last unloaded: %s]", last_unloaded_module); - printk("\n"); + pr_cont(" [last unloaded: %s]", last_unloaded_module); + pr_cont("\n"); } #ifdef CONFIG_MODVERSIONS -/* Generate the signature for struct module here, too, for modversions. */ -void struct_module(struct module *mod) { return; } -EXPORT_SYMBOL(struct_module); -#endif - -#ifdef CONFIG_MARKERS -void module_update_markers(void) +/* Generate the signature for all relevant module structures here. + * If these change, we don't want to try to parse the module. */ +void module_layout(struct module *mod, + struct modversion_info *ver, + struct kernel_param *kp, + struct kernel_symbol *ks, + struct tracepoint * const *tp) { - struct module *mod; - - mutex_lock(&module_mutex); - list_for_each_entry(mod, &modules, list) - if (!mod->taints) - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - mutex_unlock(&module_mutex); } +EXPORT_SYMBOL(module_layout); #endif |
