diff options
Diffstat (limited to 'kernel/kexec.c')
| -rw-r--r-- | kernel/kexec.c | 623 |
1 files changed, 465 insertions, 158 deletions
diff --git a/kernel/kexec.c b/kernel/kexec.c index 06a0e277565..4b8f0c92588 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -12,7 +12,7 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/kexec.h> -#include <linux/spinlock.h> +#include <linux/mutex.h> #include <linux/list.h> #include <linux/highmem.h> #include <linux/syscalls.h> @@ -21,26 +21,37 @@ #include <linux/hardirq.h> #include <linux/elf.h> #include <linux/elfcore.h> -#include <linux/utsrelease.h> #include <linux/utsname.h> #include <linux/numa.h> +#include <linux/suspend.h> +#include <linux/device.h> +#include <linux/freezer.h> +#include <linux/pm.h> +#include <linux/cpu.h> +#include <linux/console.h> +#include <linux/vmalloc.h> +#include <linux/swap.h> +#include <linux/syscore_ops.h> +#include <linux/compiler.h> +#include <linux/hugetlb.h> #include <asm/page.h> #include <asm/uaccess.h> #include <asm/io.h> -#include <asm/system.h> -#include <asm/semaphore.h> #include <asm/sections.h> /* Per cpu memory for storing cpu states in case of system crash. */ -note_buf_t* crash_notes; +note_buf_t __percpu *crash_notes; /* vmcoreinfo stuff */ -unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; +static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; size_t vmcoreinfo_size; size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); +/* Flag to indicate we are going to kexec a new kernel */ +bool kexec_in_progress = false; + /* Location of the reserved area for the crash kernel */ struct resource crashk_res = { .name = "Crash kernel", @@ -48,6 +59,12 @@ struct resource crashk_res = { .end = 0, .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; +struct resource crashk_low_res = { + .name = "Crash kernel", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; int kexec_should_crash(struct task_struct *p) { @@ -72,7 +89,7 @@ int kexec_should_crash(struct task_struct *p) * * The code for the transition from the current kernel to the * the new kernel is placed in the control_code_buffer, whose size - * is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single + * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single * page of memory is necessary, but some architectures require more. * Because this memory must be identity mapped in the transition from * virtual to physical addresses it must live in the range @@ -109,8 +126,8 @@ static struct page *kimage_alloc_page(struct kimage *image, unsigned long dest); static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, - unsigned long nr_segments, - struct kexec_segment __user *segments) + unsigned long nr_segments, + struct kexec_segment __user *segments) { size_t segment_bytes; struct kimage *image; @@ -136,15 +153,17 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, /* Initialize the list of destination pages */ INIT_LIST_HEAD(&image->dest_pages); - /* Initialize the list of unuseable pages */ + /* Initialize the list of unusable pages */ INIT_LIST_HEAD(&image->unuseable_pages); /* Read in the segments */ image->nr_segments = nr_segments; segment_bytes = nr_segments * sizeof(*segments); result = copy_from_user(image->segment, segments, segment_bytes); - if (result) + if (result) { + result = -EFAULT; goto out; + } /* * Verify we have good destination addresses. The caller is @@ -153,7 +172,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, * just verifies it is an address we can use. * * Since the kernel does everything in page size chunks ensure - * the destination addreses are page aligned. Too many + * the destination addresses are page aligned. Too many * special cases crop of when we don't do this. The most * insidious is getting overlapping destination addresses * simply because addresses are changed to page size @@ -215,6 +234,8 @@ out: } +static void kimage_free_page_list(struct list_head *list); + static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, unsigned long nr_segments, struct kexec_segment __user *segments) @@ -228,8 +249,6 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, if (result) goto out; - *rimage = image; - /* * Find a location for the control code buffer, and add it * the vector of segments so that it's pages will also be @@ -237,19 +256,25 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, */ result = -ENOMEM; image->control_code_page = kimage_alloc_control_pages(image, - get_order(KEXEC_CONTROL_CODE_SIZE)); + get_order(KEXEC_CONTROL_PAGE_SIZE)); if (!image->control_code_page) { - printk(KERN_ERR "Could not allocate control_code_buffer\n"); - goto out; + pr_err("Could not allocate control_code_buffer\n"); + goto out_free; } - result = 0; - out: - if (result == 0) - *rimage = image; - else - kfree(image); + image->swap_page = kimage_alloc_control_pages(image, 0); + if (!image->swap_page) { + pr_err("Could not allocate swap buffer\n"); + goto out_free; + } + *rimage = image; + return 0; + +out_free: + kimage_free_page_list(&image->control_pages); + kfree(image); +out: return result; } @@ -296,7 +321,7 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, mend = mstart + image->segment[i].memsz - 1; /* Ensure we are within the crash kernel limits */ if ((mstart < crashk_res.start) || (mend > crashk_res.end)) - goto out; + goto out_free; } /* @@ -306,19 +331,18 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, */ result = -ENOMEM; image->control_code_page = kimage_alloc_control_pages(image, - get_order(KEXEC_CONTROL_CODE_SIZE)); + get_order(KEXEC_CONTROL_PAGE_SIZE)); if (!image->control_code_page) { - printk(KERN_ERR "Could not allocate control_code_buffer\n"); - goto out; + pr_err("Could not allocate control_code_buffer\n"); + goto out_free; } - result = 0; -out: - if (result == 0) - *rimage = image; - else - kfree(image); + *rimage = image; + return 0; +out_free: + kfree(image); +out: return result; } @@ -438,7 +462,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, /* Deal with the destination pages I have inadvertently allocated. * * Ideally I would convert multi-page allocations into single - * page allocations, and add everyting to image->dest_pages. + * page allocations, and add everything to image->dest_pages. * * For now it is simpler to just free the pages. */ @@ -481,9 +505,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, while (hole_end <= crashk_res.end) { unsigned long i; - if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT) - break; - if (hole_end > crashk_res.end) + if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) break; /* See if I overlap any of the segments */ for (i = 0; i < image->nr_segments; i++) { @@ -586,24 +608,22 @@ static void kimage_free_extra_pages(struct kimage *image) /* Walk through and free any extra destination pages I may have */ kimage_free_page_list(&image->dest_pages); - /* Walk through and free any unuseable pages I have cached */ + /* Walk through and free any unusable pages I have cached */ kimage_free_page_list(&image->unuseable_pages); } -static int kimage_terminate(struct kimage *image) +static void kimage_terminate(struct kimage *image) { if (*image->entry != 0) image->entry++; *image->entry = IND_DONE; - - return 0; } #define for_each_kimage_entry(image, ptr, entry) \ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ - ptr = (entry & IND_INDIRECTION)? \ - phys_to_virt((entry & PAGE_MASK)): ptr +1) + ptr = (entry & IND_INDIRECTION) ? \ + phys_to_virt((entry & PAGE_MASK)) : ptr + 1) static void kimage_free_entry(kimage_entry_t entry) { @@ -631,8 +651,7 @@ static void kimage_free(struct kimage *image) * done with it. */ ind = entry; - } - else if (entry & IND_SOURCE) + } else if (entry & IND_SOURCE) kimage_free_entry(entry); } /* Free the final indirection page */ @@ -744,13 +763,18 @@ static struct page *kimage_alloc_page(struct kimage *image, *old = addr | (*old & ~PAGE_MASK); /* The old page I have found cannot be a - * destination page, so return it. + * destination page, so return it if it's + * gfp_flags honor the ones passed in. */ + if (!(gfp_mask & __GFP_HIGHMEM) && + PageHighMem(old_page)) { + kimage_free_pages(old_page); + continue; + } addr = old_addr; page = old_page; break; - } - else { + } else { /* Place the page on the destination list I * will use it later. */ @@ -765,7 +789,7 @@ static int kimage_load_normal_segment(struct kimage *image, struct kexec_segment *segment) { unsigned long maddr; - unsigned long ubytes, mbytes; + size_t ubytes, mbytes; int result; unsigned char __user *buf; @@ -796,20 +820,16 @@ static int kimage_load_normal_segment(struct kimage *image, ptr = kmap(page); /* Start with a clear page */ - memset(ptr, 0, PAGE_SIZE); + clear_page(ptr); ptr += maddr & ~PAGE_MASK; - mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); - if (mchunk > mbytes) - mchunk = mbytes; - - uchunk = mchunk; - if (uchunk > ubytes) - uchunk = ubytes; + mchunk = min_t(size_t, mbytes, + PAGE_SIZE - (maddr & ~PAGE_MASK)); + uchunk = min(ubytes, mchunk); result = copy_from_user(ptr, buf, uchunk); kunmap(page); if (result) { - result = (result < 0) ? result : -EIO; + result = -EFAULT; goto out; } ubytes -= uchunk; @@ -829,7 +849,7 @@ static int kimage_load_crash_segment(struct kimage *image, * We do things a page at a time for the sake of kmap. */ unsigned long maddr; - unsigned long ubytes, mbytes; + size_t ubytes, mbytes; int result; unsigned char __user *buf; @@ -850,13 +870,10 @@ static int kimage_load_crash_segment(struct kimage *image, } ptr = kmap(page); ptr += maddr & ~PAGE_MASK; - mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); - if (mchunk > mbytes) - mchunk = mbytes; - - uchunk = mchunk; - if (uchunk > ubytes) { - uchunk = ubytes; + mchunk = min_t(size_t, mbytes, + PAGE_SIZE - (maddr & ~PAGE_MASK)); + uchunk = min(ubytes, mchunk); + if (mchunk > uchunk) { /* Zero the trailing part of the page */ memset(ptr + uchunk, 0, mchunk - uchunk); } @@ -864,7 +881,7 @@ static int kimage_load_crash_segment(struct kimage *image, kexec_flush_icache_page(page); kunmap(page); if (result) { - result = (result < 0) ? result : -EIO; + result = -EFAULT; goto out; } ubytes -= uchunk; @@ -907,7 +924,7 @@ static int kimage_load_segment(struct kimage *image, * reinitialize them. * * - A machine specific part that includes the syscall number - * and the copies the image to it's final destination. And + * and then copies the image to it's final destination. And * jumps into the image at entry. * * kexec does not sync, or unmount filesystems so if you need @@ -915,23 +932,18 @@ static int kimage_load_segment(struct kimage *image, */ struct kimage *kexec_image; struct kimage *kexec_crash_image; -/* - * A home grown binary mutex. - * Nothing can wait so this mutex is safe to use - * in interrupt context :) - */ -static int kexec_lock; +int kexec_load_disabled; + +static DEFINE_MUTEX(kexec_mutex); -asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, - struct kexec_segment __user *segments, - unsigned long flags) +SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, + struct kexec_segment __user *, segments, unsigned long, flags) { struct kimage **dest_image, *image; - int locked; int result; /* We only trust the superuser with rebooting the system. */ - if (!capable(CAP_SYS_BOOT)) + if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) return -EPERM; /* @@ -963,8 +975,7 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, * * KISS: always take the mutex. */ - locked = xchg(&kexec_lock, 1); - if (locked) + if (!mutex_trylock(&kexec_mutex)) return -EBUSY; dest_image = &kexec_image; @@ -985,10 +996,13 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, kimage_free(xchg(&kexec_crash_image, NULL)); result = kimage_crash_alloc(&image, entry, nr_segments, segments); + crash_map_reserved_pages(); } if (result) goto out; + if (flags & KEXEC_PRESERVE_CONTEXT) + image->preserve_context = 1; result = machine_kexec_prepare(image); if (result) goto out; @@ -998,26 +1012,37 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, if (result) goto out; } - result = kimage_terminate(image); - if (result) - goto out; + kimage_terminate(image); + if (flags & KEXEC_ON_CRASH) + crash_unmap_reserved_pages(); } /* Install the new kernel, and Uninstall the old */ image = xchg(dest_image, image); out: - locked = xchg(&kexec_lock, 0); /* Release the mutex */ - BUG_ON(!locked); + mutex_unlock(&kexec_mutex); kimage_free(image); return result; } +/* + * Add and remove page tables for crashkernel memory + * + * Provide an empty default implementation here -- architecture + * code may override this + */ +void __weak crash_map_reserved_pages(void) +{} + +void __weak crash_unmap_reserved_pages(void) +{} + #ifdef CONFIG_COMPAT -asmlinkage long compat_sys_kexec_load(unsigned long entry, - unsigned long nr_segments, - struct compat_kexec_segment __user *segments, - unsigned long flags) +COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, + compat_ulong_t, nr_segments, + struct compat_kexec_segment __user *, segments, + compat_ulong_t, flags) { struct compat_kexec_segment in; struct kexec_segment out, __user *ksegments; @@ -1033,7 +1058,7 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry, return -EINVAL; ksegments = compat_alloc_user_space(nr_segments * sizeof(out)); - for (i=0; i < nr_segments; i++) { + for (i = 0; i < nr_segments; i++) { result = copy_from_user(&in, &segments[i], sizeof(in)); if (result) return -EFAULT; @@ -1054,10 +1079,7 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry, void crash_kexec(struct pt_regs *regs) { - int locked; - - - /* Take the kexec_lock here to prevent sys_kexec_load + /* Take the kexec_mutex here to prevent sys_kexec_load * running on one cpu from replacing the crash kernel * we are using after a panic on a different cpu. * @@ -1065,18 +1087,87 @@ void crash_kexec(struct pt_regs *regs) * of memory the xchg(&kexec_crash_image) would be * sufficient. But since I reuse the memory... */ - locked = xchg(&kexec_lock, 1); - if (!locked) { + if (mutex_trylock(&kexec_mutex)) { if (kexec_crash_image) { struct pt_regs fixed_regs; + crash_setup_regs(&fixed_regs, regs); crash_save_vmcoreinfo(); machine_crash_shutdown(&fixed_regs); machine_kexec(kexec_crash_image); } - locked = xchg(&kexec_lock, 0); - BUG_ON(!locked); + mutex_unlock(&kexec_mutex); + } +} + +size_t crash_get_memory_size(void) +{ + size_t size = 0; + mutex_lock(&kexec_mutex); + if (crashk_res.end != crashk_res.start) + size = resource_size(&crashk_res); + mutex_unlock(&kexec_mutex); + return size; +} + +void __weak crash_free_reserved_phys_range(unsigned long begin, + unsigned long end) +{ + unsigned long addr; + + for (addr = begin; addr < end; addr += PAGE_SIZE) + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); +} + +int crash_shrink_memory(unsigned long new_size) +{ + int ret = 0; + unsigned long start, end; + unsigned long old_size; + struct resource *ram_res; + + mutex_lock(&kexec_mutex); + + if (kexec_crash_image) { + ret = -ENOENT; + goto unlock; } + start = crashk_res.start; + end = crashk_res.end; + old_size = (end == 0) ? 0 : end - start + 1; + if (new_size >= old_size) { + ret = (new_size == old_size) ? 0 : -EINVAL; + goto unlock; + } + + ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); + if (!ram_res) { + ret = -ENOMEM; + goto unlock; + } + + start = roundup(start, KEXEC_CRASH_MEM_ALIGN); + end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); + + crash_map_reserved_pages(); + crash_free_reserved_phys_range(end, crashk_res.end); + + if ((start == end) && (crashk_res.parent != NULL)) + release_resource(&crashk_res); + + ram_res->start = end; + ram_res->end = crashk_res.end; + ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + ram_res->name = "System RAM"; + + crashk_res.end = end - 1; + + insert_resource(&iomem_resource, ram_res); + crash_unmap_reserved_pages(); + +unlock: + mutex_unlock(&kexec_mutex); + return ret; } static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, @@ -1112,7 +1203,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu) struct elf_prstatus prstatus; u32 *buf; - if ((cpu < 0) || (cpu >= NR_CPUS)) + if ((cpu < 0) || (cpu >= nr_cpu_ids)) return; /* Using ELF notes here is opportunistic. @@ -1122,14 +1213,14 @@ void crash_save_cpu(struct pt_regs *regs, int cpu) * squirrelled away. ELF notes happen to provide * all of that, so there is no need to invent something new. */ - buf = (u32*)per_cpu_ptr(crash_notes, cpu); + buf = (u32 *)per_cpu_ptr(crash_notes, cpu); if (!buf) return; memset(&prstatus, 0, sizeof(prstatus)); prstatus.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); + elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, - &prstatus, sizeof(prstatus)); + &prstatus, sizeof(prstatus)); final_note(buf); } @@ -1138,13 +1229,12 @@ static int __init crash_notes_memory_init(void) /* Allocate memory for saving cpu registers. */ crash_notes = alloc_percpu(note_buf_t); if (!crash_notes) { - printk("Kexec: Memory allocation for saving cpu register" - " states failed\n"); + pr_warn("Kexec: Memory allocation for saving cpu register states failed\n"); return -ENOMEM; } return 0; } -module_init(crash_notes_memory_init) +subsys_initcall(crash_notes_memory_init); /* @@ -1161,10 +1251,10 @@ module_init(crash_notes_memory_init) * * The function returns 0 on success and -EINVAL on failure. */ -static int __init parse_crashkernel_mem(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base) +static int __init parse_crashkernel_mem(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) { char *cur = cmdline, *tmp; @@ -1175,12 +1265,12 @@ static int __init parse_crashkernel_mem(char *cmdline, /* get the start of the range */ start = memparse(cur, &tmp); if (cur == tmp) { - pr_warning("crashkernel: Memory value expected\n"); + pr_warn("crashkernel: Memory value expected\n"); return -EINVAL; } cur = tmp; if (*cur != '-') { - pr_warning("crashkernel: '-' expected\n"); + pr_warn("crashkernel: '-' expected\n"); return -EINVAL; } cur++; @@ -1189,50 +1279,48 @@ static int __init parse_crashkernel_mem(char *cmdline, if (*cur != ':') { end = memparse(cur, &tmp); if (cur == tmp) { - pr_warning("crashkernel: Memory " - "value expected\n"); + pr_warn("crashkernel: Memory value expected\n"); return -EINVAL; } cur = tmp; if (end <= start) { - pr_warning("crashkernel: end <= start\n"); + pr_warn("crashkernel: end <= start\n"); return -EINVAL; } } if (*cur != ':') { - pr_warning("crashkernel: ':' expected\n"); + pr_warn("crashkernel: ':' expected\n"); return -EINVAL; } cur++; size = memparse(cur, &tmp); if (cur == tmp) { - pr_warning("Memory value expected\n"); + pr_warn("Memory value expected\n"); return -EINVAL; } cur = tmp; if (size >= system_ram) { - pr_warning("crashkernel: invalid size\n"); + pr_warn("crashkernel: invalid size\n"); return -EINVAL; } /* match ? */ - if (system_ram >= start && system_ram <= end) { + if (system_ram >= start && system_ram < end) { *crash_size = size; break; } } while (*cur++ == ','); if (*crash_size > 0) { - while (*cur != ' ' && *cur != '@') + while (*cur && *cur != ' ' && *cur != '@') cur++; if (*cur == '@') { cur++; *crash_base = memparse(cur, &tmp); if (cur == tmp) { - pr_warning("Memory value expected " - "after '@'\n"); + pr_warn("Memory value expected after '@'\n"); return -EINVAL; } } @@ -1244,56 +1332,140 @@ static int __init parse_crashkernel_mem(char *cmdline, /* * That function parses "simple" (old) crashkernel command lines like * - * crashkernel=size[@offset] + * crashkernel=size[@offset] * * It returns 0 on success and -EINVAL on failure. */ -static int __init parse_crashkernel_simple(char *cmdline, - unsigned long long *crash_size, - unsigned long long *crash_base) +static int __init parse_crashkernel_simple(char *cmdline, + unsigned long long *crash_size, + unsigned long long *crash_base) { char *cur = cmdline; *crash_size = memparse(cmdline, &cur); if (cmdline == cur) { - pr_warning("crashkernel: memory value expected\n"); + pr_warn("crashkernel: memory value expected\n"); return -EINVAL; } if (*cur == '@') *crash_base = memparse(cur+1, &cur); + else if (*cur != ' ' && *cur != '\0') { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } return 0; } +#define SUFFIX_HIGH 0 +#define SUFFIX_LOW 1 +#define SUFFIX_NULL 2 +static __initdata char *suffix_tbl[] = { + [SUFFIX_HIGH] = ",high", + [SUFFIX_LOW] = ",low", + [SUFFIX_NULL] = NULL, +}; + /* - * That function is the entry point for command line parsing and should be - * called from the arch-specific code. + * That function parses "suffix" crashkernel command lines like + * + * crashkernel=size,[high|low] + * + * It returns 0 on success and -EINVAL on failure. */ -int __init parse_crashkernel(char *cmdline, +static int __init parse_crashkernel_suffix(char *cmdline, + unsigned long long *crash_size, + unsigned long long *crash_base, + const char *suffix) +{ + char *cur = cmdline; + + *crash_size = memparse(cmdline, &cur); + if (cmdline == cur) { + pr_warn("crashkernel: memory value expected\n"); + return -EINVAL; + } + + /* check with suffix */ + if (strncmp(cur, suffix, strlen(suffix))) { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + cur += strlen(suffix); + if (*cur != ' ' && *cur != '\0') { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + + return 0; +} + +static __init char *get_last_crashkernel(char *cmdline, + const char *name, + const char *suffix) +{ + char *p = cmdline, *ck_cmdline = NULL; + + /* find crashkernel and use the last one if there are more */ + p = strstr(p, name); + while (p) { + char *end_p = strchr(p, ' '); + char *q; + + if (!end_p) + end_p = p + strlen(p); + + if (!suffix) { + int i; + + /* skip the one with any known suffix */ + for (i = 0; suffix_tbl[i]; i++) { + q = end_p - strlen(suffix_tbl[i]); + if (!strncmp(q, suffix_tbl[i], + strlen(suffix_tbl[i]))) + goto next; + } + ck_cmdline = p; + } else { + q = end_p - strlen(suffix); + if (!strncmp(q, suffix, strlen(suffix))) + ck_cmdline = p; + } +next: + p = strstr(p+1, name); + } + + if (!ck_cmdline) + return NULL; + + return ck_cmdline; +} + +static int __init __parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, - unsigned long long *crash_base) + unsigned long long *crash_base, + const char *name, + const char *suffix) { - char *p = cmdline, *ck_cmdline = NULL; char *first_colon, *first_space; + char *ck_cmdline; BUG_ON(!crash_size || !crash_base); *crash_size = 0; *crash_base = 0; - /* find crashkernel and use the last one if there are more */ - p = strstr(p, "crashkernel="); - while (p) { - ck_cmdline = p; - p = strstr(p+1, "crashkernel="); - } + ck_cmdline = get_last_crashkernel(cmdline, name, suffix); if (!ck_cmdline) return -EINVAL; - ck_cmdline += 12; /* strlen("crashkernel=") */ + ck_cmdline += strlen(name); + if (suffix) + return parse_crashkernel_suffix(ck_cmdline, crash_size, + crash_base, suffix); /* * if the commandline contains a ':', then that's the extended * syntax -- if not, it must be the classic syntax @@ -1303,44 +1475,69 @@ int __init parse_crashkernel(char *cmdline, if (first_colon && (!first_space || first_colon < first_space)) return parse_crashkernel_mem(ck_cmdline, system_ram, crash_size, crash_base); - else - return parse_crashkernel_simple(ck_cmdline, crash_size, - crash_base); - return 0; + return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); +} + +/* + * That function is the entry point for command line parsing and should be + * called from the arch-specific code. + */ +int __init parse_crashkernel(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel=", NULL); } +int __init parse_crashkernel_high(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel=", suffix_tbl[SUFFIX_HIGH]); +} +int __init parse_crashkernel_low(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel=", suffix_tbl[SUFFIX_LOW]); +} -void crash_save_vmcoreinfo(void) +static void update_vmcoreinfo_note(void) { - u32 *buf; + u32 *buf = vmcoreinfo_note; if (!vmcoreinfo_size) return; - - vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds()); - - buf = (u32 *)vmcoreinfo_note; - buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, vmcoreinfo_size); - final_note(buf); } +void crash_save_vmcoreinfo(void) +{ + vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); + update_vmcoreinfo_note(); +} + void vmcoreinfo_append_str(const char *fmt, ...) { va_list args; char buf[0x50]; - int r; + size_t r; va_start(args, fmt); - r = vsnprintf(buf, sizeof(buf), fmt, args); + r = vscnprintf(buf, sizeof(buf), fmt, args); va_end(args); - if (r + vmcoreinfo_size > vmcoreinfo_max_size) - r = vmcoreinfo_max_size - vmcoreinfo_size; + r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); @@ -1351,10 +1548,10 @@ void vmcoreinfo_append_str(const char *fmt, ...) * provide an empty default implementation here -- architecture * code may override this */ -void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void) +void __weak arch_crash_save_vmcoreinfo(void) {} -unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void) +unsigned long __weak paddr_vmcoreinfo_note(void) { return __pa((unsigned long)(char *)&vmcoreinfo_note); } @@ -1366,8 +1563,11 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_SYMBOL(init_uts_ns); VMCOREINFO_SYMBOL(node_online_map); +#ifdef CONFIG_MMU VMCOREINFO_SYMBOL(swapper_pg_dir); +#endif VMCOREINFO_SYMBOL(_stext); + VMCOREINFO_SYMBOL(vmap_area_list); #ifndef CONFIG_NEED_MULTIPLE_NODES VMCOREINFO_SYMBOL(mem_map); @@ -1389,6 +1589,8 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_OFFSET(page, _count); VMCOREINFO_OFFSET(page, mapping); VMCOREINFO_OFFSET(page, lru); + VMCOREINFO_OFFSET(page, _mapcount); + VMCOREINFO_OFFSET(page, private); VMCOREINFO_OFFSET(pglist_data, node_zones); VMCOREINFO_OFFSET(pglist_data, nr_zones); #ifdef CONFIG_FLAT_NODE_MEM_MAP @@ -1403,13 +1605,118 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_OFFSET(free_area, free_list); VMCOREINFO_OFFSET(list_head, next); VMCOREINFO_OFFSET(list_head, prev); + VMCOREINFO_OFFSET(vmap_area, va_start); + VMCOREINFO_OFFSET(vmap_area, list); VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); + log_buf_kexec_setup(); VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); VMCOREINFO_NUMBER(NR_FREE_PAGES); + VMCOREINFO_NUMBER(PG_lru); + VMCOREINFO_NUMBER(PG_private); + VMCOREINFO_NUMBER(PG_swapcache); + VMCOREINFO_NUMBER(PG_slab); +#ifdef CONFIG_MEMORY_FAILURE + VMCOREINFO_NUMBER(PG_hwpoison); +#endif + VMCOREINFO_NUMBER(PG_head_mask); + VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); +#ifdef CONFIG_HUGETLBFS + VMCOREINFO_SYMBOL(free_huge_page); +#endif arch_crash_save_vmcoreinfo(); + update_vmcoreinfo_note(); return 0; } -module_init(crash_save_vmcoreinfo_init) +subsys_initcall(crash_save_vmcoreinfo_init); + +/* + * Move into place and start executing a preloaded standalone + * executable. If nothing was preloaded return an error. + */ +int kernel_kexec(void) +{ + int error = 0; + + if (!mutex_trylock(&kexec_mutex)) + return -EBUSY; + if (!kexec_image) { + error = -EINVAL; + goto Unlock; + } + +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) { + lock_system_sleep(); + pm_prepare_console(); + error = freeze_processes(); + if (error) { + error = -EBUSY; + goto Restore_console; + } + suspend_console(); + error = dpm_suspend_start(PMSG_FREEZE); + if (error) + goto Resume_console; + /* At this point, dpm_suspend_start() has been called, + * but *not* dpm_suspend_end(). We *must* call + * dpm_suspend_end() now. Otherwise, drivers for + * some devices (e.g. interrupt controllers) become + * desynchronized with the actual state of the + * hardware at resume time, and evil weirdness ensues. + */ + error = dpm_suspend_end(PMSG_FREEZE); + if (error) + goto Resume_devices; + error = disable_nonboot_cpus(); + if (error) + goto Enable_cpus; + local_irq_disable(); + error = syscore_suspend(); + if (error) + goto Enable_irqs; + } else +#endif + { + kexec_in_progress = true; + kernel_restart_prepare(NULL); + migrate_to_reboot_cpu(); + + /* + * migrate_to_reboot_cpu() disables CPU hotplug assuming that + * no further code needs to use CPU hotplug (which is true in + * the reboot case). However, the kexec path depends on using + * CPU hotplug again; so re-enable it here. + */ + cpu_hotplug_enable(); + pr_emerg("Starting new kernel\n"); + machine_shutdown(); + } + + machine_kexec(kexec_image); + +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) { + syscore_resume(); + Enable_irqs: + local_irq_enable(); + Enable_cpus: + enable_nonboot_cpus(); + dpm_resume_start(PMSG_RESTORE); + Resume_devices: + dpm_resume_end(PMSG_RESTORE); + Resume_console: + resume_console(); + thaw_processes(); + Restore_console: + pm_restore_console(); + unlock_system_sleep(); + } +#endif + + Unlock: + mutex_unlock(&kexec_mutex); + return error; +} |
