diff options
Diffstat (limited to 'arch/x86/platform')
37 files changed, 1835 insertions, 1008 deletions
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 20342d4c82c..85afde1fa3e 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -9,5 +9,4 @@ obj-y += olpc/ obj-y += scx200/ obj-y += sfi/ obj-y += ts5500/ -obj-y += visws/ obj-y += uv/ diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index b7b0b35c198..d51045afcaa 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o +obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c index 6599a0027b7..52414211729 100644 --- a/arch/x86/platform/efi/early_printk.c +++ b/arch/x86/platform/efi/early_printk.c @@ -14,48 +14,92 @@ static const struct font_desc *font; static u32 efi_x, efi_y; +static void *efi_fb; +static bool early_efi_keep; -static __init void early_efi_clear_scanline(unsigned int y) +/* + * efi earlyprintk need use early_ioremap to map the framebuffer. + * But early_ioremap is not usable for earlyprintk=efi,keep, ioremap should + * be used instead. ioremap will be available after paging_init() which is + * earlier than initcall callbacks. Thus adding this early initcall function + * early_efi_map_fb to map the whole efi framebuffer. + */ +static __init int early_efi_map_fb(void) { - unsigned long base, *dst; - u16 len; + unsigned long base, size; + + if (!early_efi_keep) + return 0; base = boot_params.screen_info.lfb_base; - len = boot_params.screen_info.lfb_linelength; + size = boot_params.screen_info.lfb_size; + efi_fb = ioremap(base, size); + + return efi_fb ? 0 : -ENOMEM; +} +early_initcall(early_efi_map_fb); + +/* + * early_efi_map maps efi framebuffer region [start, start + len -1] + * In case earlyprintk=efi,keep we have the whole framebuffer mapped already + * so just return the offset efi_fb + start. + */ +static __init_refok void *early_efi_map(unsigned long start, unsigned long len) +{ + unsigned long base; + + base = boot_params.screen_info.lfb_base; + + if (efi_fb) + return (efi_fb + start); + else + return early_ioremap(base + start, len); +} - dst = early_ioremap(base + y*len, len); +static __init_refok void early_efi_unmap(void *addr, unsigned long len) +{ + if (!efi_fb) + early_iounmap(addr, len); +} + +static void early_efi_clear_scanline(unsigned int y) +{ + unsigned long *dst; + u16 len; + + len = boot_params.screen_info.lfb_linelength; + dst = early_efi_map(y*len, len); if (!dst) return; memset(dst, 0, len); - early_iounmap(dst, len); + early_efi_unmap(dst, len); } -static __init void early_efi_scroll_up(void) +static void early_efi_scroll_up(void) { - unsigned long base, *dst, *src; + unsigned long *dst, *src; u16 len; u32 i, height; - base = boot_params.screen_info.lfb_base; len = boot_params.screen_info.lfb_linelength; height = boot_params.screen_info.lfb_height; for (i = 0; i < height - font->height; i++) { - dst = early_ioremap(base + i*len, len); + dst = early_efi_map(i*len, len); if (!dst) return; - src = early_ioremap(base + (i + font->height) * len, len); + src = early_efi_map((i + font->height) * len, len); if (!src) { - early_iounmap(dst, len); + early_efi_unmap(dst, len); return; } memmove(dst, src, len); - early_iounmap(src, len); - early_iounmap(dst, len); + early_efi_unmap(src, len); + early_efi_unmap(dst, len); } } @@ -79,16 +123,14 @@ static void early_efi_write_char(u32 *dst, unsigned char c, unsigned int h) } } -static __init void +static void early_efi_write(struct console *con, const char *str, unsigned int num) { struct screen_info *si; - unsigned long base; unsigned int len; const char *s; void *dst; - base = boot_params.screen_info.lfb_base; si = &boot_params.screen_info; len = si->lfb_linelength; @@ -109,7 +151,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num) for (h = 0; h < font->height; h++) { unsigned int n, x; - dst = early_ioremap(base + (efi_y + h) * len, len); + dst = early_efi_map((efi_y + h) * len, len); if (!dst) return; @@ -123,7 +165,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num) s++; } - early_iounmap(dst, len); + early_efi_unmap(dst, len); } num -= count; @@ -142,7 +184,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num) efi_y += font->height; } - if (efi_y + font->height >= si->lfb_height) { + if (efi_y + font->height > si->lfb_height) { u32 i; efi_y -= font->height; @@ -179,6 +221,9 @@ static __init int early_efi_setup(struct console *con, char *options) for (i = 0; i < (yres - efi_y) / font->height; i++) early_efi_scroll_up(); + /* early_console_register will unset CON_BOOT in case ,keep */ + if (!(con->flags & CON_BOOT)) + early_efi_keep = true; return 0; } diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 7145ec63c52..f15103dff4b 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -42,14 +42,15 @@ void __init efi_bgrt_init(void) if (bgrt_tab->header.length < sizeof(*bgrt_tab)) return; - if (bgrt_tab->version != 1) + if (bgrt_tab->version != 1 || bgrt_tab->status != 1) return; if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) return; image = efi_lookup_mapped_addr(bgrt_tab->image_address); if (!image) { - image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); + image = early_memremap(bgrt_tab->image_address, + sizeof(bmp_header)); ioremapped = true; if (!image) return; @@ -57,7 +58,7 @@ void __init efi_bgrt_init(void) memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); if (ioremapped) - iounmap(image); + early_iounmap(image, sizeof(bmp_header)); bgrt_image_size = bmp_header.size; bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); @@ -65,7 +66,8 @@ void __init efi_bgrt_init(void) return; if (ioremapped) { - image = ioremap(bgrt_tab->image_address, bmp_header.size); + image = early_memremap(bgrt_tab->image_address, + bmp_header.size); if (!image) { kfree(bgrt_image); bgrt_image = NULL; @@ -75,5 +77,5 @@ void __init efi_bgrt_init(void) memcpy_fromio(bgrt_image, image, bgrt_image_size); if (ioremapped) - iounmap(image); + early_iounmap(image, bmp_header.size); } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 92c02344a06..87fc96bcc13 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -12,6 +12,8 @@ * Bibo Mao <bibo.mao@intel.com> * Chandramouli Narayanan <mouli@linux.intel.com> * Huang Ying <ying.huang@intel.com> + * Copyright (C) 2013 SuSE Labs + * Borislav Petkov <bp@suse.de> - runtime services VA mapping * * Copied from efi_32.c to eliminate the duplicated code between EFI * 32/64 support code. --ying 2007-10-26 @@ -50,8 +52,9 @@ #include <asm/tlbflush.h> #include <asm/x86_init.h> #include <asm/rtc.h> +#include <asm/uv/uv.h> -#define EFI_DEBUG 1 +#define EFI_DEBUG #define EFI_MIN_RESERVE 5120 @@ -65,25 +68,16 @@ struct efi_memory_map memmap; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; -unsigned long x86_efi_facility; - -static __initdata efi_config_table_type_t arch_tables[] = { +static efi_config_table_type_t arch_tables[] __initdata = { #ifdef CONFIG_X86_UV {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab}, #endif {NULL_GUID, NULL, NULL}, }; -/* - * Returns 1 if 'facility' is enabled, 0 otherwise. - */ -int efi_enabled(int facility) -{ - return test_bit(facility, &x86_efi_facility) != 0; -} -EXPORT_SYMBOL(efi_enabled); +u64 efi_setup; /* efi setup_data physical address */ -static bool __initdata disable_runtime = false; +static bool disable_runtime __initdata = false; static int __init setup_noefi(char *arg) { disable_runtime = true; @@ -110,14 +104,13 @@ static int __init setup_storage_paranoia(char *arg) } early_param("efi_no_storage_paranoia", setup_storage_paranoia); - static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { unsigned long flags; efi_status_t status; spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt2(get_time, tm, tc); + status = efi_call_virt(get_time, tm, tc); spin_unlock_irqrestore(&rtc_lock, flags); return status; } @@ -128,7 +121,7 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm) efi_status_t status; spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt1(set_time, tm); + status = efi_call_virt(set_time, tm); spin_unlock_irqrestore(&rtc_lock, flags); return status; } @@ -141,8 +134,7 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, efi_status_t status; spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt3(get_wakeup_time, - enabled, pending, tm); + status = efi_call_virt(get_wakeup_time, enabled, pending, tm); spin_unlock_irqrestore(&rtc_lock, flags); return status; } @@ -153,8 +145,7 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) efi_status_t status; spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt2(set_wakeup_time, - enabled, tm); + status = efi_call_virt(set_wakeup_time, enabled, tm); spin_unlock_irqrestore(&rtc_lock, flags); return status; } @@ -165,17 +156,17 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name, unsigned long *data_size, void *data) { - return efi_call_virt5(get_variable, - name, vendor, attr, - data_size, data); + return efi_call_virt(get_variable, + name, vendor, attr, + data_size, data); } static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { - return efi_call_virt3(get_next_variable, - name_size, name, vendor); + return efi_call_virt(get_next_variable, + name_size, name, vendor); } static efi_status_t virt_efi_set_variable(efi_char16_t *name, @@ -184,9 +175,9 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, unsigned long data_size, void *data) { - return efi_call_virt5(set_variable, - name, vendor, attr, - data_size, data); + return efi_call_virt(set_variable, + name, vendor, attr, + data_size, data); } static efi_status_t virt_efi_query_variable_info(u32 attr, @@ -197,13 +188,13 @@ static efi_status_t virt_efi_query_variable_info(u32 attr, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - return efi_call_virt4(query_variable_info, attr, storage_space, - remaining_space, max_variable_size); + return efi_call_virt(query_variable_info, attr, storage_space, + remaining_space, max_variable_size); } static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) { - return efi_call_virt1(get_next_high_mono_count, count); + return efi_call_virt(get_next_high_mono_count, count); } static void virt_efi_reset_system(int reset_type, @@ -211,8 +202,8 @@ static void virt_efi_reset_system(int reset_type, unsigned long data_size, efi_char16_t *data) { - efi_call_virt4(reset_system, reset_type, status, - data_size, data); + __efi_call_virt(reset_system, reset_type, status, + data_size, data); } static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, @@ -222,7 +213,7 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - return efi_call_virt3(update_capsule, capsules, count, sg_list); + return efi_call_virt(update_capsule, capsules, count, sg_list); } static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, @@ -233,8 +224,8 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - return efi_call_virt4(query_capsule_caps, capsules, count, max_size, - reset_type); + return efi_call_virt(query_capsule_caps, capsules, count, max_size, + reset_type); } static efi_status_t __init phys_efi_set_virtual_address_map( @@ -246,34 +237,19 @@ static efi_status_t __init phys_efi_set_virtual_address_map( efi_status_t status; efi_call_phys_prelog(); - status = efi_call_phys4(efi_phys.set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); + status = efi_call_phys(efi_phys.set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); efi_call_phys_epilog(); return status; } -static efi_status_t __init phys_efi_get_time(efi_time_t *tm, - efi_time_cap_t *tc) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - efi_call_phys_prelog(); - status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), - virt_to_phys(tc)); - efi_call_phys_epilog(); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - int efi_set_rtc_mmss(const struct timespec *now) { unsigned long nowtime = now->tv_sec; - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; + efi_status_t status; + efi_time_t eft; + efi_time_cap_t cap; struct rtc_time tm; status = efi.get_time(&eft, &cap); @@ -291,9 +267,8 @@ int efi_set_rtc_mmss(const struct timespec *now) eft.second = tm.tm_sec; eft.nanosecond = 0; } else { - printk(KERN_ERR - "%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", - __FUNCTION__, nowtime); + pr_err("%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", + __func__, nowtime); return -1; } @@ -398,9 +373,9 @@ int __init efi_memblock_x86_reserve_range(void) return 0; } -#if EFI_DEBUG static void __init print_efi_memmap(void) { +#ifdef EFI_DEBUG efi_memory_desc_t *md; void *p; int i; @@ -409,14 +384,13 @@ static void __init print_efi_memmap(void) p < memmap.map_end; p += memmap.desc_size, i++) { md = p; - pr_info("mem%02u: type=%u, attr=0x%llx, " - "range=[0x%016llx-0x%016llx) (%lluMB)\n", + pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n", i, md->type, md->attribute, md->phys_addr, md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), (md->num_pages >> (20 - EFI_PAGE_SHIFT))); } -} #endif /* EFI_DEBUG */ +} void __init efi_reserve_boot_services(void) { @@ -436,15 +410,14 @@ void __init efi_reserve_boot_services(void) * - Not within any part of the kernel * - Not the bios reserved area */ - if ((start+size >= __pa_symbol(_text) + if ((start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) || !e820_all_mapped(start, start+size, E820_RAM) || memblock_is_region_reserved(start, size)) { /* Could not reserve, skip it */ md->num_pages = 0; - memblock_dbg("Could not reserve boot range " - "[0x%010llx-0x%010llx]\n", - start, start+size-1); + memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n", + start, start+size-1); } else memblock_reserve(start, size); } @@ -452,7 +425,7 @@ void __init efi_reserve_boot_services(void) void __init efi_unmap_memmap(void) { - clear_bit(EFI_MEMMAP, &x86_efi_facility); + clear_bit(EFI_MEMMAP, &efi.flags); if (memmap.map) { early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); memmap.map = NULL; @@ -463,9 +436,6 @@ void __init efi_free_boot_services(void) { void *p; - if (!efi_is_native()) - return; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { efi_memory_desc_t *md = p; unsigned long long start = md->phys_addr; @@ -489,18 +459,27 @@ static int __init efi_systab_init(void *phys) { if (efi_enabled(EFI_64BIT)) { efi_system_table_64_t *systab64; + struct efi_setup_data *data = NULL; u64 tmp = 0; + if (efi_setup) { + data = early_memremap(efi_setup, sizeof(*data)); + if (!data) + return -ENOMEM; + } systab64 = early_ioremap((unsigned long)phys, sizeof(*systab64)); if (systab64 == NULL) { pr_err("Couldn't map the system table!\n"); + if (data) + early_iounmap(data, sizeof(*data)); return -ENOMEM; } efi_systab.hdr = systab64->hdr; - efi_systab.fw_vendor = systab64->fw_vendor; - tmp |= systab64->fw_vendor; + efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : + systab64->fw_vendor; + tmp |= data ? data->fw_vendor : systab64->fw_vendor; efi_systab.fw_revision = systab64->fw_revision; efi_systab.con_in_handle = systab64->con_in_handle; tmp |= systab64->con_in_handle; @@ -514,15 +493,20 @@ static int __init efi_systab_init(void *phys) tmp |= systab64->stderr_handle; efi_systab.stderr = systab64->stderr; tmp |= systab64->stderr; - efi_systab.runtime = (void *)(unsigned long)systab64->runtime; - tmp |= systab64->runtime; + efi_systab.runtime = data ? + (void *)(unsigned long)data->runtime : + (void *)(unsigned long)systab64->runtime; + tmp |= data ? data->runtime : systab64->runtime; efi_systab.boottime = (void *)(unsigned long)systab64->boottime; tmp |= systab64->boottime; efi_systab.nr_tables = systab64->nr_tables; - efi_systab.tables = systab64->tables; - tmp |= systab64->tables; + efi_systab.tables = data ? (unsigned long)data->tables : + systab64->tables; + tmp |= data ? data->tables : systab64->tables; early_iounmap(systab64, sizeof(*systab64)); + if (data) + early_iounmap(data, sizeof(*data)); #ifdef CONFIG_X86_32 if (tmp >> 32) { pr_err("EFI data located above 4GB, disabling EFI.\n"); @@ -566,45 +550,82 @@ static int __init efi_systab_init(void *phys) return -EINVAL; } if ((efi.systab->hdr.revision >> 16) == 0) - pr_err("Warning: System table version " - "%d.%02d, expected 1.00 or greater!\n", + pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n", efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff); + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + return 0; } -static int __init efi_runtime_init(void) +static int __init efi_runtime_init32(void) { - efi_runtime_services_t *runtime; + efi_runtime_services_32_t *runtime; + + runtime = early_ioremap((unsigned long)efi.systab->runtime, + sizeof(efi_runtime_services_32_t)); + if (!runtime) { + pr_err("Could not map the runtime service table!\n"); + return -ENOMEM; + } /* - * Check out the runtime services table. We need to map - * the runtime services table so that we can grab the physical - * address of several of the EFI runtime functions, needed to - * set the firmware into virtual mode. + * We will only need *early* access to the following two + * EFI runtime services before set_virtual_address_map + * is invoked. */ + efi_phys.set_virtual_address_map = + (efi_set_virtual_address_map_t *) + (unsigned long)runtime->set_virtual_address_map; + early_iounmap(runtime, sizeof(efi_runtime_services_32_t)); + + return 0; +} + +static int __init efi_runtime_init64(void) +{ + efi_runtime_services_64_t *runtime; + runtime = early_ioremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_t)); + sizeof(efi_runtime_services_64_t)); if (!runtime) { pr_err("Could not map the runtime service table!\n"); return -ENOMEM; } + /* - * We will only need *early* access to the following - * two EFI runtime services before set_virtual_address_map + * We will only need *early* access to the following two + * EFI runtime services before set_virtual_address_map * is invoked. */ - efi_phys.get_time = (efi_get_time_t *)runtime->get_time; efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - runtime->set_virtual_address_map; + (efi_set_virtual_address_map_t *) + (unsigned long)runtime->set_virtual_address_map; + early_iounmap(runtime, sizeof(efi_runtime_services_64_t)); + + return 0; +} + +static int __init efi_runtime_init(void) +{ + int rv; + /* - * Make efi_get_time can be called before entering - * virtual mode. + * Check out the runtime services table. We need to map + * the runtime services table so that we can grab the physical + * address of several of the EFI runtime functions, needed to + * set the firmware into virtual mode. */ - efi.get_time = phys_efi_get_time; - early_iounmap(runtime, sizeof(efi_runtime_services_t)); + if (efi_enabled(EFI_64BIT)) + rv = efi_runtime_init64(); + else + rv = efi_runtime_init32(); + + if (rv) + return rv; + + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); return 0; } @@ -623,9 +644,67 @@ static int __init efi_memmap_init(void) if (add_efi_memmap) do_add_efi_memmap(); + set_bit(EFI_MEMMAP, &efi.flags); + return 0; } +/* + * A number of config table entries get remapped to virtual addresses + * after entering EFI virtual mode. However, the kexec kernel requires + * their physical addresses therefore we pass them via setup_data and + * correct those entries to their respective physical addresses here. + * + * Currently only handles smbios which is necessary for some firmware + * implementation. + */ +static int __init efi_reuse_config(u64 tables, int nr_tables) +{ + int i, sz, ret = 0; + void *p, *tablep; + struct efi_setup_data *data; + + if (!efi_setup) + return 0; + + if (!efi_enabled(EFI_64BIT)) + return 0; + + data = early_memremap(efi_setup, sizeof(*data)); + if (!data) { + ret = -ENOMEM; + goto out; + } + + if (!data->smbios) + goto out_memremap; + + sz = sizeof(efi_config_table_64_t); + + p = tablep = early_memremap(tables, nr_tables * sz); + if (!p) { + pr_err("Could not map Configuration table!\n"); + ret = -ENOMEM; + goto out_memremap; + } + + for (i = 0; i < efi.systab->nr_tables; i++) { + efi_guid_t guid; + + guid = ((efi_config_table_64_t *)p)->guid; + + if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) + ((efi_config_table_64_t *)p)->table = data->smbios; + p += sz; + } + early_iounmap(tablep, nr_tables * sz); + +out_memremap: + early_iounmap(data, sizeof(*data)); +out: + return ret; +} + void __init efi_init(void) { efi_char16_t *c16; @@ -649,7 +728,11 @@ void __init efi_init(void) if (efi_systab_init(efi_phys.systab)) return; - set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + + efi.config_table = (unsigned long)efi.systab->tables; + efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; + efi.runtime = (unsigned long)efi.systab->runtime; /* * Show what we know for posterity @@ -667,39 +750,29 @@ void __init efi_init(void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); - if (efi_config_init(arch_tables)) + if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables)) return; - set_bit(EFI_CONFIG_TABLES, &x86_efi_facility); + if (efi_config_init(arch_tables)) + return; /* * Note: We currently don't support runtime services on an EFI * that doesn't match the kernel 32/64-bit mode. */ - if (!efi_is_native()) + if (!efi_runtime_supported()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); else { if (disable_runtime || efi_runtime_init()) return; - set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); } - if (efi_memmap_init()) return; - set_bit(EFI_MEMMAP, &x86_efi_facility); + set_bit(EFI_MEMMAP, &efi.flags); -#ifdef CONFIG_X86_32 - if (efi_is_native()) { - x86_platform.get_wallclock = efi_get_time; - x86_platform.set_wallclock = efi_set_rtc_mmss; - } -#endif - -#if EFI_DEBUG print_efi_memmap(); -#endif } void __init efi_late_init(void) @@ -722,7 +795,7 @@ void __init efi_set_executable(efi_memory_desc_t *md, bool executable) set_memory_nx(addr, npages); } -static void __init runtime_code_page_mkexec(void) +void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; void *p; @@ -748,36 +821,54 @@ void efi_memory_uc(u64 addr, unsigned long size) set_memory_uc(addr, npages); } -/* - * This function will switch the EFI runtime services to virtual mode. - * Essentially, look through the EFI memmap and map every region that - * has the runtime attribute bit set in its memory descriptor and update - * that memory descriptor with the virtual address obtained from ioremap(). - * This enables the runtime services to be called without having to - * thunk back into physical mode for every invocation. - */ -void __init efi_enter_virtual_mode(void) +void __init old_map_region(efi_memory_desc_t *md) { - efi_memory_desc_t *md, *prev_md = NULL; - efi_status_t status; + u64 start_pfn, end_pfn, end; unsigned long size; - u64 end, systab, start_pfn, end_pfn; - void *p, *va, *new_memmap = NULL; - int count = 0; + void *va; - efi.systab = NULL; + start_pfn = PFN_DOWN(md->phys_addr); + size = md->num_pages << PAGE_SHIFT; + end = md->phys_addr + size; + end_pfn = PFN_UP(end); - /* - * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI - */ + if (pfn_range_is_mapped(start_pfn, end_pfn)) { + va = __va(md->phys_addr); - if (!efi_is_native()) { - efi_unmap_memmap(); - return; - } + if (!(md->attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)va, size); + } else + va = efi_ioremap(md->phys_addr, size, + md->type, md->attribute); + + md->virt_addr = (u64) (unsigned long) va; + if (!va) + pr_err("ioremap of 0x%llX failed!\n", + (unsigned long long)md->phys_addr); +} + +static void native_runtime_setup(void) +{ + efi.get_time = virt_efi_get_time; + efi.set_time = virt_efi_set_time; + efi.get_wakeup_time = virt_efi_get_wakeup_time; + efi.set_wakeup_time = virt_efi_set_wakeup_time; + efi.get_variable = virt_efi_get_variable; + efi.get_next_variable = virt_efi_get_next_variable; + efi.set_variable = virt_efi_set_variable; + efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; + efi.reset_system = virt_efi_reset_system; + efi.query_variable_info = virt_efi_query_variable_info; + efi.update_capsule = virt_efi_update_capsule; + efi.query_capsule_caps = virt_efi_query_capsule_caps; +} + +/* Merge contiguous regions of the same type and attribute */ +static void __init efi_merge_regions(void) +{ + void *p; + efi_memory_desc_t *md, *prev_md = NULL; - /* Merge contiguous regions of the same type and attribute */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { u64 prev_size; md = p; @@ -803,6 +894,87 @@ void __init efi_enter_virtual_mode(void) } prev_md = md; } +} + +static void __init get_systab_virt_addr(efi_memory_desc_t *md) +{ + unsigned long size; + u64 end, systab; + + size = md->num_pages << EFI_PAGE_SHIFT; + end = md->phys_addr + size; + systab = (u64)(unsigned long)efi_phys.systab; + if (md->phys_addr <= systab && systab < end) { + systab += md->virt_addr - md->phys_addr; + efi.systab = (efi_system_table_t *)(unsigned long)systab; + } +} + +static void __init save_runtime_map(void) +{ +#ifdef CONFIG_KEXEC + efi_memory_desc_t *md; + void *tmp, *p, *q = NULL; + int count = 0; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + + if (!(md->attribute & EFI_MEMORY_RUNTIME) || + (md->type == EFI_BOOT_SERVICES_CODE) || + (md->type == EFI_BOOT_SERVICES_DATA)) + continue; + tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL); + if (!tmp) + goto out; + q = tmp; + + memcpy(q + count * memmap.desc_size, md, memmap.desc_size); + count++; + } + + efi_runtime_map_setup(q, count, memmap.desc_size); + return; + +out: + kfree(q); + pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); +#endif +} + +static void *realloc_pages(void *old_memmap, int old_shift) +{ + void *ret; + + ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1); + if (!ret) + goto out; + + /* + * A first-time allocation doesn't have anything to copy. + */ + if (!old_memmap) + return ret; + + memcpy(ret, old_memmap, PAGE_SIZE << old_shift); + +out: + free_pages((unsigned long)old_memmap, old_shift); + return ret; +} + +/* + * Map the efi memory ranges of the runtime services and update new_mmap with + * virtual addresses. + */ +static void * __init efi_map_regions(int *count, int *pg_shift) +{ + void *p, *new_memmap = NULL; + unsigned long left = 0; + efi_memory_desc_t *md; for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; @@ -814,52 +986,150 @@ void __init efi_enter_virtual_mode(void) continue; } - size = md->num_pages << EFI_PAGE_SHIFT; - end = md->phys_addr + size; - - start_pfn = PFN_DOWN(md->phys_addr); - end_pfn = PFN_UP(end); - if (pfn_range_is_mapped(start_pfn, end_pfn)) { - va = __va(md->phys_addr); - - if (!(md->attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)va, size); - } else - va = efi_ioremap(md->phys_addr, size, - md->type, md->attribute); + efi_map_region(md); + get_systab_virt_addr(md); - md->virt_addr = (u64) (unsigned long) va; + if (left < memmap.desc_size) { + new_memmap = realloc_pages(new_memmap, *pg_shift); + if (!new_memmap) + return NULL; - if (!va) { - pr_err("ioremap of 0x%llX failed!\n", - (unsigned long long)md->phys_addr); - continue; + left += PAGE_SIZE << *pg_shift; + (*pg_shift)++; } - systab = (u64) (unsigned long) efi_phys.systab; - if (md->phys_addr <= systab && systab < end) { - systab += md->virt_addr - md->phys_addr; - efi.systab = (efi_system_table_t *) (unsigned long) systab; - } - new_memmap = krealloc(new_memmap, - (count + 1) * memmap.desc_size, - GFP_KERNEL); - memcpy(new_memmap + (count * memmap.desc_size), md, + memcpy(new_memmap + (*count * memmap.desc_size), md, memmap.desc_size); - count++; + + left -= memmap.desc_size; + (*count)++; + } + + return new_memmap; +} + +static void __init kexec_enter_virtual_mode(void) +{ +#ifdef CONFIG_KEXEC + efi_memory_desc_t *md; + void *p; + + efi.systab = NULL; + + /* + * We don't do virtual mode, since we don't do runtime services, on + * non-native EFI + */ + if (!efi_is_native()) { + efi_unmap_memmap(); + return; + } + + /* + * Map efi regions which were passed via setup_data. The virt_addr is a + * fixed addr which was used in first kernel of a kexec boot. + */ + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + efi_map_region_fixed(md); /* FIXME: add error handling */ + get_systab_virt_addr(md); + } + + save_runtime_map(); + + BUG_ON(!efi.systab); + + efi_sync_low_kernel_mappings(); + + /* + * Now that EFI is in virtual mode, update the function + * pointers in the runtime service table to the new virtual addresses. + * + * Call EFI services through wrapper functions. + */ + efi.runtime_version = efi_systab.hdr.revision; + + native_runtime_setup(); + + efi.set_virtual_address_map = NULL; + + if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) + runtime_code_page_mkexec(); + + /* clean DUMMY object */ + efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, + 0, NULL); +#endif +} + +/* + * This function will switch the EFI runtime services to virtual mode. + * Essentially, we look through the EFI memmap and map every region that + * has the runtime attribute bit set in its memory descriptor into the + * ->trampoline_pgd page table using a top-down VA allocation scheme. + * + * The old method which used to update that memory descriptor with the + * virtual address obtained from ioremap() is still supported when the + * kernel is booted with efi=old_map on its command line. Same old + * method enabled the runtime services to be called without having to + * thunk back into physical mode for every invocation. + * + * The new method does a pagetable switch in a preemption-safe manner + * so that we're in a different address space when calling a runtime + * function. For function arguments passing we do copy the PGDs of the + * kernel page table into ->trampoline_pgd prior to each call. + * + * Specially for kexec boot, efi runtime maps in previous kernel should + * be passed in via setup_data. In that case runtime ranges will be mapped + * to the same virtual addresses as the first kernel, see + * kexec_enter_virtual_mode(). + */ +static void __init __efi_enter_virtual_mode(void) +{ + int count = 0, pg_shift = 0; + void *new_memmap = NULL; + efi_status_t status; + + efi.systab = NULL; + + efi_merge_regions(); + new_memmap = efi_map_regions(&count, &pg_shift); + if (!new_memmap) { + pr_err("Error reallocating memory, EFI runtime non-functional!\n"); + return; } + save_runtime_map(); + BUG_ON(!efi.systab); - status = phys_efi_set_virtual_address_map( - memmap.desc_size * count, - memmap.desc_size, - memmap.desc_version, - (efi_memory_desc_t *)__pa(new_memmap)); + if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) + return; + + efi_sync_low_kernel_mappings(); + efi_dump_pagetable(); + + if (efi_is_native()) { + status = phys_efi_set_virtual_address_map( + memmap.desc_size * count, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)__pa(new_memmap)); + } else { + status = efi_thunk_set_virtual_address_map( + efi_phys.set_virtual_address_map, + memmap.desc_size * count, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)__pa(new_memmap)); + } if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode " - "(status=%lx)!\n", status); + pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); panic("EFI call to SetVirtualAddressMap() failed!"); } @@ -870,23 +1140,43 @@ void __init efi_enter_virtual_mode(void) * Call EFI services through wrapper functions. */ efi.runtime_version = efi_systab.hdr.revision; - efi.get_time = virt_efi_get_time; - efi.set_time = virt_efi_set_time; - efi.get_wakeup_time = virt_efi_get_wakeup_time; - efi.set_wakeup_time = virt_efi_set_wakeup_time; - efi.get_variable = virt_efi_get_variable; - efi.get_next_variable = virt_efi_get_next_variable; - efi.set_variable = virt_efi_set_variable; - efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; - efi.reset_system = virt_efi_reset_system; + + if (efi_is_native()) + native_runtime_setup(); + else + efi_thunk_runtime_setup(); + efi.set_virtual_address_map = NULL; - efi.query_variable_info = virt_efi_query_variable_info; - efi.update_capsule = virt_efi_update_capsule; - efi.query_capsule_caps = virt_efi_query_capsule_caps; - if (__supported_pte_mask & _PAGE_NX) - runtime_code_page_mkexec(); - kfree(new_memmap); + efi_runtime_mkexec(); + + /* + * We mapped the descriptor array into the EFI pagetable above but we're + * not unmapping it here. Here's why: + * + * We're copying select PGDs from the kernel page table to the EFI page + * table and when we do so and make changes to those PGDs like unmapping + * stuff from them, those changes appear in the kernel page table and we + * go boom. + * + * From setup_real_mode(): + * + * ... + * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; + * + * In this particular case, our allocation is in PGD 0 of the EFI page + * table but we've copied that PGD from PGD[272] of the EFI page table: + * + * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 + * + * where the direct memory mapping in kernel space is. + * + * new_memmap's VA comes from that direct mapping and thus clearing it, + * it would get cleared in the kernel page table too. + * + * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); + */ + free_pages((unsigned long)new_memmap, pg_shift); /* clean DUMMY object */ efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, @@ -896,6 +1186,14 @@ void __init efi_enter_virtual_mode(void) 0, NULL); } +void __init efi_enter_virtual_mode(void) +{ + if (efi_setup) + kexec_enter_virtual_mode(); + else + __efi_enter_virtual_mode(); +} + /* * Convenience functions to obtain memory types and attributes */ @@ -933,9 +1231,8 @@ u64 efi_mem_attributes(unsigned long phys_addr) } /* - * Some firmware has serious problems when using more than 50% of the EFI - * variable store, i.e. it triggers bugs that can brick machines. Ensure that - * we never use more than this safe limit. + * Some firmware implementations refuse to boot if there's insufficient space + * in the variable store. Ensure that we never use more than a safe limit. * * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable * store. @@ -954,10 +1251,9 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) return status; /* - * Some firmware implementations refuse to boot if there's insufficient - * space in the variable store. We account for that by refusing the - * write if permitting it would reduce the available space to under - * 5KB. This figure was provided by Samsung, so should be safe. + * We account for that by refusing the write if permitting it would + * reduce the available space to under 5KB. This figure was provided by + * Samsung, so should be safe. */ if ((remaining_size - size < EFI_MIN_RESERVE) && !efi_no_storage_paranoia) { @@ -1013,3 +1309,34 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) return EFI_SUCCESS; } EXPORT_SYMBOL_GPL(efi_query_variable_store); + +static int __init parse_efi_cmdline(char *str) +{ + if (*str == '=') + str++; + + if (!strncmp(str, "old_map", 7)) + set_bit(EFI_OLD_MEMMAP, &efi.flags); + + return 0; +} +early_param("efi", parse_efi_cmdline); + +void __init efi_apply_memmap_quirks(void) +{ + /* + * Once setup is done earlier, unmap the EFI memory map on mismatched + * firmware/kernel architectures since there is no support for runtime + * services. + */ + if (!efi_runtime_supported()) { + pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); + efi_unmap_memmap(); + } + + /* + * UV doesn't support the new EFI pagetable mapping yet. + */ + if (is_uv_system()) + set_bit(EFI_OLD_MEMMAP, &efi.flags); +} diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e446941dd..9ee3491e31f 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -37,9 +37,24 @@ * claim EFI runtime service handler exclusively and to duplicate a memory in * low memory space say 0 - 3G. */ - static unsigned long efi_rt_eflags; +void efi_sync_low_kernel_mappings(void) {} +void __init efi_dump_pagetable(void) {} +int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + return 0; +} +void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {} + +void __init efi_map_region(efi_memory_desc_t *md) +{ + old_map_region(md); +} + +void __init efi_map_region_fixed(efi_memory_desc_t *md) {} +void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} + void efi_call_phys_prelog(void) { struct desc_ptr gdt_descr; @@ -67,3 +82,9 @@ void efi_call_phys_epilog(void) local_irq_restore(efi_rt_eflags); } + +void __init efi_runtime_mkexec(void) +{ + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); +} diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 39a0e7f1f0a..290d397e1dd 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -38,10 +38,30 @@ #include <asm/efi.h> #include <asm/cacheflush.h> #include <asm/fixmap.h> +#include <asm/realmode.h> +#include <asm/time.h> static pgd_t *save_pgd __initdata; static unsigned long efi_flags __initdata; +/* + * We allocate runtime services regions bottom-up, starting from -4G, i.e. + * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. + */ +static u64 efi_va = -4 * (1UL << 30); +#define EFI_VA_END (-68 * (1UL << 30)) + +/* + * Scratch space used for switching the pagetable in the EFI stub + */ +struct efi_scratch { + u64 r15; + u64 prev_cr3; + pgd_t *efi_pgt; + bool use_pgd; + u64 phys_stack; +} __packed; + static void __init early_code_mapping_set_exec(int executable) { efi_memory_desc_t *md; @@ -65,6 +85,9 @@ void __init efi_call_phys_prelog(void) int pgd; int n_pgds; + if (!efi_enabled(EFI_OLD_MEMMAP)) + return; + early_code_mapping_set_exec(1); local_irq_save(efi_flags); @@ -86,6 +109,10 @@ void __init efi_call_phys_epilog(void) */ int pgd; int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + + if (!efi_enabled(EFI_OLD_MEMMAP)) + return; + for (pgd = 0; pgd < n_pgds; pgd++) set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); kfree(save_pgd); @@ -94,6 +121,158 @@ void __init efi_call_phys_epilog(void) early_code_mapping_set_exec(0); } +/* + * Add low kernel mappings for passing arguments to EFI functions. + */ +void efi_sync_low_kernel_mappings(void) +{ + unsigned num_pgds; + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + + if (efi_enabled(EFI_OLD_MEMMAP)) + return; + + num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); + + memcpy(pgd + pgd_index(PAGE_OFFSET), + init_mm.pgd + pgd_index(PAGE_OFFSET), + sizeof(pgd_t) * num_pgds); +} + +int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + unsigned long text; + struct page *page; + unsigned npages; + pgd_t *pgd; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; + pgd = __va(efi_scratch.efi_pgt); + + /* + * It can happen that the physical address of new_memmap lands in memory + * which is not mapped in the EFI page table. Therefore we need to go + * and ident-map those pages containing the map before calling + * phys_efi_set_virtual_address_map(). + */ + if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) { + pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); + return 1; + } + + efi_scratch.use_pgd = true; + + /* + * When making calls to the firmware everything needs to be 1:1 + * mapped and addressable with 32-bit pointers. Map the kernel + * text and allocate a new stack because we can't rely on the + * stack pointer being < 4GB. + */ + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return 0; + + page = alloc_page(GFP_KERNEL|__GFP_DMA32); + if (!page) + panic("Unable to allocate EFI runtime stack < 4GB\n"); + + efi_scratch.phys_stack = virt_to_phys(page_address(page)); + efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + + npages = (_end - _text) >> PAGE_SHIFT; + text = __pa(_text); + + if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) { + pr_err("Failed to map kernel text 1:1\n"); + return 1; + } + + return 0; +} + +void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + + kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); +} + +static void __init __map_region(efi_memory_desc_t *md, u64 va) +{ + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + unsigned long pf = 0; + + if (!(md->attribute & EFI_MEMORY_WB)) + pf |= _PAGE_PCD; + + if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, va); +} + +void __init efi_map_region(efi_memory_desc_t *md) +{ + unsigned long size = md->num_pages << PAGE_SHIFT; + u64 pa = md->phys_addr; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return old_map_region(md); + + /* + * Make sure the 1:1 mappings are present as a catch-all for b0rked + * firmware which doesn't update all internal pointers after switching + * to virtual mode and would otherwise crap on us. + */ + __map_region(md, md->phys_addr); + + /* + * Enforce the 1:1 mapping as the default virtual address when + * booting in EFI mixed mode, because even though we may be + * running a 64-bit kernel, the firmware may only be 32-bit. + */ + if (!efi_is_native () && IS_ENABLED(CONFIG_EFI_MIXED)) { + md->virt_addr = md->phys_addr; + return; + } + + efi_va -= size; + + /* Is PA 2M-aligned? */ + if (!(pa & (PMD_SIZE - 1))) { + efi_va &= PMD_MASK; + } else { + u64 pa_offset = pa & (PMD_SIZE - 1); + u64 prev_va = efi_va; + + /* get us the same offset within this 2M page */ + efi_va = (efi_va & PMD_MASK) + pa_offset; + + if (efi_va > prev_va) + efi_va -= PMD_SIZE; + } + + if (efi_va < EFI_VA_END) { + pr_warn(FW_WARN "VA address range overflow!\n"); + return; + } + + /* Do the VA map */ + __map_region(md, efi_va); + md->virt_addr = efi_va; +} + +/* + * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges. + * md->virt_addr is the original virtual address which had been mapped in kexec + * 1st kernel. + */ +void __init efi_map_region_fixed(efi_memory_desc_t *md) +{ + __map_region(md, md->virt_addr); +} + void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, u32 type, u64 attribute) { @@ -113,3 +292,313 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, return (void __iomem *)__va(phys_addr); } + +void __init parse_efi_setup(u64 phys_addr, u32 data_len) +{ + efi_setup = phys_addr + sizeof(struct setup_data); +} + +void __init efi_runtime_mkexec(void) +{ + if (!efi_enabled(EFI_OLD_MEMMAP)) + return; + + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); +} + +void __init efi_dump_pagetable(void) +{ +#ifdef CONFIG_EFI_PGT_DUMP + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + + ptdump_walk_pgd_level(NULL, pgd); +#endif +} + +#ifdef CONFIG_EFI_MIXED +extern efi_status_t efi64_thunk(u32, ...); + +#define runtime_service32(func) \ +({ \ + u32 table = (u32)(unsigned long)efi.systab; \ + u32 *rt, *___f; \ + \ + rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime)); \ + ___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \ + *___f; \ +}) + +/* + * Switch to the EFI page tables early so that we can access the 1:1 + * runtime services mappings which are not mapped in any other page + * tables. This function must be called before runtime_service32(). + * + * Also, disable interrupts because the IDT points to 64-bit handlers, + * which aren't going to function correctly when we switch to 32-bit. + */ +#define efi_thunk(f, ...) \ +({ \ + efi_status_t __s; \ + unsigned long flags; \ + u32 func; \ + \ + efi_sync_low_kernel_mappings(); \ + local_irq_save(flags); \ + \ + efi_scratch.prev_cr3 = read_cr3(); \ + write_cr3((unsigned long)efi_scratch.efi_pgt); \ + __flush_tlb_all(); \ + \ + func = runtime_service32(f); \ + __s = efi64_thunk(func, __VA_ARGS__); \ + \ + write_cr3(efi_scratch.prev_cr3); \ + __flush_tlb_all(); \ + local_irq_restore(flags); \ + \ + __s; \ +}) + +efi_status_t efi_thunk_set_virtual_address_map( + void *phys_set_virtual_address_map, + unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + unsigned long flags; + u32 func; + + efi_sync_low_kernel_mappings(); + local_irq_save(flags); + + efi_scratch.prev_cr3 = read_cr3(); + write_cr3((unsigned long)efi_scratch.efi_pgt); + __flush_tlb_all(); + + func = (u32)(unsigned long)phys_set_virtual_address_map; + status = efi64_thunk(func, memory_map_size, descriptor_size, + descriptor_version, virtual_map); + + write_cr3(efi_scratch.prev_cr3); + __flush_tlb_all(); + local_irq_restore(flags); + + return status; +} + +static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) +{ + efi_status_t status; + u32 phys_tm, phys_tc; + + spin_lock(&rtc_lock); + + phys_tm = virt_to_phys(tm); + phys_tc = virt_to_phys(tc); + + status = efi_thunk(get_time, phys_tm, phys_tc); + + spin_unlock(&rtc_lock); + + return status; +} + +static efi_status_t efi_thunk_set_time(efi_time_t *tm) +{ + efi_status_t status; + u32 phys_tm; + + spin_lock(&rtc_lock); + + phys_tm = virt_to_phys(tm); + + status = efi_thunk(set_time, phys_tm); + + spin_unlock(&rtc_lock); + + return status; +} + +static efi_status_t +efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, + efi_time_t *tm) +{ + efi_status_t status; + u32 phys_enabled, phys_pending, phys_tm; + + spin_lock(&rtc_lock); + + phys_enabled = virt_to_phys(enabled); + phys_pending = virt_to_phys(pending); + phys_tm = virt_to_phys(tm); + + status = efi_thunk(get_wakeup_time, phys_enabled, + phys_pending, phys_tm); + + spin_unlock(&rtc_lock); + + return status; +} + +static efi_status_t +efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +{ + efi_status_t status; + u32 phys_tm; + + spin_lock(&rtc_lock); + + phys_tm = virt_to_phys(tm); + + status = efi_thunk(set_wakeup_time, enabled, phys_tm); + + spin_unlock(&rtc_lock); + + return status; +} + + +static efi_status_t +efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *data_size, void *data) +{ + efi_status_t status; + u32 phys_name, phys_vendor, phys_attr; + u32 phys_data_size, phys_data; + + phys_data_size = virt_to_phys(data_size); + phys_vendor = virt_to_phys(vendor); + phys_name = virt_to_phys(name); + phys_attr = virt_to_phys(attr); + phys_data = virt_to_phys(data); + + status = efi_thunk(get_variable, phys_name, phys_vendor, + phys_attr, phys_data_size, phys_data); + + return status; +} + +static efi_status_t +efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, void *data) +{ + u32 phys_name, phys_vendor, phys_data; + efi_status_t status; + + phys_name = virt_to_phys(name); + phys_vendor = virt_to_phys(vendor); + phys_data = virt_to_phys(data); + + /* If data_size is > sizeof(u32) we've got problems */ + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); + + return status; +} + +static efi_status_t +efi_thunk_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) +{ + efi_status_t status; + u32 phys_name_size, phys_name, phys_vendor; + + phys_name_size = virt_to_phys(name_size); + phys_vendor = virt_to_phys(vendor); + phys_name = virt_to_phys(name); + + status = efi_thunk(get_next_variable, phys_name_size, + phys_name, phys_vendor); + + return status; +} + +static efi_status_t +efi_thunk_get_next_high_mono_count(u32 *count) +{ + efi_status_t status; + u32 phys_count; + + phys_count = virt_to_phys(count); + status = efi_thunk(get_next_high_mono_count, phys_count); + + return status; +} + +static void +efi_thunk_reset_system(int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) +{ + u32 phys_data; + + phys_data = virt_to_phys(data); + + efi_thunk(reset_system, reset_type, status, data_size, phys_data); +} + +static efi_status_t +efi_thunk_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, unsigned long sg_list) +{ + /* + * To properly support this function we would need to repackage + * 'capsules' because the firmware doesn't understand 64-bit + * pointers. + */ + return EFI_UNSUPPORTED; +} + +static efi_status_t +efi_thunk_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + efi_status_t status; + u32 phys_storage, phys_remaining, phys_max; + + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + phys_storage = virt_to_phys(storage_space); + phys_remaining = virt_to_phys(remaining_space); + phys_max = virt_to_phys(max_variable_size); + + status = efi_thunk(query_variable_info, attr, phys_storage, + phys_remaining, phys_max); + + return status; +} + +static efi_status_t +efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, u64 *max_size, + int *reset_type) +{ + /* + * To properly support this function we would need to repackage + * 'capsules' because the firmware doesn't understand 64-bit + * pointers. + */ + return EFI_UNSUPPORTED; +} + +void efi_thunk_runtime_setup(void) +{ + efi.get_time = efi_thunk_get_time; + efi.set_time = efi_thunk_set_time; + efi.get_wakeup_time = efi_thunk_get_wakeup_time; + efi.set_wakeup_time = efi_thunk_set_wakeup_time; + efi.get_variable = efi_thunk_get_variable; + efi.get_next_variable = efi_thunk_get_next_variable; + efi.set_variable = efi_thunk_set_variable; + efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count; + efi.reset_system = efi_thunk_reset_system; + efi.query_variable_info = efi_thunk_query_variable_info; + efi.update_capsule = efi_thunk_update_capsule; + efi.query_capsule_caps = efi_thunk_query_capsule_caps; +} +#endif /* CONFIG_EFI_MIXED */ diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 4c07ccab814..5fcda727255 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -7,6 +7,10 @@ */ #include <linux/linkage.h> +#include <asm/segment.h> +#include <asm/msr.h> +#include <asm/processor-flags.h> +#include <asm/page_types.h> #define SAVE_XMM \ mov %rsp, %rax; \ @@ -34,72 +38,42 @@ mov %rsi, %cr0; \ mov (%rsp), %rsp -ENTRY(efi_call0) - SAVE_XMM - subq $32, %rsp - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call0) + /* stolen from gcc */ + .macro FLUSH_TLB_ALL + movq %r15, efi_scratch(%rip) + movq %r14, efi_scratch+8(%rip) + movq %cr4, %r15 + movq %r15, %r14 + andb $0x7f, %r14b + movq %r14, %cr4 + movq %r15, %cr4 + movq efi_scratch+8(%rip), %r14 + movq efi_scratch(%rip), %r15 + .endm -ENTRY(efi_call1) - SAVE_XMM - subq $32, %rsp - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call1) + .macro SWITCH_PGT + cmpb $0, efi_scratch+24(%rip) + je 1f + movq %r15, efi_scratch(%rip) # r15 + # save previous CR3 + movq %cr3, %r15 + movq %r15, efi_scratch+8(%rip) # prev_cr3 + movq efi_scratch+16(%rip), %r15 # EFI pgt + movq %r15, %cr3 + 1: + .endm -ENTRY(efi_call2) - SAVE_XMM - subq $32, %rsp - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call2) + .macro RESTORE_PGT + cmpb $0, efi_scratch+24(%rip) + je 2f + movq efi_scratch+8(%rip), %r15 + movq %r15, %cr3 + movq efi_scratch(%rip), %r15 + FLUSH_TLB_ALL + 2: + .endm -ENTRY(efi_call3) - SAVE_XMM - subq $32, %rsp - mov %rcx, %r8 - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call3) - -ENTRY(efi_call4) - SAVE_XMM - subq $32, %rsp - mov %r8, %r9 - mov %rcx, %r8 - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call4) - -ENTRY(efi_call5) - SAVE_XMM - subq $48, %rsp - mov %r9, 32(%rsp) - mov %r8, %r9 - mov %rcx, %r8 - mov %rsi, %rcx - call *%rdi - addq $48, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call5) - -ENTRY(efi_call6) +ENTRY(efi_call) SAVE_XMM mov (%rsp), %rax mov 8(%rax), %rax @@ -109,8 +83,177 @@ ENTRY(efi_call6) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx + SWITCH_PGT call *%rdi + RESTORE_PGT addq $48, %rsp RESTORE_XMM ret -ENDPROC(efi_call6) +ENDPROC(efi_call) + +#ifdef CONFIG_EFI_MIXED + +/* + * We run this function from the 1:1 mapping. + * + * This function must be invoked with a 1:1 mapped stack. + */ +ENTRY(__efi64_thunk) + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + + subq $32, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movq %r8, %rsi + movl %esi, 0xc(%rsp) + movq %r9, %rsi + movl %esi, 0x10(%rsp) + + sgdt save_gdt(%rip) + + leaq 1f(%rip), %rbx + movq %rbx, func_rt_ptr(%rip) + + /* Switch to gdt with 32-bit segments */ + movl 64(%rsp), %eax + lgdt (%rax) + + leaq efi_enter32(%rip), %rax + pushq $__KERNEL_CS + pushq %rax + lretq + +1: addq $32, %rsp + + lgdt save_gdt(%rip) + + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + + /* + * Convert 32-bit status code into 64-bit. + */ + test %rax, %rax + jz 1f + movl %eax, %ecx + andl $0x0fffffff, %ecx + andl $0xf0000000, %eax + shl $32, %rax + or %rcx, %rax +1: + ret +ENDPROC(__efi64_thunk) + +ENTRY(efi_exit32) + movq func_rt_ptr(%rip), %rax + push %rax + mov %rdi, %rax + ret +ENDPROC(efi_exit32) + + .code32 +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +ENTRY(efi_enter32) + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + + /* Reload pgtables */ + movl %cr3, %eax + movl %eax, %cr3 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Disable long mode via EFER */ + movl $MSR_EFER, %ecx + rdmsr + btrl $_EFER_LME, %eax + wrmsr + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + /* + * Some firmware will return with interrupts enabled. Be sure to + * disable them before we switch GDTs. + */ + cli + + movl 68(%esp), %eax + movl %eax, 2(%eax) + lgdtl (%eax) + + movl %cr4, %eax + btsl $(X86_CR4_PAE_BIT), %eax + movl %eax, %cr4 + + movl %cr3, %eax + movl %eax, %cr3 + + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + xorl %eax, %eax + lldt %ax + + movl 72(%esp), %eax + pushl $__KERNEL_CS + pushl %eax + + /* Enable paging */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + lret +ENDPROC(efi_enter32) + + .data + .balign 8 + .global efi32_boot_gdt +efi32_boot_gdt: .word 0 + .quad 0 + +save_gdt: .word 0 + .quad 0 +func_rt_ptr: .quad 0 + + .global efi_gdt64 +efi_gdt64: + .word efi_gdt64_end - efi_gdt64 + .long 0 /* Filled out by user */ + .word 0 + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00af9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf92000000ffff /* __KERNEL_DS */ + .quad 0x0080890000000000 /* TS descriptor */ + .quad 0x0000000000000000 /* TS continued */ +efi_gdt64_end: +#endif /* CONFIG_EFI_MIXED */ + + .data +ENTRY(efi_scratch) + .fill 3,8,0 + .byte 0 + .quad 0 diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S new file mode 100644 index 00000000000..8806fa73e6e --- /dev/null +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2014 Intel Corporation; author Matt Fleming + */ + +#include <linux/linkage.h> +#include <asm/page_types.h> + + .text + .code64 +ENTRY(efi64_thunk) + push %rbp + push %rbx + + /* + * Switch to 1:1 mapped 32-bit stack pointer. + */ + movq %rsp, efi_saved_sp(%rip) + movq efi_scratch+25(%rip), %rsp + + /* + * Calculate the physical address of the kernel text. + */ + movq $__START_KERNEL_map, %rax + subq phys_base(%rip), %rax + + /* + * Push some physical addresses onto the stack. This is easier + * to do now in a code64 section while the assembler can address + * 64-bit values. Note that all the addresses on the stack are + * 32-bit. + */ + subq $16, %rsp + leaq efi_exit32(%rip), %rbx + subq %rax, %rbx + movl %ebx, 8(%rsp) + leaq efi_gdt64(%rip), %rbx + subq %rax, %rbx + movl %ebx, 2(%ebx) + movl %ebx, 4(%rsp) + leaq efi_gdt32(%rip), %rbx + subq %rax, %rbx + movl %ebx, 2(%ebx) + movl %ebx, (%rsp) + + leaq __efi64_thunk(%rip), %rbx + subq %rax, %rbx + call *%rbx + + movq efi_saved_sp(%rip), %rsp + pop %rbx + pop %rbp + retq +ENDPROC(efi64_thunk) + + .data +efi_gdt32: + .word efi_gdt32_end - efi_gdt32 + .long 0 /* Filled out above */ + .word 0 + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00cf9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf93000000ffff /* __KERNEL_DS */ +efi_gdt32_end: + +efi_saved_sp: .quad 0 diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index 01cc29ea5ff..0a8ee703b9f 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile @@ -1,6 +1,6 @@ -obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o -obj-$(CONFIG_X86_INTEL_MID) += intel_mid_vrtc.o +obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o + # SFI specific code ifdef CONFIG_X86_INTEL_MID obj-$(CONFIG_SFI) += sfi.o device_libs/ diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 097e7a7940d..af9307f2cc2 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -20,3 +20,4 @@ obj-$(subst m,y,$(CONFIG_DRM_MEDFIELD)) += platform_tc35876x.o obj-$(subst m,y,$(CONFIG_SERIAL_MRST_MAX3110)) += platform_max3111.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o +obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c index 0d942c1d26d..69a783689d2 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c @@ -22,7 +22,9 @@ static void __init *emc1403_platform_data(void *info) int intr = get_gpio_by_name("thermal_int"); int intr2nd = get_gpio_by_name("thermal_alert"); - if (intr == -1 || intr2nd == -1) + if (intr < 0) + return NULL; + if (intr2nd < 0) return NULL; i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c index a013a4834bb..dccae6b0413 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c @@ -66,7 +66,7 @@ static int __init pb_keys_init(void) gb[i].gpio = get_gpio_by_name(gb[i].desc); pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, gb[i].gpio); - if (gb[i].gpio == -1) + if (gb[i].gpio < 0) continue; if (i != good) diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h index 8f568dd7960..79bb09d4f71 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h +++ b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h @@ -12,6 +12,7 @@ #ifndef _PLATFORM_IPC_H_ #define _PLATFORM_IPC_H_ -extern void __init ipc_device_handler(struct sfi_device_table_entry *pentry, - struct devs_id *dev) __attribute__((weak)); +void __init +ipc_device_handler(struct sfi_device_table_entry *pentry, struct devs_id *dev); + #endif diff --git a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c index 15278c11f71..54226de7541 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c @@ -21,7 +21,9 @@ static void __init *lis331dl_platform_data(void *info) int intr = get_gpio_by_name("accel_int"); int intr2nd = get_gpio_by_name("accel_2"); - if (intr == -1 || intr2nd == -1) + if (intr < 0) + return NULL; + if (intr2nd < 0) return NULL; i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c index 94ade10024a..2c8acbc1e9a 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c @@ -48,7 +48,7 @@ static void __init *max7315_platform_data(void *info) gpio_base = get_gpio_by_name(base_pin_name); intr = get_gpio_by_name(intr_pin_name); - if (gpio_base == -1) + if (gpio_base < 0) return NULL; max7315->gpio_base = gpio_base; if (intr != -1) { diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c index dd28d63c84f..cfe9a47a1e8 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c @@ -19,7 +19,7 @@ static void *mpu3050_platform_data(void *info) struct i2c_board_info *i2c_info = info; int intr = get_gpio_by_name("mpu3050_int"); - if (intr == -1) + if (intr < 0) return NULL; i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.h b/arch/x86/platform/intel-mid/device_libs/platform_msic.h index 917eb56d77d..b7be1d041da 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic.h +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.h @@ -14,6 +14,6 @@ extern struct intel_msic_platform_data msic_pdata; -extern void *msic_generic_platform_data(void *info, - enum intel_msic_block block) __attribute__((weak)); +void *msic_generic_platform_data(void *info, enum intel_msic_block block); + #endif diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c index d87182a0926..65c2a9a19db 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c @@ -26,7 +26,7 @@ static void __init *pmic_gpio_platform_data(void *info) static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; int gpio_base = get_gpio_by_name("pmic_gpio_base"); - if (gpio_base == -1) + if (gpio_base < 0) gpio_base = 64; pmic_gpio_pdata.gpio_base = gpio_base; pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c index 22881c9a673..33be0b3be6e 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c @@ -34,10 +34,10 @@ static void *tca6416_platform_data(void *info) gpio_base = get_gpio_by_name(base_pin_name); intr = get_gpio_by_name(intr_pin_name); - if (gpio_base == -1) + if (gpio_base < 0) return NULL; tca6416.gpio_base = gpio_base; - if (intr != -1) { + if (intr >= 0) { i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; tca6416.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; } else { diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c new file mode 100644 index 00000000000..973cf3bfa9f --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c @@ -0,0 +1,72 @@ +/* + * platform_wdt.c: Watchdog platform library file + * + * (C) Copyright 2014 Intel Corporation + * Author: David Cohen <david.a.cohen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/platform_data/intel-mid_wdt.h> +#include <asm/intel-mid.h> +#include <asm/io_apic.h> + +#define TANGIER_EXT_TIMER0_MSI 15 + +static struct platform_device wdt_dev = { + .name = "intel_mid_wdt", + .id = -1, +}; + +static int tangier_probe(struct platform_device *pdev) +{ + int ioapic; + int irq; + struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data; + struct io_apic_irq_attr irq_attr = { 0 }; + + if (!pdata) + return -EINVAL; + + irq = pdata->irq; + ioapic = mp_find_ioapic(irq); + if (ioapic >= 0) { + int ret; + irq_attr.ioapic = ioapic; + irq_attr.ioapic_pin = irq; + irq_attr.trigger = 1; + /* irq_attr.polarity = 0; -> Active high */ + ret = io_apic_set_pci_routing(NULL, irq, &irq_attr); + if (ret) + return ret; + } else { + dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n", + irq); + return -EINVAL; + } + + return 0; +} + +static struct intel_mid_wdt_pdata tangier_pdata = { + .irq = TANGIER_EXT_TIMER0_MSI, + .probe = tangier_probe, +}; + +static int __init register_mid_wdt(void) +{ + if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) { + wdt_dev.dev.platform_data = &tangier_pdata; + return platform_device_register(&wdt_dev); + } + + return -ENODEV; +} + +rootfs_initcall(register_mid_wdt); diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c index 4f702f554f6..e0bd082a80e 100644 --- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c +++ b/arch/x86/platform/intel-mid/early_printk_intel_mid.c @@ -22,7 +22,6 @@ #include <linux/console.h> #include <linux/kernel.h> #include <linux/delay.h> -#include <linux/init.h> #include <linux/io.h> #include <asm/fixmap.h> diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index f90e290f689..1bbedc4b0f8 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -35,6 +35,8 @@ #include <asm/apb_timer.h> #include <asm/reboot.h> +#include "intel_mid_weak_decls.h" + /* * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, * cmdline option x86_intel_mid_timer can be used to override the configuration @@ -58,12 +60,16 @@ enum intel_mid_timer_options intel_mid_timer_options; +/* intel_mid_ops to store sub arch ops */ +struct intel_mid_ops *intel_mid_ops; +/* getter function for sub arch ops*/ +static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT; enum intel_mid_cpu_type __intel_mid_cpu_chip; EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip); static void intel_mid_power_off(void) { -} +}; static void intel_mid_reboot(void) { @@ -72,32 +78,6 @@ static void intel_mid_reboot(void) static unsigned long __init intel_mid_calibrate_tsc(void) { - unsigned long fast_calibrate; - u32 lo, hi, ratio, fsb; - - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi); - ratio = (hi >> 8) & 0x1f; - pr_debug("ratio is %d\n", ratio); - if (!ratio) { - pr_err("read a zero ratio, should be incorrect!\n"); - pr_err("force tsc ratio to 16 ...\n"); - ratio = 16; - } - rdmsr(MSR_FSB_FREQ, lo, hi); - if ((lo & 0x7) == 0x7) - fsb = PENWELL_FSB_FREQ_83SKU; - else - fsb = PENWELL_FSB_FREQ_100SKU; - fast_calibrate = ratio * fsb; - pr_debug("read penwell tsc %lu khz\n", fast_calibrate); - lapic_timer_frequency = fsb * 1000 / HZ; - /* mark tsc clocksource as reliable */ - set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); - - if (fast_calibrate) - return fast_calibrate; - return 0; } @@ -125,13 +105,37 @@ static void __init intel_mid_time_init(void) static void intel_mid_arch_setup(void) { - if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) - __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL; - else { + if (boot_cpu_data.x86 != 6) { pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n", boot_cpu_data.x86, boot_cpu_data.x86_model); __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL; + goto out; } + + switch (boot_cpu_data.x86_model) { + case 0x35: + __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_CLOVERVIEW; + break; + case 0x3C: + case 0x4A: + __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_TANGIER; + break; + case 0x27: + default: + __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL; + break; + } + + if (__intel_mid_cpu_chip < MAX_CPU_OPS(get_intel_mid_ops)) + intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip](); + else { + intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL](); + pr_info("ARCH: Uknown SoC, assuming PENWELL!\n"); + } + +out: + if (intel_mid_ops->arch_setup) + intel_mid_ops->arch_setup(); } /* MID systems don't have i8042 controller */ diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h new file mode 100644 index 00000000000..46aa25c8ce0 --- /dev/null +++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h @@ -0,0 +1,19 @@ +/* + * intel_mid_weak_decls.h: Weak declarations of intel-mid.c + * + * (C) Copyright 2013 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + + +/* __attribute__((weak)) makes these declarations overridable */ +/* For every CPU addition a new get_<cpuname>_ops interface needs + * to be added. + */ +extern void *get_penwell_ops(void) __attribute__((weak)); +extern void *get_cloverview_ops(void) __attribute__((weak)); +extern void *get_tangier_ops(void) __attribute__((weak)); diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c new file mode 100644 index 00000000000..23381d2174a --- /dev/null +++ b/arch/x86/platform/intel-mid/mfld.c @@ -0,0 +1,75 @@ +/* + * mfld.c: Intel Medfield platform setup code + * + * (C) Copyright 2013 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include <linux/init.h> + +#include <asm/apic.h> +#include <asm/intel-mid.h> +#include <asm/intel_mid_vrtc.h> + +#include "intel_mid_weak_decls.h" + +static void penwell_arch_setup(void); +/* penwell arch ops */ +static struct intel_mid_ops penwell_ops = { + .arch_setup = penwell_arch_setup, +}; + +static void mfld_power_off(void) +{ +} + +static unsigned long __init mfld_calibrate_tsc(void) +{ + unsigned long fast_calibrate; + u32 lo, hi, ratio, fsb; + + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi); + ratio = (hi >> 8) & 0x1f; + pr_debug("ratio is %d\n", ratio); + if (!ratio) { + pr_err("read a zero ratio, should be incorrect!\n"); + pr_err("force tsc ratio to 16 ...\n"); + ratio = 16; + } + rdmsr(MSR_FSB_FREQ, lo, hi); + if ((lo & 0x7) == 0x7) + fsb = FSB_FREQ_83SKU; + else + fsb = FSB_FREQ_100SKU; + fast_calibrate = ratio * fsb; + pr_debug("read penwell tsc %lu khz\n", fast_calibrate); + lapic_timer_frequency = fsb * 1000 / HZ; + /* mark tsc clocksource as reliable */ + set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); + + if (fast_calibrate) + return fast_calibrate; + + return 0; +} + +static void __init penwell_arch_setup(void) +{ + x86_platform.calibrate_tsc = mfld_calibrate_tsc; + pm_power_off = mfld_power_off; +} + +void *get_penwell_ops(void) +{ + return &penwell_ops; +} + +void *get_cloverview_ops(void) +{ + return &penwell_ops; +} diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c new file mode 100644 index 00000000000..aaca91753d3 --- /dev/null +++ b/arch/x86/platform/intel-mid/mrfl.c @@ -0,0 +1,103 @@ +/* + * mrfl.c: Intel Merrifield platform specific setup code + * + * (C) Copyright 2013 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include <linux/init.h> + +#include <asm/apic.h> +#include <asm/intel-mid.h> + +#include "intel_mid_weak_decls.h" + +static unsigned long __init tangier_calibrate_tsc(void) +{ + unsigned long fast_calibrate; + u32 lo, hi, ratio, fsb, bus_freq; + + /* *********************** */ + /* Compute TSC:Ratio * FSB */ + /* *********************** */ + + /* Compute Ratio */ + rdmsr(MSR_PLATFORM_INFO, lo, hi); + pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo); + + ratio = (lo >> 8) & 0xFF; + pr_debug("ratio is %d\n", ratio); + if (!ratio) { + pr_err("Read a zero ratio, force tsc ratio to 4 ...\n"); + ratio = 4; + } + + /* Compute FSB */ + rdmsr(MSR_FSB_FREQ, lo, hi); + pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n", + hi, lo); + + bus_freq = lo & 0x7; + pr_debug("bus_freq = 0x%x\n", bus_freq); + + if (bus_freq == 0) + fsb = FSB_FREQ_100SKU; + else if (bus_freq == 1) + fsb = FSB_FREQ_100SKU; + else if (bus_freq == 2) + fsb = FSB_FREQ_133SKU; + else if (bus_freq == 3) + fsb = FSB_FREQ_167SKU; + else if (bus_freq == 4) + fsb = FSB_FREQ_83SKU; + else if (bus_freq == 5) + fsb = FSB_FREQ_400SKU; + else if (bus_freq == 6) + fsb = FSB_FREQ_267SKU; + else if (bus_freq == 7) + fsb = FSB_FREQ_333SKU; + else { + BUG(); + pr_err("Invalid bus_freq! Setting to minimal value!\n"); + fsb = FSB_FREQ_100SKU; + } + + /* TSC = FSB Freq * Resolved HFM Ratio */ + fast_calibrate = ratio * fsb; + pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate); + + /* ************************************ */ + /* Calculate Local APIC Timer Frequency */ + /* ************************************ */ + lapic_timer_frequency = (fsb * 1000) / HZ; + + pr_debug("Setting lapic_timer_frequency = %d\n", + lapic_timer_frequency); + + /* mark tsc clocksource as reliable */ + set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); + + if (fast_calibrate) + return fast_calibrate; + + return 0; +} + +static void __init tangier_arch_setup(void) +{ + x86_platform.calibrate_tsc = tangier_calibrate_tsc; +} + +/* tangier arch ops */ +static struct intel_mid_ops tangier_ops = { + .arch_setup = tangier_arch_setup, +}; + +void *get_tangier_ops(void) +{ + return &tangier_ops; +} diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index c84c1ca396b..994c40bd7cb 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -224,7 +224,7 @@ int get_gpio_by_name(const char *name) if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN)) return pentry->pin_no; } - return -1; + return -EINVAL; } void __init intel_scu_device_register(struct platform_device *pdev) @@ -250,7 +250,7 @@ static void __init intel_scu_spi_device_register(struct spi_board_info *sdev) sdev->modalias); return; } - memcpy(new_dev, sdev, sizeof(*sdev)); + *new_dev = *sdev; spi_devs[spi_next_dev++] = new_dev; } @@ -271,7 +271,7 @@ static void __init intel_scu_i2c_device_register(int bus, idev->type); return; } - memcpy(new_dev, idev, sizeof(*idev)); + *new_dev = *idev; i2c_bus[i2c_next_dev] = bus; i2c_devs[i2c_next_dev++] = new_dev; @@ -337,6 +337,8 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry, pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n", pentry->name, pentry->irq); pdata = intel_mid_sfi_get_pdata(dev, pentry); + if (IS_ERR(pdata)) + return; pdev = platform_device_alloc(pentry->name, 0); if (pdev == NULL) { @@ -370,6 +372,8 @@ static void __init sfi_handle_spi_dev(struct sfi_device_table_entry *pentry, spi_info.chip_select); pdata = intel_mid_sfi_get_pdata(dev, &spi_info); + if (IS_ERR(pdata)) + return; spi_info.platform_data = pdata; if (dev->delay) @@ -395,6 +399,8 @@ static void __init sfi_handle_i2c_dev(struct sfi_device_table_entry *pentry, i2c_info.addr); pdata = intel_mid_sfi_get_pdata(dev, &i2c_info); i2c_info.platform_data = pdata; + if (IS_ERR(pdata)) + return; if (dev->delay) intel_scu_i2c_device_register(pentry->host_num, &i2c_info); @@ -443,13 +449,35 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) * so we have to enable them one by one here */ ioapic = mp_find_ioapic(irq); - irq_attr.ioapic = ioapic; - irq_attr.ioapic_pin = irq; - irq_attr.trigger = 1; - irq_attr.polarity = 1; - io_apic_set_pci_routing(NULL, irq, &irq_attr); - } else + if (ioapic >= 0) { + irq_attr.ioapic = ioapic; + irq_attr.ioapic_pin = irq; + irq_attr.trigger = 1; + if (intel_mid_identify_cpu() == + INTEL_MID_CPU_CHIP_TANGIER) { + if (!strncmp(pentry->name, + "r69001-ts-i2c", 13)) + /* active low */ + irq_attr.polarity = 1; + else if (!strncmp(pentry->name, + "synaptics_3202", 14)) + /* active low */ + irq_attr.polarity = 1; + else if (irq == 41) + /* fast_int_1 */ + irq_attr.polarity = 1; + else + /* active high */ + irq_attr.polarity = 0; + } else { + /* PNW and CLV go with active low */ + irq_attr.polarity = 1; + } + io_apic_set_pci_routing(NULL, irq, &irq_attr); + } + } else { irq = 0; /* No irq */ + } dev = get_device_id(pentry->type, pentry->name); diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c index e6cb80f620a..4d171e8640e 100644 --- a/arch/x86/platform/iris/iris.c +++ b/arch/x86/platform/iris/iris.c @@ -27,7 +27,6 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/delay.h> -#include <linux/init.h> #include <linux/pm.h> #include <asm/io.h> diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c index ff0174dda81..a9acde72d4e 100644 --- a/arch/x86/platform/olpc/olpc-xo1-pm.c +++ b/arch/x86/platform/olpc/olpc-xo1-pm.c @@ -75,7 +75,7 @@ static int xo1_power_state_enter(suspend_state_t pm_state) return 0; } -asmlinkage int xo1_do_sleep(u8 sleep_state) +asmlinkage __visible int xo1_do_sleep(u8 sleep_state) { void *pgd_addr = __va(read_cr3()); diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c index 649a12befba..08e350e757d 100644 --- a/arch/x86/platform/olpc/olpc-xo15-sci.c +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c @@ -15,8 +15,7 @@ #include <linux/power_supply.h> #include <linux/olpc-ec.h> -#include <acpi/acpi_bus.h> -#include <acpi/acpi_drivers.h> +#include <linux/acpi.h> #include <asm/olpc.h> #define DRV_NAME "olpc-xo15-sci" diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c index 39febb214e8..9471b9456f2 100644 --- a/arch/x86/platform/ts5500/ts5500.c +++ b/arch/x86/platform/ts5500/ts5500.c @@ -88,7 +88,7 @@ struct ts5500_sbc { static const struct { const char * const string; const ssize_t offset; -} ts5500_signatures[] __initdata = { +} ts5500_signatures[] __initconst = { { "TS-5x00 AMD Elan", 0xb14 }, }; diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 766612137a6..1584cbed0dc 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -39,7 +39,7 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) */ return BIOS_STATUS_UNIMPLEMENTED; - ret = efi_call6((void *)__va(tab->function), (u64)which, + ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5); return ret; } diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 0f92173a12b..dfe605ac1bc 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -433,15 +433,49 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) return; } -static inline unsigned long cycles_2_us(unsigned long long cyc) +/* + * Not to be confused with cycles_2_ns() from tsc.c; this gives a relative + * number, not an absolute. It converts a duration in cycles to a duration in + * ns. + */ +static inline unsigned long long cycles_2_ns(unsigned long long cyc) { + struct cyc2ns_data *data = cyc2ns_read_begin(); unsigned long long ns; - unsigned long us; - int cpu = smp_processor_id(); - ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR; - us = ns / 1000; - return us; + ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); + + cyc2ns_read_end(data); + return ns; +} + +/* + * The reverse of the above; converts a duration in ns to a duration in cycles. + */ +static inline unsigned long long ns_2_cycles(unsigned long long ns) +{ + struct cyc2ns_data *data = cyc2ns_read_begin(); + unsigned long long cyc; + + cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul; + + cyc2ns_read_end(data); + return cyc; +} + +static inline unsigned long cycles_2_us(unsigned long long cyc) +{ + return cycles_2_ns(cyc) / NSEC_PER_USEC; +} + +static inline cycles_t sec_2_cycles(unsigned long sec) +{ + return ns_2_cycles(sec * NSEC_PER_SEC); +} + +static inline unsigned long long usec_2_cycles(unsigned long usec) +{ + return ns_2_cycles(usec * NSEC_PER_USEC); } /* @@ -668,16 +702,6 @@ static int wait_completion(struct bau_desc *bau_desc, bcp, try); } -static inline cycles_t sec_2_cycles(unsigned long sec) -{ - unsigned long ns; - cycles_t cyc; - - ns = sec * 1000000000; - cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); - return cyc; -} - /* * Our retries are blocked by all destination sw ack resources being * in use, and a timeout is pending. In that case hardware immediately @@ -1070,12 +1094,13 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, unsigned long status; bcp = &per_cpu(bau_control, cpu); - stat = bcp->statp; - stat->s_enters++; if (bcp->nobau) return cpumask; + stat = bcp->statp; + stat->s_enters++; + if (bcp->busy) { descriptor_status = read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0); @@ -1326,16 +1351,6 @@ static void ptc_seq_stop(struct seq_file *file, void *data) { } -static inline unsigned long long usec_2_cycles(unsigned long microsec) -{ - unsigned long ns; - unsigned long long cyc; - - ns = microsec * 1000; - cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); - return cyc; -} - /* * Display the statistics thru /proc/sgi_uv/ptc_statistics * 'data' points to the cpu number diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index acf7752da95..b233681af4d 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -238,11 +238,9 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, unsigned long mmr_offset, int limit) { - int irq, ret; + int ret, irq = irq_alloc_hwirq(uv_blade_to_memory_nid(mmr_blade)); - irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); - - if (irq <= 0) + if (!irq) return -EBUSY; ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, @@ -250,7 +248,7 @@ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, if (ret == irq) uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); else - destroy_irq(irq); + irq_free_hwirq(irq); return ret; } @@ -285,6 +283,6 @@ void uv_teardown_irq(unsigned int irq) n = n->rb_right; } spin_unlock_irqrestore(&uv_irq_lock, irqflags); - destroy_irq(irq); + irq_free_hwirq(irq); } EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 8eeccba7313..c89c93320c1 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -74,7 +74,6 @@ static atomic_t uv_in_nmi; static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1); static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1); static atomic_t uv_nmi_slave_continue; -static atomic_t uv_nmi_kexec_failed; static cpumask_var_t uv_nmi_cpu_mask; /* Values for uv_nmi_slave_continue */ @@ -86,7 +85,7 @@ static cpumask_var_t uv_nmi_cpu_mask; * Default is all stack dumps go to the console and buffer. * Lower level to send to log buffer only. */ -static int uv_nmi_loglevel = 7; +static int uv_nmi_loglevel = CONSOLE_LOGLEVEL_DEFAULT; module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644); /* @@ -149,7 +148,8 @@ module_param_named(retry_count, uv_nmi_retry_count, int, 0644); * "dump" - dump process stack for each cpu * "ips" - dump IP info for each cpu * "kdump" - do crash dump - * "kdb" - enter KDB/KGDB (default) + * "kdb" - enter KDB (default) + * "kgdb" - enter KGDB */ static char uv_nmi_action[8] = "kdb"; module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644); @@ -504,6 +504,7 @@ static void uv_nmi_touch_watchdogs(void) } #if defined(CONFIG_KEXEC) +static atomic_t uv_nmi_kexec_failed; static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) { /* Call crash to dump system state */ @@ -537,18 +538,45 @@ static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) } #endif /* !CONFIG_KEXEC */ +#ifdef CONFIG_KGDB #ifdef CONFIG_KGDB_KDB -/* Call KDB from NMI handler */ -static void uv_call_kdb(int cpu, struct pt_regs *regs, int master) +static inline int uv_nmi_kdb_reason(void) { - int ret; + return KDB_REASON_SYSTEM_NMI; +} +#else /* !CONFIG_KGDB_KDB */ +static inline int uv_nmi_kdb_reason(void) +{ + /* Insure user is expecting to attach gdb remote */ + if (uv_nmi_action_is("kgdb")) + return 0; + + pr_err("UV: NMI error: KDB is not enabled in this kernel\n"); + return -1; +} +#endif /* CONFIG_KGDB_KDB */ +/* + * Call KGDB/KDB from NMI handler + * + * Note that if both KGDB and KDB are configured, then the action of 'kgdb' or + * 'kdb' has no affect on which is used. See the KGDB documention for further + * information. + */ +static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) +{ if (master) { + int reason = uv_nmi_kdb_reason(); + int ret; + + if (reason < 0) + return; + /* call KGDB NMI handler as MASTER */ - ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, - &uv_nmi_slave_continue); + ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, reason, + &uv_nmi_slave_continue); if (ret) { - pr_alert("KDB returned error, is kgdboc set?\n"); + pr_alert("KGDB returned error, is kgdboc set?\n"); atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); } } else { @@ -567,12 +595,12 @@ static void uv_call_kdb(int cpu, struct pt_regs *regs, int master) uv_nmi_sync_exit(master); } -#else /* !CONFIG_KGDB_KDB */ -static inline void uv_call_kdb(int cpu, struct pt_regs *regs, int master) +#else /* !CONFIG_KGDB */ +static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) { - pr_err("UV: NMI error: KGDB/KDB is not enabled in this kernel\n"); + pr_err("UV: NMI error: KGDB is not enabled in this kernel\n"); } -#endif /* !CONFIG_KGDB_KDB */ +#endif /* !CONFIG_KGDB */ /* * UV NMI handler @@ -606,9 +634,9 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) uv_nmi_dump_state(cpu, regs, master); - /* Call KDB if enabled */ - else if (uv_nmi_action_is("kdb")) - uv_call_kdb(cpu, regs, master); + /* Call KGDB/KDB if enabled */ + else if (uv_nmi_action_is("kdb") || uv_nmi_action_is("kgdb")) + uv_call_kgdb_kdb(cpu, regs, master); /* Clear per_cpu "in nmi" flag */ atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT); @@ -634,7 +662,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) /* * NMI handler for pulling in CPUs when perf events are grabbing our NMI */ -int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) +static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) { int ret; @@ -651,7 +679,7 @@ int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) return ret; } -void uv_register_nmi_notifier(void) +static void uv_register_nmi_notifier(void) { if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) pr_warn("UV: NMI handler failed to register\n"); @@ -695,6 +723,5 @@ void uv_nmi_setup(void) uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid]; } BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL)); + uv_register_nmi_notifier(); } - - diff --git a/arch/x86/platform/visws/Makefile b/arch/x86/platform/visws/Makefile deleted file mode 100644 index 91bc17ab2fd..00000000000 --- a/arch/x86/platform/visws/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_X86_VISWS) += visws_quirks.o diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c deleted file mode 100644 index 94d8a39332e..00000000000 --- a/arch/x86/platform/visws/visws_quirks.c +++ /dev/null @@ -1,608 +0,0 @@ -/* - * SGI Visual Workstation support and quirks, unmaintained. - * - * Split out from setup.c by davej@suse.de - * - * Copyright (C) 1999 Bent Hagemark, Ingo Molnar - * - * SGI Visual Workstation interrupt controller - * - * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC - * which serves as the main interrupt controller in the system. Non-legacy - * hardware in the system uses this controller directly. Legacy devices - * are connected to the PIIX4 which in turn has its 8259(s) connected to - * a of the Cobalt APIC entry. - * - * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com - * - * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru> - */ -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/smp.h> - -#include <asm/visws/cobalt.h> -#include <asm/visws/piix4.h> -#include <asm/io_apic.h> -#include <asm/fixmap.h> -#include <asm/reboot.h> -#include <asm/setup.h> -#include <asm/apic.h> -#include <asm/e820.h> -#include <asm/time.h> -#include <asm/io.h> - -#include <linux/kernel_stat.h> - -#include <asm/i8259.h> -#include <asm/irq_vectors.h> -#include <asm/visws/lithium.h> - -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/pci_ids.h> - -extern int no_broadcast; - -char visws_board_type = -1; -char visws_board_rev = -1; - -static void __init visws_time_init(void) -{ - printk(KERN_INFO "Starting Cobalt Timer system clock\n"); - - /* Set the countdown value */ - co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ); - - /* Start the timer */ - co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN); - - /* Enable (unmask) the timer interrupt */ - co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); - - setup_default_timer_irq(); -} - -/* Replaces the default init_ISA_irqs in the generic setup */ -static void __init visws_pre_intr_init(void); - -/* Quirk for machine specific memory setup. */ - -#define MB (1024 * 1024) - -unsigned long sgivwfb_mem_phys; -unsigned long sgivwfb_mem_size; -EXPORT_SYMBOL(sgivwfb_mem_phys); -EXPORT_SYMBOL(sgivwfb_mem_size); - -long long mem_size __initdata = 0; - -static char * __init visws_memory_setup(void) -{ - long long gfx_mem_size = 8 * MB; - - mem_size = boot_params.alt_mem_k; - - if (!mem_size) { - printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n"); - mem_size = 128 * MB; - } - - /* - * this hardcodes the graphics memory to 8 MB - * it really should be sized dynamically (or at least - * set as a boot param) - */ - if (!sgivwfb_mem_size) { - printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n"); - sgivwfb_mem_size = 8 * MB; - } - - /* - * Trim to nearest MB - */ - sgivwfb_mem_size &= ~((1 << 20) - 1); - sgivwfb_mem_phys = mem_size - gfx_mem_size; - - e820_add_region(0, LOWMEMSIZE(), E820_RAM); - e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); - e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); - - return "PROM"; -} - -static void visws_machine_emergency_restart(void) -{ - /* - * Visual Workstations restart after this - * register is poked on the PIIX4 - */ - outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT); -} - -static void visws_machine_power_off(void) -{ - unsigned short pm_status; -/* extern unsigned int pci_bus0; */ - - while ((pm_status = inw(PMSTS_PORT)) & 0x100) - outw(pm_status, PMSTS_PORT); - - outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT); - - mdelay(10); - -#define PCI_CONF1_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) - -/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */ - outl(PIIX_SPECIAL_STOP, 0xCFC); -} - -static void __init visws_get_smp_config(unsigned int early) -{ -} - -/* - * The Visual Workstation is Intel MP compliant in the hardware - * sense, but it doesn't have a BIOS(-configuration table). - * No problem for Linux. - */ - -static void __init MP_processor_info(struct mpc_cpu *m) -{ - int ver, logical_apicid; - physid_mask_t apic_cpus; - - if (!(m->cpuflag & CPU_ENABLED)) - return; - - logical_apicid = m->apicid; - printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n", - m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "", - m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, - (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver); - - if (m->cpuflag & CPU_BOOTPROCESSOR) - boot_cpu_physical_apicid = m->apicid; - - ver = m->apicver; - if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { - printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", - m->apicid, MAX_LOCAL_APIC); - return; - } - - apic->apicid_to_cpu_present(m->apicid, &apic_cpus); - physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); - /* - * Validate version - */ - if (ver == 0x0) { - printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - m->apicid); - ver = 0x10; - } - apic_version[m->apicid] = ver; -} - -static void __init visws_find_smp_config(void) -{ - struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); - unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); - - if (ncpus > CO_CPU_MAX) { - printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n", - ncpus, mp); - - ncpus = CO_CPU_MAX; - } - - if (ncpus > setup_max_cpus) - ncpus = setup_max_cpus; - -#ifdef CONFIG_X86_LOCAL_APIC - smp_found_config = 1; -#endif - while (ncpus--) - MP_processor_info(mp++); - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; -} - -static void visws_trap_init(void); - -void __init visws_early_detect(void) -{ - int raw; - - visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG) - >> PIIX_GPI_BD_SHIFT; - - if (visws_board_type < 0) - return; - - /* - * Override the default platform setup functions - */ - x86_init.resources.memory_setup = visws_memory_setup; - x86_init.mpparse.get_smp_config = visws_get_smp_config; - x86_init.mpparse.find_smp_config = visws_find_smp_config; - x86_init.irqs.pre_vector_init = visws_pre_intr_init; - x86_init.irqs.trap_init = visws_trap_init; - x86_init.timers.timer_init = visws_time_init; - x86_init.pci.init = pci_visws_init; - x86_init.pci.init_irq = x86_init_noop; - - /* - * Install reboot quirks: - */ - pm_power_off = visws_machine_power_off; - machine_ops.emergency_restart = visws_machine_emergency_restart; - - /* - * Do not use broadcast IPIs: - */ - no_broadcast = 0; - -#ifdef CONFIG_X86_IO_APIC - /* - * Turn off IO-APIC detection and initialization: - */ - skip_ioapic_setup = 1; -#endif - - /* - * Get Board rev. - * First, we have to initialize the 307 part to allow us access - * to the GPIO registers. Let's map them at 0x0fc0 which is right - * after the PIIX4 PM section. - */ - outb_p(SIO_DEV_SEL, SIO_INDEX); - outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */ - - outb_p(SIO_DEV_MSB, SIO_INDEX); - outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */ - - outb_p(SIO_DEV_LSB, SIO_INDEX); - outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */ - - outb_p(SIO_DEV_ENB, SIO_INDEX); - outb_p(1, SIO_DATA); /* Enable GPIO registers. */ - - /* - * Now, we have to map the power management section to write - * a bit which enables access to the GPIO registers. - * What lunatic came up with this shit? - */ - outb_p(SIO_DEV_SEL, SIO_INDEX); - outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */ - - outb_p(SIO_DEV_MSB, SIO_INDEX); - outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */ - - outb_p(SIO_DEV_LSB, SIO_INDEX); - outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */ - - outb_p(SIO_DEV_ENB, SIO_INDEX); - outb_p(1, SIO_DATA); /* Enable PM registers. */ - - /* - * Now, write the PM register which enables the GPIO registers. - */ - outb_p(SIO_PM_FER2, SIO_PM_INDEX); - outb_p(SIO_PM_GP_EN, SIO_PM_DATA); - - /* - * Now, initialize the GPIO registers. - * We want them all to be inputs which is the - * power on default, so let's leave them alone. - * So, let's just read the board rev! - */ - raw = inb_p(SIO_GP_DATA1); - raw &= 0x7f; /* 7 bits of valid board revision ID. */ - - if (visws_board_type == VISWS_320) { - if (raw < 0x6) { - visws_board_rev = 4; - } else if (raw < 0xc) { - visws_board_rev = 5; - } else { - visws_board_rev = 6; - } - } else if (visws_board_type == VISWS_540) { - visws_board_rev = 2; - } else { - visws_board_rev = raw; - } - - printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n", - (visws_board_type == VISWS_320 ? "320" : - (visws_board_type == VISWS_540 ? "540" : - "unknown")), visws_board_rev); -} - -#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4) -#define BCD (LI_INTB | LI_INTC | LI_INTD) -#define ALLDEVS (A01234 | BCD) - -static __init void lithium_init(void) -{ - set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS); - set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS); - - if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || - (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { - printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A'); -/* panic("This machine is not SGI Visual Workstation 320/540"); */ - } - - if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || - (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { - printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B'); -/* panic("This machine is not SGI Visual Workstation 320/540"); */ - } - - li_pcia_write16(LI_PCI_INTEN, ALLDEVS); - li_pcib_write16(LI_PCI_INTEN, ALLDEVS); -} - -static __init void cobalt_init(void) -{ - /* - * On normal SMP PC this is used only with SMP, but we have to - * use it and set it up here to start the Cobalt clock - */ - set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); - setup_local_APIC(); - printk(KERN_INFO "Local APIC Version %#x, ID %#x\n", - (unsigned int)apic_read(APIC_LVR), - (unsigned int)apic_read(APIC_ID)); - - set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); - set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); - printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n", - co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID)); - - /* Enable Cobalt APIC being careful to NOT change the ID! */ - co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE); - - printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n", - co_apic_read(CO_APIC_ID)); -} - -static void __init visws_trap_init(void) -{ - lithium_init(); - cobalt_init(); -} - -/* - * IRQ controller / APIC support: - */ - -static DEFINE_SPINLOCK(cobalt_lock); - -/* - * Set the given Cobalt APIC Redirection Table entry to point - * to the given IDT vector/index. - */ -static inline void co_apic_set(int entry, int irq) -{ - co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR)); - co_apic_write(CO_APIC_HI(entry), 0); -} - -/* - * Cobalt (IO)-APIC functions to handle PCI devices. - */ -static inline int co_apic_ide0_hack(void) -{ - extern char visws_board_type; - extern char visws_board_rev; - - if (visws_board_type == VISWS_320 && visws_board_rev == 5) - return 5; - return CO_APIC_IDE0; -} - -static int is_co_apic(unsigned int irq) -{ - if (IS_CO_APIC(irq)) - return CO_APIC(irq); - - switch (irq) { - case 0: return CO_APIC_CPU; - case CO_IRQ_IDE0: return co_apic_ide0_hack(); - case CO_IRQ_IDE1: return CO_APIC_IDE1; - default: return -1; - } -} - - -/* - * This is the SGI Cobalt (IO-)APIC: - */ -static void enable_cobalt_irq(struct irq_data *data) -{ - co_apic_set(is_co_apic(data->irq), data->irq); -} - -static void disable_cobalt_irq(struct irq_data *data) -{ - int entry = is_co_apic(data->irq); - - co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); - co_apic_read(CO_APIC_LO(entry)); -} - -static void ack_cobalt_irq(struct irq_data *data) -{ - unsigned long flags; - - spin_lock_irqsave(&cobalt_lock, flags); - disable_cobalt_irq(data); - apic_write(APIC_EOI, APIC_EOI_ACK); - spin_unlock_irqrestore(&cobalt_lock, flags); -} - -static struct irq_chip cobalt_irq_type = { - .name = "Cobalt-APIC", - .irq_enable = enable_cobalt_irq, - .irq_disable = disable_cobalt_irq, - .irq_ack = ack_cobalt_irq, -}; - - -/* - * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt - * -- not the manner expected by the code in i8259.c. - * - * there is a 'master' physical interrupt source that gets sent to - * the CPU. But in the chipset there are various 'virtual' interrupts - * waiting to be handled. We represent this to Linux through a 'master' - * interrupt controller type, and through a special virtual interrupt- - * controller. Device drivers only see the virtual interrupt sources. - */ -static unsigned int startup_piix4_master_irq(struct irq_data *data) -{ - legacy_pic->init(0); - enable_cobalt_irq(data); - return 0; -} - -static struct irq_chip piix4_master_irq_type = { - .name = "PIIX4-master", - .irq_startup = startup_piix4_master_irq, - .irq_ack = ack_cobalt_irq, -}; - -static void pii4_mask(struct irq_data *data) { } - -static struct irq_chip piix4_virtual_irq_type = { - .name = "PIIX4-virtual", - .irq_mask = pii4_mask, -}; - -/* - * PIIX4-8259 master/virtual functions to handle interrupt requests - * from legacy devices: floppy, parallel, serial, rtc. - * - * None of these get Cobalt APIC entries, neither do they have IDT - * entries. These interrupts are purely virtual and distributed from - * the 'master' interrupt source: CO_IRQ_8259. - * - * When the 8259 interrupts its handler figures out which of these - * devices is interrupting and dispatches to its handler. - * - * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/ - * enable_irq gets the right irq. This 'master' irq is never directly - * manipulated by any driver. - */ -static irqreturn_t piix4_master_intr(int irq, void *dev_id) -{ - unsigned long flags; - int realirq; - - raw_spin_lock_irqsave(&i8259A_lock, flags); - - /* Find out what's interrupting in the PIIX4 master 8259 */ - outb(0x0c, 0x20); /* OCW3 Poll command */ - realirq = inb(0x20); - - /* - * Bit 7 == 0 means invalid/spurious - */ - if (unlikely(!(realirq & 0x80))) - goto out_unlock; - - realirq &= 7; - - if (unlikely(realirq == 2)) { - outb(0x0c, 0xa0); - realirq = inb(0xa0); - - if (unlikely(!(realirq & 0x80))) - goto out_unlock; - - realirq = (realirq & 7) + 8; - } - - /* mask and ack interrupt */ - cached_irq_mask |= 1 << realirq; - if (unlikely(realirq > 7)) { - inb(0xa1); - outb(cached_slave_mask, 0xa1); - outb(0x60 + (realirq & 7), 0xa0); - outb(0x60 + 2, 0x20); - } else { - inb(0x21); - outb(cached_master_mask, 0x21); - outb(0x60 + realirq, 0x20); - } - - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - - /* - * handle this 'virtual interrupt' as a Cobalt one now. - */ - generic_handle_irq(realirq); - - return IRQ_HANDLED; - -out_unlock: - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - return IRQ_NONE; -} - -static struct irqaction master_action = { - .handler = piix4_master_intr, - .name = "PIIX4-8259", - .flags = IRQF_NO_THREAD, -}; - -static struct irqaction cascade_action = { - .handler = no_action, - .name = "cascade", - .flags = IRQF_NO_THREAD, -}; - -static inline void set_piix4_virtual_irq_type(void) -{ - piix4_virtual_irq_type.irq_enable = i8259A_chip.irq_unmask; - piix4_virtual_irq_type.irq_disable = i8259A_chip.irq_mask; - piix4_virtual_irq_type.irq_unmask = i8259A_chip.irq_unmask; -} - -static void __init visws_pre_intr_init(void) -{ - int i; - - set_piix4_virtual_irq_type(); - - for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { - struct irq_chip *chip = NULL; - - if (i == 0) - chip = &cobalt_irq_type; - else if (i == CO_IRQ_IDE0) - chip = &cobalt_irq_type; - else if (i == CO_IRQ_IDE1) - chip = &cobalt_irq_type; - else if (i == CO_IRQ_8259) - chip = &piix4_master_irq_type; - else if (i < CO_IRQ_APIC0) - chip = &piix4_virtual_irq_type; - else if (IS_CO_APIC(i)) - chip = &cobalt_irq_type; - - if (chip) - irq_set_chip(i, chip); - } - - setup_irq(CO_IRQ_8259, &master_action); - setup_irq(2, &cascade_action); -} |
