diff options
Diffstat (limited to 'arch/x86/platform/efi/efi.c')
| -rw-r--r-- | arch/x86/platform/efi/efi.c | 824 |
1 files changed, 524 insertions, 300 deletions
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 90f6ed12709..87fc96bcc13 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -12,6 +12,8 @@ * Bibo Mao <bibo.mao@intel.com> * Chandramouli Narayanan <mouli@linux.intel.com> * Huang Ying <ying.huang@intel.com> + * Copyright (C) 2013 SuSE Labs + * Borislav Petkov <bp@suse.de> - runtime services VA mapping * * Copied from efi_32.c to eliminate the duplicated code between EFI * 32/64 support code. --ying 2007-10-26 @@ -50,8 +52,9 @@ #include <asm/tlbflush.h> #include <asm/x86_init.h> #include <asm/rtc.h> +#include <asm/uv/uv.h> -#define EFI_DEBUG 1 +#define EFI_DEBUG #define EFI_MIN_RESERVE 5120 @@ -60,36 +63,21 @@ static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; -struct efi __read_mostly efi = { - .mps = EFI_INVALID_TABLE_ADDR, - .acpi = EFI_INVALID_TABLE_ADDR, - .acpi20 = EFI_INVALID_TABLE_ADDR, - .smbios = EFI_INVALID_TABLE_ADDR, - .sal_systab = EFI_INVALID_TABLE_ADDR, - .boot_info = EFI_INVALID_TABLE_ADDR, - .hcdp = EFI_INVALID_TABLE_ADDR, - .uga = EFI_INVALID_TABLE_ADDR, - .uv_systab = EFI_INVALID_TABLE_ADDR, -}; -EXPORT_SYMBOL(efi); - struct efi_memory_map memmap; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; -unsigned long x86_efi_facility; +static efi_config_table_type_t arch_tables[] __initdata = { +#ifdef CONFIG_X86_UV + {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab}, +#endif + {NULL_GUID, NULL, NULL}, +}; -/* - * Returns 1 if 'facility' is enabled, 0 otherwise. - */ -int efi_enabled(int facility) -{ - return test_bit(facility, &x86_efi_facility) != 0; -} -EXPORT_SYMBOL(efi_enabled); +u64 efi_setup; /* efi setup_data physical address */ -static bool __initdata disable_runtime = false; +static bool disable_runtime __initdata = false; static int __init setup_noefi(char *arg) { disable_runtime = true; @@ -116,14 +104,13 @@ static int __init setup_storage_paranoia(char *arg) } early_param("efi_no_storage_paranoia", setup_storage_paranoia); - static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { unsigned long flags; efi_status_t status; spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt2(get_time, tm, tc); + status = efi_call_virt(get_time, tm, tc); spin_unlock_irqrestore(&rtc_lock, flags); return status; } @@ -134,7 +121,7 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm) efi_status_t status; spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt1(set_time, tm); + status = efi_call_virt(set_time, tm); spin_unlock_irqrestore(&rtc_lock, flags); return status; } @@ -147,8 +134,7 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, efi_status_t status; spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt3(get_wakeup_time, - enabled, pending, tm); + status = efi_call_virt(get_wakeup_time, enabled, pending, tm); spin_unlock_irqrestore(&rtc_lock, flags); return status; } @@ -159,8 +145,7 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) efi_status_t status; spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt2(set_wakeup_time, - enabled, tm); + status = efi_call_virt(set_wakeup_time, enabled, tm); spin_unlock_irqrestore(&rtc_lock, flags); return status; } @@ -171,17 +156,17 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name, unsigned long *data_size, void *data) { - return efi_call_virt5(get_variable, - name, vendor, attr, - data_size, data); + return efi_call_virt(get_variable, + name, vendor, attr, + data_size, data); } static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { - return efi_call_virt3(get_next_variable, - name_size, name, vendor); + return efi_call_virt(get_next_variable, + name_size, name, vendor); } static efi_status_t virt_efi_set_variable(efi_char16_t *name, @@ -190,9 +175,9 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, unsigned long data_size, void *data) { - return efi_call_virt5(set_variable, - name, vendor, attr, - data_size, data); + return efi_call_virt(set_variable, + name, vendor, attr, + data_size, data); } static efi_status_t virt_efi_query_variable_info(u32 attr, @@ -203,13 +188,13 @@ static efi_status_t virt_efi_query_variable_info(u32 attr, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - return efi_call_virt4(query_variable_info, attr, storage_space, - remaining_space, max_variable_size); + return efi_call_virt(query_variable_info, attr, storage_space, + remaining_space, max_variable_size); } static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) { - return efi_call_virt1(get_next_high_mono_count, count); + return efi_call_virt(get_next_high_mono_count, count); } static void virt_efi_reset_system(int reset_type, @@ -217,8 +202,8 @@ static void virt_efi_reset_system(int reset_type, unsigned long data_size, efi_char16_t *data) { - efi_call_virt4(reset_system, reset_type, status, - data_size, data); + __efi_call_virt(reset_system, reset_type, status, + data_size, data); } static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, @@ -228,7 +213,7 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - return efi_call_virt3(update_capsule, capsules, count, sg_list); + return efi_call_virt(update_capsule, capsules, count, sg_list); } static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, @@ -239,8 +224,8 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - return efi_call_virt4(query_capsule_caps, capsules, count, max_size, - reset_type); + return efi_call_virt(query_capsule_caps, capsules, count, max_size, + reset_type); } static efi_status_t __init phys_efi_set_virtual_address_map( @@ -252,34 +237,19 @@ static efi_status_t __init phys_efi_set_virtual_address_map( efi_status_t status; efi_call_phys_prelog(); - status = efi_call_phys4(efi_phys.set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); - efi_call_phys_epilog(); - return status; -} - -static efi_status_t __init phys_efi_get_time(efi_time_t *tm, - efi_time_cap_t *tc) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - efi_call_phys_prelog(); - status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), - virt_to_phys(tc)); + status = efi_call_phys(efi_phys.set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); efi_call_phys_epilog(); - spin_unlock_irqrestore(&rtc_lock, flags); return status; } int efi_set_rtc_mmss(const struct timespec *now) { unsigned long nowtime = now->tv_sec; - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; + efi_status_t status; + efi_time_t eft; + efi_time_cap_t cap; struct rtc_time tm; status = efi.get_time(&eft, &cap); @@ -297,9 +267,8 @@ int efi_set_rtc_mmss(const struct timespec *now) eft.second = tm.tm_sec; eft.nanosecond = 0; } else { - printk(KERN_ERR - "%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", - __FUNCTION__, nowtime); + pr_err("%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", + __func__, nowtime); return -1; } @@ -399,12 +368,14 @@ int __init efi_memblock_x86_reserve_range(void) memblock_reserve(pmap, memmap.nr_map * memmap.desc_size); + efi.memmap = &memmap; + return 0; } -#if EFI_DEBUG static void __init print_efi_memmap(void) { +#ifdef EFI_DEBUG efi_memory_desc_t *md; void *p; int i; @@ -413,14 +384,13 @@ static void __init print_efi_memmap(void) p < memmap.map_end; p += memmap.desc_size, i++) { md = p; - pr_info("mem%02u: type=%u, attr=0x%llx, " - "range=[0x%016llx-0x%016llx) (%lluMB)\n", + pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n", i, md->type, md->attribute, md->phys_addr, md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), (md->num_pages >> (20 - EFI_PAGE_SHIFT))); } -} #endif /* EFI_DEBUG */ +} void __init efi_reserve_boot_services(void) { @@ -440,15 +410,14 @@ void __init efi_reserve_boot_services(void) * - Not within any part of the kernel * - Not the bios reserved area */ - if ((start+size >= __pa_symbol(_text) + if ((start + size > __pa_symbol(_text) && start <= __pa_symbol(_end)) || !e820_all_mapped(start, start+size, E820_RAM) || memblock_is_region_reserved(start, size)) { /* Could not reserve, skip it */ md->num_pages = 0; - memblock_dbg("Could not reserve boot range " - "[0x%010llx-0x%010llx]\n", - start, start+size-1); + memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n", + start, start+size-1); } else memblock_reserve(start, size); } @@ -456,7 +425,7 @@ void __init efi_reserve_boot_services(void) void __init efi_unmap_memmap(void) { - clear_bit(EFI_MEMMAP, &x86_efi_facility); + clear_bit(EFI_MEMMAP, &efi.flags); if (memmap.map) { early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); memmap.map = NULL; @@ -467,9 +436,6 @@ void __init efi_free_boot_services(void) { void *p; - if (!efi_is_native()) - return; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { efi_memory_desc_t *md = p; unsigned long long start = md->phys_addr; @@ -493,18 +459,27 @@ static int __init efi_systab_init(void *phys) { if (efi_enabled(EFI_64BIT)) { efi_system_table_64_t *systab64; + struct efi_setup_data *data = NULL; u64 tmp = 0; + if (efi_setup) { + data = early_memremap(efi_setup, sizeof(*data)); + if (!data) + return -ENOMEM; + } systab64 = early_ioremap((unsigned long)phys, sizeof(*systab64)); if (systab64 == NULL) { pr_err("Couldn't map the system table!\n"); + if (data) + early_iounmap(data, sizeof(*data)); return -ENOMEM; } efi_systab.hdr = systab64->hdr; - efi_systab.fw_vendor = systab64->fw_vendor; - tmp |= systab64->fw_vendor; + efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : + systab64->fw_vendor; + tmp |= data ? data->fw_vendor : systab64->fw_vendor; efi_systab.fw_revision = systab64->fw_revision; efi_systab.con_in_handle = systab64->con_in_handle; tmp |= systab64->con_in_handle; @@ -518,15 +493,20 @@ static int __init efi_systab_init(void *phys) tmp |= systab64->stderr_handle; efi_systab.stderr = systab64->stderr; tmp |= systab64->stderr; - efi_systab.runtime = (void *)(unsigned long)systab64->runtime; - tmp |= systab64->runtime; + efi_systab.runtime = data ? + (void *)(unsigned long)data->runtime : + (void *)(unsigned long)systab64->runtime; + tmp |= data ? data->runtime : systab64->runtime; efi_systab.boottime = (void *)(unsigned long)systab64->boottime; tmp |= systab64->boottime; efi_systab.nr_tables = systab64->nr_tables; - efi_systab.tables = systab64->tables; - tmp |= systab64->tables; + efi_systab.tables = data ? (unsigned long)data->tables : + systab64->tables; + tmp |= data ? data->tables : systab64->tables; early_iounmap(systab64, sizeof(*systab64)); + if (data) + early_iounmap(data, sizeof(*data)); #ifdef CONFIG_X86_32 if (tmp >> 32) { pr_err("EFI data located above 4GB, disabling EFI.\n"); @@ -570,119 +550,82 @@ static int __init efi_systab_init(void *phys) return -EINVAL; } if ((efi.systab->hdr.revision >> 16) == 0) - pr_err("Warning: System table version " - "%d.%02d, expected 1.00 or greater!\n", + pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n", efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff); + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + return 0; } -static int __init efi_config_init(u64 tables, int nr_tables) +static int __init efi_runtime_init32(void) { - void *config_tables, *tablep; - int i, sz; - - if (efi_enabled(EFI_64BIT)) - sz = sizeof(efi_config_table_64_t); - else - sz = sizeof(efi_config_table_32_t); + efi_runtime_services_32_t *runtime; - /* - * Let's see what config tables the firmware passed to us. - */ - config_tables = early_ioremap(tables, nr_tables * sz); - if (config_tables == NULL) { - pr_err("Could not map Configuration table!\n"); + runtime = early_ioremap((unsigned long)efi.systab->runtime, + sizeof(efi_runtime_services_32_t)); + if (!runtime) { + pr_err("Could not map the runtime service table!\n"); return -ENOMEM; } - tablep = config_tables; - pr_info(""); - for (i = 0; i < efi.systab->nr_tables; i++) { - efi_guid_t guid; - unsigned long table; + /* + * We will only need *early* access to the following two + * EFI runtime services before set_virtual_address_map + * is invoked. + */ + efi_phys.set_virtual_address_map = + (efi_set_virtual_address_map_t *) + (unsigned long)runtime->set_virtual_address_map; + early_iounmap(runtime, sizeof(efi_runtime_services_32_t)); - if (efi_enabled(EFI_64BIT)) { - u64 table64; - guid = ((efi_config_table_64_t *)tablep)->guid; - table64 = ((efi_config_table_64_t *)tablep)->table; - table = table64; -#ifdef CONFIG_X86_32 - if (table64 >> 32) { - pr_cont("\n"); - pr_err("Table located above 4GB, disabling EFI.\n"); - early_iounmap(config_tables, - efi.systab->nr_tables * sz); - return -EINVAL; - } -#endif - } else { - guid = ((efi_config_table_32_t *)tablep)->guid; - table = ((efi_config_table_32_t *)tablep)->table; - } - if (!efi_guidcmp(guid, MPS_TABLE_GUID)) { - efi.mps = table; - pr_cont(" MPS=0x%lx ", table); - } else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) { - efi.acpi20 = table; - pr_cont(" ACPI 2.0=0x%lx ", table); - } else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) { - efi.acpi = table; - pr_cont(" ACPI=0x%lx ", table); - } else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) { - efi.smbios = table; - pr_cont(" SMBIOS=0x%lx ", table); -#ifdef CONFIG_X86_UV - } else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) { - efi.uv_systab = table; - pr_cont(" UVsystab=0x%lx ", table); -#endif - } else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) { - efi.hcdp = table; - pr_cont(" HCDP=0x%lx ", table); - } else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) { - efi.uga = table; - pr_cont(" UGA=0x%lx ", table); - } - tablep += sz; - } - pr_cont("\n"); - early_iounmap(config_tables, efi.systab->nr_tables * sz); return 0; } -static int __init efi_runtime_init(void) +static int __init efi_runtime_init64(void) { - efi_runtime_services_t *runtime; + efi_runtime_services_64_t *runtime; - /* - * Check out the runtime services table. We need to map - * the runtime services table so that we can grab the physical - * address of several of the EFI runtime functions, needed to - * set the firmware into virtual mode. - */ runtime = early_ioremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_t)); + sizeof(efi_runtime_services_64_t)); if (!runtime) { pr_err("Could not map the runtime service table!\n"); return -ENOMEM; } + /* - * We will only need *early* access to the following - * two EFI runtime services before set_virtual_address_map + * We will only need *early* access to the following two + * EFI runtime services before set_virtual_address_map * is invoked. */ - efi_phys.get_time = (efi_get_time_t *)runtime->get_time; efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - runtime->set_virtual_address_map; + (efi_set_virtual_address_map_t *) + (unsigned long)runtime->set_virtual_address_map; + early_iounmap(runtime, sizeof(efi_runtime_services_64_t)); + + return 0; +} + +static int __init efi_runtime_init(void) +{ + int rv; + /* - * Make efi_get_time can be called before entering - * virtual mode. + * Check out the runtime services table. We need to map + * the runtime services table so that we can grab the physical + * address of several of the EFI runtime functions, needed to + * set the firmware into virtual mode. */ - efi.get_time = phys_efi_get_time; - early_iounmap(runtime, sizeof(efi_runtime_services_t)); + if (efi_enabled(EFI_64BIT)) + rv = efi_runtime_init64(); + else + rv = efi_runtime_init32(); + + if (rv) + return rv; + + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); return 0; } @@ -701,9 +644,67 @@ static int __init efi_memmap_init(void) if (add_efi_memmap) do_add_efi_memmap(); + set_bit(EFI_MEMMAP, &efi.flags); + return 0; } +/* + * A number of config table entries get remapped to virtual addresses + * after entering EFI virtual mode. However, the kexec kernel requires + * their physical addresses therefore we pass them via setup_data and + * correct those entries to their respective physical addresses here. + * + * Currently only handles smbios which is necessary for some firmware + * implementation. + */ +static int __init efi_reuse_config(u64 tables, int nr_tables) +{ + int i, sz, ret = 0; + void *p, *tablep; + struct efi_setup_data *data; + + if (!efi_setup) + return 0; + + if (!efi_enabled(EFI_64BIT)) + return 0; + + data = early_memremap(efi_setup, sizeof(*data)); + if (!data) { + ret = -ENOMEM; + goto out; + } + + if (!data->smbios) + goto out_memremap; + + sz = sizeof(efi_config_table_64_t); + + p = tablep = early_memremap(tables, nr_tables * sz); + if (!p) { + pr_err("Could not map Configuration table!\n"); + ret = -ENOMEM; + goto out_memremap; + } + + for (i = 0; i < efi.systab->nr_tables; i++) { + efi_guid_t guid; + + guid = ((efi_config_table_64_t *)p)->guid; + + if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) + ((efi_config_table_64_t *)p)->table = data->smbios; + p += sz; + } + early_iounmap(tablep, nr_tables * sz); + +out_memremap: + early_iounmap(data, sizeof(*data)); +out: + return ret; +} + void __init efi_init(void) { efi_char16_t *c16; @@ -727,7 +728,11 @@ void __init efi_init(void) if (efi_systab_init(efi_phys.systab)) return; - set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + + efi.config_table = (unsigned long)efi.systab->tables; + efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; + efi.runtime = (unsigned long)efi.systab->runtime; /* * Show what we know for posterity @@ -745,39 +750,29 @@ void __init efi_init(void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); - if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) + if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables)) return; - set_bit(EFI_CONFIG_TABLES, &x86_efi_facility); + if (efi_config_init(arch_tables)) + return; /* * Note: We currently don't support runtime services on an EFI * that doesn't match the kernel 32/64-bit mode. */ - if (!efi_is_native()) + if (!efi_runtime_supported()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); else { if (disable_runtime || efi_runtime_init()) return; - set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); } - if (efi_memmap_init()) return; - set_bit(EFI_MEMMAP, &x86_efi_facility); - -#ifdef CONFIG_X86_32 - if (efi_is_native()) { - x86_platform.get_wallclock = efi_get_time; - x86_platform.set_wallclock = efi_set_rtc_mmss; - } -#endif + set_bit(EFI_MEMMAP, &efi.flags); -#if EFI_DEBUG print_efi_memmap(); -#endif } void __init efi_late_init(void) @@ -800,7 +795,7 @@ void __init efi_set_executable(efi_memory_desc_t *md, bool executable) set_memory_nx(addr, npages); } -static void __init runtime_code_page_mkexec(void) +void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; void *p; @@ -816,34 +811,6 @@ static void __init runtime_code_page_mkexec(void) } } -/* - * We can't ioremap data in EFI boot services RAM, because we've already mapped - * it as RAM. So, look it up in the existing EFI memory map instead. Only - * callable after efi_enter_virtual_mode and before efi_free_boot_services. - */ -void __iomem *efi_lookup_mapped_addr(u64 phys_addr) -{ - void *p; - if (WARN_ON(!memmap.map)) - return NULL; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; - u64 size = md->num_pages << EFI_PAGE_SHIFT; - u64 end = md->phys_addr + size; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; - if (!md->virt_addr) - continue; - if (phys_addr >= md->phys_addr && phys_addr < end) { - phys_addr += md->virt_addr - md->phys_addr; - return (__force void __iomem *)(unsigned long)phys_addr; - } - } - return NULL; -} - void efi_memory_uc(u64 addr, unsigned long size) { unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; @@ -854,36 +821,54 @@ void efi_memory_uc(u64 addr, unsigned long size) set_memory_uc(addr, npages); } -/* - * This function will switch the EFI runtime services to virtual mode. - * Essentially, look through the EFI memmap and map every region that - * has the runtime attribute bit set in its memory descriptor and update - * that memory descriptor with the virtual address obtained from ioremap(). - * This enables the runtime services to be called without having to - * thunk back into physical mode for every invocation. - */ -void __init efi_enter_virtual_mode(void) +void __init old_map_region(efi_memory_desc_t *md) { - efi_memory_desc_t *md, *prev_md = NULL; - efi_status_t status; + u64 start_pfn, end_pfn, end; unsigned long size; - u64 end, systab, start_pfn, end_pfn; - void *p, *va, *new_memmap = NULL; - int count = 0; + void *va; - efi.systab = NULL; + start_pfn = PFN_DOWN(md->phys_addr); + size = md->num_pages << PAGE_SHIFT; + end = md->phys_addr + size; + end_pfn = PFN_UP(end); - /* - * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI - */ + if (pfn_range_is_mapped(start_pfn, end_pfn)) { + va = __va(md->phys_addr); - if (!efi_is_native()) { - efi_unmap_memmap(); - return; - } + if (!(md->attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)va, size); + } else + va = efi_ioremap(md->phys_addr, size, + md->type, md->attribute); + + md->virt_addr = (u64) (unsigned long) va; + if (!va) + pr_err("ioremap of 0x%llX failed!\n", + (unsigned long long)md->phys_addr); +} + +static void native_runtime_setup(void) +{ + efi.get_time = virt_efi_get_time; + efi.set_time = virt_efi_set_time; + efi.get_wakeup_time = virt_efi_get_wakeup_time; + efi.set_wakeup_time = virt_efi_set_wakeup_time; + efi.get_variable = virt_efi_get_variable; + efi.get_next_variable = virt_efi_get_next_variable; + efi.set_variable = virt_efi_set_variable; + efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; + efi.reset_system = virt_efi_reset_system; + efi.query_variable_info = virt_efi_query_variable_info; + efi.update_capsule = virt_efi_update_capsule; + efi.query_capsule_caps = virt_efi_query_capsule_caps; +} + +/* Merge contiguous regions of the same type and attribute */ +static void __init efi_merge_regions(void) +{ + void *p; + efi_memory_desc_t *md, *prev_md = NULL; - /* Merge contiguous regions of the same type and attribute */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { u64 prev_size; md = p; @@ -909,60 +894,242 @@ void __init efi_enter_virtual_mode(void) } prev_md = md; } +} + +static void __init get_systab_virt_addr(efi_memory_desc_t *md) +{ + unsigned long size; + u64 end, systab; + + size = md->num_pages << EFI_PAGE_SHIFT; + end = md->phys_addr + size; + systab = (u64)(unsigned long)efi_phys.systab; + if (md->phys_addr <= systab && systab < end) { + systab += md->virt_addr - md->phys_addr; + efi.systab = (efi_system_table_t *)(unsigned long)systab; + } +} + +static void __init save_runtime_map(void) +{ +#ifdef CONFIG_KEXEC + efi_memory_desc_t *md; + void *tmp, *p, *q = NULL; + int count = 0; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return; for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) + + if (!(md->attribute & EFI_MEMORY_RUNTIME) || + (md->type == EFI_BOOT_SERVICES_CODE) || + (md->type == EFI_BOOT_SERVICES_DATA)) continue; + tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL); + if (!tmp) + goto out; + q = tmp; + + memcpy(q + count * memmap.desc_size, md, memmap.desc_size); + count++; + } - size = md->num_pages << EFI_PAGE_SHIFT; - end = md->phys_addr + size; + efi_runtime_map_setup(q, count, memmap.desc_size); + return; - start_pfn = PFN_DOWN(md->phys_addr); - end_pfn = PFN_UP(end); - if (pfn_range_is_mapped(start_pfn, end_pfn)) { - va = __va(md->phys_addr); +out: + kfree(q); + pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); +#endif +} - if (!(md->attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)va, size); - } else - va = efi_ioremap(md->phys_addr, size, - md->type, md->attribute); +static void *realloc_pages(void *old_memmap, int old_shift) +{ + void *ret; - md->virt_addr = (u64) (unsigned long) va; + ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1); + if (!ret) + goto out; - if (!va) { - pr_err("ioremap of 0x%llX failed!\n", - (unsigned long long)md->phys_addr); - continue; + /* + * A first-time allocation doesn't have anything to copy. + */ + if (!old_memmap) + return ret; + + memcpy(ret, old_memmap, PAGE_SIZE << old_shift); + +out: + free_pages((unsigned long)old_memmap, old_shift); + return ret; +} + +/* + * Map the efi memory ranges of the runtime services and update new_mmap with + * virtual addresses. + */ +static void * __init efi_map_regions(int *count, int *pg_shift) +{ + void *p, *new_memmap = NULL; + unsigned long left = 0; + efi_memory_desc_t *md; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + if (!(md->attribute & EFI_MEMORY_RUNTIME)) { +#ifdef CONFIG_X86_64 + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) +#endif + continue; } - systab = (u64) (unsigned long) efi_phys.systab; - if (md->phys_addr <= systab && systab < end) { - systab += md->virt_addr - md->phys_addr; - efi.systab = (efi_system_table_t *) (unsigned long) systab; + efi_map_region(md); + get_systab_virt_addr(md); + + if (left < memmap.desc_size) { + new_memmap = realloc_pages(new_memmap, *pg_shift); + if (!new_memmap) + return NULL; + + left += PAGE_SIZE << *pg_shift; + (*pg_shift)++; } - new_memmap = krealloc(new_memmap, - (count + 1) * memmap.desc_size, - GFP_KERNEL); - memcpy(new_memmap + (count * memmap.desc_size), md, + + memcpy(new_memmap + (*count * memmap.desc_size), md, memmap.desc_size); - count++; + + left -= memmap.desc_size; + (*count)++; + } + + return new_memmap; +} + +static void __init kexec_enter_virtual_mode(void) +{ +#ifdef CONFIG_KEXEC + efi_memory_desc_t *md; + void *p; + + efi.systab = NULL; + + /* + * We don't do virtual mode, since we don't do runtime services, on + * non-native EFI + */ + if (!efi_is_native()) { + efi_unmap_memmap(); + return; + } + + /* + * Map efi regions which were passed via setup_data. The virt_addr is a + * fixed addr which was used in first kernel of a kexec boot. + */ + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + efi_map_region_fixed(md); /* FIXME: add error handling */ + get_systab_virt_addr(md); } + save_runtime_map(); + BUG_ON(!efi.systab); - status = phys_efi_set_virtual_address_map( - memmap.desc_size * count, - memmap.desc_size, - memmap.desc_version, - (efi_memory_desc_t *)__pa(new_memmap)); + efi_sync_low_kernel_mappings(); + + /* + * Now that EFI is in virtual mode, update the function + * pointers in the runtime service table to the new virtual addresses. + * + * Call EFI services through wrapper functions. + */ + efi.runtime_version = efi_systab.hdr.revision; + + native_runtime_setup(); + + efi.set_virtual_address_map = NULL; + + if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) + runtime_code_page_mkexec(); + + /* clean DUMMY object */ + efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, + 0, NULL); +#endif +} + +/* + * This function will switch the EFI runtime services to virtual mode. + * Essentially, we look through the EFI memmap and map every region that + * has the runtime attribute bit set in its memory descriptor into the + * ->trampoline_pgd page table using a top-down VA allocation scheme. + * + * The old method which used to update that memory descriptor with the + * virtual address obtained from ioremap() is still supported when the + * kernel is booted with efi=old_map on its command line. Same old + * method enabled the runtime services to be called without having to + * thunk back into physical mode for every invocation. + * + * The new method does a pagetable switch in a preemption-safe manner + * so that we're in a different address space when calling a runtime + * function. For function arguments passing we do copy the PGDs of the + * kernel page table into ->trampoline_pgd prior to each call. + * + * Specially for kexec boot, efi runtime maps in previous kernel should + * be passed in via setup_data. In that case runtime ranges will be mapped + * to the same virtual addresses as the first kernel, see + * kexec_enter_virtual_mode(). + */ +static void __init __efi_enter_virtual_mode(void) +{ + int count = 0, pg_shift = 0; + void *new_memmap = NULL; + efi_status_t status; + + efi.systab = NULL; + + efi_merge_regions(); + new_memmap = efi_map_regions(&count, &pg_shift); + if (!new_memmap) { + pr_err("Error reallocating memory, EFI runtime non-functional!\n"); + return; + } + + save_runtime_map(); + + BUG_ON(!efi.systab); + + if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) + return; + + efi_sync_low_kernel_mappings(); + efi_dump_pagetable(); + + if (efi_is_native()) { + status = phys_efi_set_virtual_address_map( + memmap.desc_size * count, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)__pa(new_memmap)); + } else { + status = efi_thunk_set_virtual_address_map( + efi_phys.set_virtual_address_map, + memmap.desc_size * count, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)__pa(new_memmap)); + } if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode " - "(status=%lx)!\n", status); + pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); panic("EFI call to SetVirtualAddressMap() failed!"); } @@ -973,23 +1140,43 @@ void __init efi_enter_virtual_mode(void) * Call EFI services through wrapper functions. */ efi.runtime_version = efi_systab.hdr.revision; - efi.get_time = virt_efi_get_time; - efi.set_time = virt_efi_set_time; - efi.get_wakeup_time = virt_efi_get_wakeup_time; - efi.set_wakeup_time = virt_efi_set_wakeup_time; - efi.get_variable = virt_efi_get_variable; - efi.get_next_variable = virt_efi_get_next_variable; - efi.set_variable = virt_efi_set_variable; - efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; - efi.reset_system = virt_efi_reset_system; + + if (efi_is_native()) + native_runtime_setup(); + else + efi_thunk_runtime_setup(); + efi.set_virtual_address_map = NULL; - efi.query_variable_info = virt_efi_query_variable_info; - efi.update_capsule = virt_efi_update_capsule; - efi.query_capsule_caps = virt_efi_query_capsule_caps; - if (__supported_pte_mask & _PAGE_NX) - runtime_code_page_mkexec(); - kfree(new_memmap); + efi_runtime_mkexec(); + + /* + * We mapped the descriptor array into the EFI pagetable above but we're + * not unmapping it here. Here's why: + * + * We're copying select PGDs from the kernel page table to the EFI page + * table and when we do so and make changes to those PGDs like unmapping + * stuff from them, those changes appear in the kernel page table and we + * go boom. + * + * From setup_real_mode(): + * + * ... + * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; + * + * In this particular case, our allocation is in PGD 0 of the EFI page + * table but we've copied that PGD from PGD[272] of the EFI page table: + * + * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 + * + * where the direct memory mapping in kernel space is. + * + * new_memmap's VA comes from that direct mapping and thus clearing it, + * it would get cleared in the kernel page table too. + * + * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); + */ + free_pages((unsigned long)new_memmap, pg_shift); /* clean DUMMY object */ efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, @@ -999,6 +1186,14 @@ void __init efi_enter_virtual_mode(void) 0, NULL); } +void __init efi_enter_virtual_mode(void) +{ + if (efi_setup) + kexec_enter_virtual_mode(); + else + __efi_enter_virtual_mode(); +} + /* * Convenience functions to obtain memory types and attributes */ @@ -1036,9 +1231,8 @@ u64 efi_mem_attributes(unsigned long phys_addr) } /* - * Some firmware has serious problems when using more than 50% of the EFI - * variable store, i.e. it triggers bugs that can brick machines. Ensure that - * we never use more than this safe limit. + * Some firmware implementations refuse to boot if there's insufficient space + * in the variable store. Ensure that we never use more than a safe limit. * * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable * store. @@ -1057,10 +1251,9 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) return status; /* - * Some firmware implementations refuse to boot if there's insufficient - * space in the variable store. We account for that by refusing the - * write if permitting it would reduce the available space to under - * 5KB. This figure was provided by Samsung, so should be safe. + * We account for that by refusing the write if permitting it would + * reduce the available space to under 5KB. This figure was provided by + * Samsung, so should be safe. */ if ((remaining_size - size < EFI_MIN_RESERVE) && !efi_no_storage_paranoia) { @@ -1116,3 +1309,34 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) return EFI_SUCCESS; } EXPORT_SYMBOL_GPL(efi_query_variable_store); + +static int __init parse_efi_cmdline(char *str) +{ + if (*str == '=') + str++; + + if (!strncmp(str, "old_map", 7)) + set_bit(EFI_OLD_MEMMAP, &efi.flags); + + return 0; +} +early_param("efi", parse_efi_cmdline); + +void __init efi_apply_memmap_quirks(void) +{ + /* + * Once setup is done earlier, unmap the EFI memory map on mismatched + * firmware/kernel architectures since there is no support for runtime + * services. + */ + if (!efi_runtime_supported()) { + pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); + efi_unmap_memmap(); + } + + /* + * UV doesn't support the new EFI pagetable mapping yet. + */ + if (is_uv_system()) + set_bit(EFI_OLD_MEMMAP, &efi.flags); +} |
