diff options
Diffstat (limited to 'arch/x86/platform/efi')
| -rw-r--r-- | arch/x86/platform/efi/Makefile | 2 | ||||
| -rw-r--r-- | arch/x86/platform/efi/early_printk.c | 236 | ||||
| -rw-r--r-- | arch/x86/platform/efi/efi-bgrt.c | 12 | ||||
| -rw-r--r-- | arch/x86/platform/efi/efi.c | 821 | ||||
| -rw-r--r-- | arch/x86/platform/efi/efi_32.c | 23 | ||||
| -rw-r--r-- | arch/x86/platform/efi/efi_64.c | 489 | ||||
| -rw-r--r-- | arch/x86/platform/efi/efi_stub_64.S | 271 | ||||
| -rw-r--r-- | arch/x86/platform/efi/efi_thunk_64.S | 65 | 
8 files changed, 1549 insertions, 370 deletions
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index 6db1cc4c753..d51045afcaa 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,2 +1,4 @@  obj-$(CONFIG_EFI) 		+= efi.o efi_$(BITS).o efi_stub_$(BITS).o  obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o +obj-$(CONFIG_EARLY_PRINTK_EFI)	+= early_printk.o +obj-$(CONFIG_EFI_MIXED)		+= efi_thunk_$(BITS).o diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c new file mode 100644 index 00000000000..52414211729 --- /dev/null +++ b/arch/x86/platform/efi/early_printk.c @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2013 Intel Corporation; author Matt Fleming + * + *  This file is part of the Linux kernel, and is made available under + *  the terms of the GNU General Public License version 2. + */ + +#include <linux/console.h> +#include <linux/efi.h> +#include <linux/font.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <asm/setup.h> + +static const struct font_desc *font; +static u32 efi_x, efi_y; +static void *efi_fb; +static bool early_efi_keep; + +/* + * efi earlyprintk need use early_ioremap to map the framebuffer. + * But early_ioremap is not usable for earlyprintk=efi,keep, ioremap should + * be used instead. ioremap will be available after paging_init() which is + * earlier than initcall callbacks. Thus adding this early initcall function + * early_efi_map_fb to map the whole efi framebuffer. + */ +static __init int early_efi_map_fb(void) +{ +	unsigned long base, size; + +	if (!early_efi_keep) +		return 0; + +	base = boot_params.screen_info.lfb_base; +	size = boot_params.screen_info.lfb_size; +	efi_fb = ioremap(base, size); + +	return efi_fb ? 0 : -ENOMEM; +} +early_initcall(early_efi_map_fb); + +/* + * early_efi_map maps efi framebuffer region [start, start + len -1] + * In case earlyprintk=efi,keep we have the whole framebuffer mapped already + * so just return the offset efi_fb + start. 
+ */ +static __init_refok void *early_efi_map(unsigned long start, unsigned long len) +{ +	unsigned long base; + +	base = boot_params.screen_info.lfb_base; + +	if (efi_fb) +		return (efi_fb + start); +	else +		return early_ioremap(base + start, len); +} + +static __init_refok void early_efi_unmap(void *addr, unsigned long len) +{ +	if (!efi_fb) +		early_iounmap(addr, len); +} + +static void early_efi_clear_scanline(unsigned int y) +{ +	unsigned long *dst; +	u16 len; + +	len = boot_params.screen_info.lfb_linelength; +	dst = early_efi_map(y*len, len); +	if (!dst) +		return; + +	memset(dst, 0, len); +	early_efi_unmap(dst, len); +} + +static void early_efi_scroll_up(void) +{ +	unsigned long *dst, *src; +	u16 len; +	u32 i, height; + +	len = boot_params.screen_info.lfb_linelength; +	height = boot_params.screen_info.lfb_height; + +	for (i = 0; i < height - font->height; i++) { +		dst = early_efi_map(i*len, len); +		if (!dst) +			return; + +		src = early_efi_map((i + font->height) * len, len); +		if (!src) { +			early_efi_unmap(dst, len); +			return; +		} + +		memmove(dst, src, len); + +		early_efi_unmap(src, len); +		early_efi_unmap(dst, len); +	} +} + +static void early_efi_write_char(u32 *dst, unsigned char c, unsigned int h) +{ +	const u32 color_black = 0x00000000; +	const u32 color_white = 0x00ffffff; +	const u8 *src; +	u8 s8; +	int m; + +	src = font->data + c * font->height; +	s8 = *(src + h); + +	for (m = 0; m < 8; m++) { +		if ((s8 >> (7 - m)) & 1) +			*dst = color_white; +		else +			*dst = color_black; +		dst++; +	} +} + +static void +early_efi_write(struct console *con, const char *str, unsigned int num) +{ +	struct screen_info *si; +	unsigned int len; +	const char *s; +	void *dst; + +	si = &boot_params.screen_info; +	len = si->lfb_linelength; + +	while (num) { +		unsigned int linemax; +		unsigned int h, count = 0; + +		for (s = str; *s && *s != '\n'; s++) { +			if (count == num) +				break; +			count++; +		} + +		linemax = (si->lfb_width - efi_x) / font->width; 
+		if (count > linemax) +			count = linemax; + +		for (h = 0; h < font->height; h++) { +			unsigned int n, x; + +			dst = early_efi_map((efi_y + h) * len, len); +			if (!dst) +				return; + +			s = str; +			n = count; +			x = efi_x; + +			while (n-- > 0) { +				early_efi_write_char(dst + x*4, *s, h); +				x += font->width; +				s++; +			} + +			early_efi_unmap(dst, len); +		} + +		num -= count; +		efi_x += count * font->width; +		str += count; + +		if (num > 0 && *s == '\n') { +			efi_x = 0; +			efi_y += font->height; +			str++; +			num--; +		} + +		if (efi_x >= si->lfb_width) { +			efi_x = 0; +			efi_y += font->height; +		} + +		if (efi_y + font->height > si->lfb_height) { +			u32 i; + +			efi_y -= font->height; +			early_efi_scroll_up(); + +			for (i = 0; i < font->height; i++) +				early_efi_clear_scanline(efi_y + i); +		} +	} +} + +static __init int early_efi_setup(struct console *con, char *options) +{ +	struct screen_info *si; +	u16 xres, yres; +	u32 i; + +	si = &boot_params.screen_info; +	xres = si->lfb_width; +	yres = si->lfb_height; + +	/* +	 * early_efi_write_char() implicitly assumes a framebuffer with +	 * 32-bits per pixel. 
+	 */ +	if (si->lfb_depth != 32) +		return -ENODEV; + +	font = get_default_font(xres, yres, -1, -1); +	if (!font) +		return -ENODEV; + +	efi_y = rounddown(yres, font->height) - font->height; +	for (i = 0; i < (yres - efi_y) / font->height; i++) +		early_efi_scroll_up(); + +	/* early_console_register will unset CON_BOOT in case ,keep */ +	if (!(con->flags & CON_BOOT)) +		early_efi_keep = true; +	return 0; +} + +struct console early_efi_console = { +	.name =		"earlyefi", +	.write =	early_efi_write, +	.setup =	early_efi_setup, +	.flags =	CON_PRINTBUFFER, +	.index =	-1, +}; diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 7145ec63c52..f15103dff4b 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -42,14 +42,15 @@ void __init efi_bgrt_init(void)  	if (bgrt_tab->header.length < sizeof(*bgrt_tab))  		return; -	if (bgrt_tab->version != 1) +	if (bgrt_tab->version != 1 || bgrt_tab->status != 1)  		return;  	if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)  		return;  	image = efi_lookup_mapped_addr(bgrt_tab->image_address);  	if (!image) { -		image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); +		image = early_memremap(bgrt_tab->image_address, +				       sizeof(bmp_header));  		ioremapped = true;  		if (!image)  			return; @@ -57,7 +58,7 @@ void __init efi_bgrt_init(void)  	memcpy_fromio(&bmp_header, image, sizeof(bmp_header));  	if (ioremapped) -		iounmap(image); +		early_iounmap(image, sizeof(bmp_header));  	bgrt_image_size = bmp_header.size;  	bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); @@ -65,7 +66,8 @@ void __init efi_bgrt_init(void)  		return;  	if (ioremapped) { -		image = ioremap(bgrt_tab->image_address, bmp_header.size); +		image = early_memremap(bgrt_tab->image_address, +				       bmp_header.size);  		if (!image) {  			kfree(bgrt_image);  			bgrt_image = NULL; @@ -75,5 +77,5 @@ void __init efi_bgrt_init(void)  	memcpy_fromio(bgrt_image, image, bgrt_image_size);  	
if (ioremapped) -		iounmap(image); +		early_iounmap(image, bmp_header.size);  } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index c7e22ab29a5..87fc96bcc13 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -12,6 +12,8 @@   *	Bibo Mao <bibo.mao@intel.com>   *	Chandramouli Narayanan <mouli@linux.intel.com>   *	Huang Ying <ying.huang@intel.com> + * Copyright (C) 2013 SuSE Labs + *	Borislav Petkov <bp@suse.de> - runtime services VA mapping   *   * Copied from efi_32.c to eliminate the duplicated code between EFI   * 32/64 support code. --ying 2007-10-26 @@ -50,8 +52,9 @@  #include <asm/tlbflush.h>  #include <asm/x86_init.h>  #include <asm/rtc.h> +#include <asm/uv/uv.h> -#define EFI_DEBUG	1 +#define EFI_DEBUG  #define EFI_MIN_RESERVE 5120 @@ -60,36 +63,21 @@  static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; -struct efi __read_mostly efi = { -	.mps        = EFI_INVALID_TABLE_ADDR, -	.acpi       = EFI_INVALID_TABLE_ADDR, -	.acpi20     = EFI_INVALID_TABLE_ADDR, -	.smbios     = EFI_INVALID_TABLE_ADDR, -	.sal_systab = EFI_INVALID_TABLE_ADDR, -	.boot_info  = EFI_INVALID_TABLE_ADDR, -	.hcdp       = EFI_INVALID_TABLE_ADDR, -	.uga        = EFI_INVALID_TABLE_ADDR, -	.uv_systab  = EFI_INVALID_TABLE_ADDR, -}; -EXPORT_SYMBOL(efi); -  struct efi_memory_map memmap;  static struct efi efi_phys __initdata;  static efi_system_table_t efi_systab __initdata; -unsigned long x86_efi_facility; +static efi_config_table_type_t arch_tables[] __initdata = { +#ifdef CONFIG_X86_UV +	{UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab}, +#endif +	{NULL_GUID, NULL, NULL}, +}; -/* - * Returns 1 if 'facility' is enabled, 0 otherwise. 
- */ -int efi_enabled(int facility) -{ -	return test_bit(facility, &x86_efi_facility) != 0; -} -EXPORT_SYMBOL(efi_enabled); +u64 efi_setup;		/* efi setup_data physical address */ -static bool __initdata disable_runtime = false; +static bool disable_runtime __initdata = false;  static int __init setup_noefi(char *arg)  {  	disable_runtime = true; @@ -116,14 +104,13 @@ static int __init setup_storage_paranoia(char *arg)  }  early_param("efi_no_storage_paranoia", setup_storage_paranoia); -  static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)  {  	unsigned long flags;  	efi_status_t status;  	spin_lock_irqsave(&rtc_lock, flags); -	status = efi_call_virt2(get_time, tm, tc); +	status = efi_call_virt(get_time, tm, tc);  	spin_unlock_irqrestore(&rtc_lock, flags);  	return status;  } @@ -134,7 +121,7 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm)  	efi_status_t status;  	spin_lock_irqsave(&rtc_lock, flags); -	status = efi_call_virt1(set_time, tm); +	status = efi_call_virt(set_time, tm);  	spin_unlock_irqrestore(&rtc_lock, flags);  	return status;  } @@ -147,8 +134,7 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,  	efi_status_t status;  	spin_lock_irqsave(&rtc_lock, flags); -	status = efi_call_virt3(get_wakeup_time, -				enabled, pending, tm); +	status = efi_call_virt(get_wakeup_time, enabled, pending, tm);  	spin_unlock_irqrestore(&rtc_lock, flags);  	return status;  } @@ -159,8 +145,7 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)  	efi_status_t status;  	spin_lock_irqsave(&rtc_lock, flags); -	status = efi_call_virt2(set_wakeup_time, -				enabled, tm); +	status = efi_call_virt(set_wakeup_time, enabled, tm);  	spin_unlock_irqrestore(&rtc_lock, flags);  	return status;  } @@ -171,17 +156,17 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name,  					  unsigned long *data_size,  					  void *data)  { -	return efi_call_virt5(get_variable, -			      name, vendor, attr, -			     
 data_size, data); +	return efi_call_virt(get_variable, +			     name, vendor, attr, +			     data_size, data);  }  static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,  					       efi_char16_t *name,  					       efi_guid_t *vendor)  { -	return efi_call_virt3(get_next_variable, -			      name_size, name, vendor); +	return efi_call_virt(get_next_variable, +			     name_size, name, vendor);  }  static efi_status_t virt_efi_set_variable(efi_char16_t *name, @@ -190,9 +175,9 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,  					  unsigned long data_size,  					  void *data)  { -	return efi_call_virt5(set_variable, -			      name, vendor, attr, -			      data_size, data); +	return efi_call_virt(set_variable, +			     name, vendor, attr, +			     data_size, data);  }  static efi_status_t virt_efi_query_variable_info(u32 attr, @@ -203,13 +188,13 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,  	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)  		return EFI_UNSUPPORTED; -	return efi_call_virt4(query_variable_info, attr, storage_space, -			      remaining_space, max_variable_size); +	return efi_call_virt(query_variable_info, attr, storage_space, +			     remaining_space, max_variable_size);  }  static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)  { -	return efi_call_virt1(get_next_high_mono_count, count); +	return efi_call_virt(get_next_high_mono_count, count);  }  static void virt_efi_reset_system(int reset_type, @@ -217,8 +202,8 @@ static void virt_efi_reset_system(int reset_type,  				  unsigned long data_size,  				  efi_char16_t *data)  { -	efi_call_virt4(reset_system, reset_type, status, -		       data_size, data); +	__efi_call_virt(reset_system, reset_type, status, +			data_size, data);  }  static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, @@ -228,7 +213,7 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,  	if (efi.runtime_version < 
EFI_2_00_SYSTEM_TABLE_REVISION)  		return EFI_UNSUPPORTED; -	return efi_call_virt3(update_capsule, capsules, count, sg_list); +	return efi_call_virt(update_capsule, capsules, count, sg_list);  }  static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, @@ -239,8 +224,8 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,  	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)  		return EFI_UNSUPPORTED; -	return efi_call_virt4(query_capsule_caps, capsules, count, max_size, -			      reset_type); +	return efi_call_virt(query_capsule_caps, capsules, count, max_size, +			     reset_type);  }  static efi_status_t __init phys_efi_set_virtual_address_map( @@ -252,34 +237,19 @@ static efi_status_t __init phys_efi_set_virtual_address_map(  	efi_status_t status;  	efi_call_phys_prelog(); -	status = efi_call_phys4(efi_phys.set_virtual_address_map, -				memory_map_size, descriptor_size, -				descriptor_version, virtual_map); +	status = efi_call_phys(efi_phys.set_virtual_address_map, +			       memory_map_size, descriptor_size, +			       descriptor_version, virtual_map);  	efi_call_phys_epilog();  	return status;  } -static efi_status_t __init phys_efi_get_time(efi_time_t *tm, -					     efi_time_cap_t *tc) -{ -	unsigned long flags; -	efi_status_t status; - -	spin_lock_irqsave(&rtc_lock, flags); -	efi_call_phys_prelog(); -	status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), -				virt_to_phys(tc)); -	efi_call_phys_epilog(); -	spin_unlock_irqrestore(&rtc_lock, flags); -	return status; -} -  int efi_set_rtc_mmss(const struct timespec *now)  {  	unsigned long nowtime = now->tv_sec; -	efi_status_t 	status; -	efi_time_t 	eft; -	efi_time_cap_t 	cap; +	efi_status_t	status; +	efi_time_t	eft; +	efi_time_cap_t	cap;  	struct rtc_time	tm;  	status = efi.get_time(&eft, &cap); @@ -297,9 +267,8 @@ int efi_set_rtc_mmss(const struct timespec *now)  		eft.second = tm.tm_sec;  		eft.nanosecond = 0;  	} else { -		printk(KERN_ERR -	
	       "%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", -		       __FUNCTION__, nowtime); +		pr_err("%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", +		       __func__, nowtime);  		return -1;  	} @@ -399,12 +368,14 @@ int __init efi_memblock_x86_reserve_range(void)  	memblock_reserve(pmap, memmap.nr_map * memmap.desc_size); +	efi.memmap = &memmap; +  	return 0;  } -#if EFI_DEBUG  static void __init print_efi_memmap(void)  { +#ifdef EFI_DEBUG  	efi_memory_desc_t *md;  	void *p;  	int i; @@ -413,14 +384,13 @@ static void __init print_efi_memmap(void)  	     p < memmap.map_end;  	     p += memmap.desc_size, i++) {  		md = p; -		pr_info("mem%02u: type=%u, attr=0x%llx, " -			"range=[0x%016llx-0x%016llx) (%lluMB)\n", +		pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n",  			i, md->type, md->attribute, md->phys_addr,  			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),  			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));  	} -}  #endif  /*  EFI_DEBUG  */ +}  void __init efi_reserve_boot_services(void)  { @@ -440,15 +410,14 @@ void __init efi_reserve_boot_services(void)  		 * - Not within any part of the kernel  		 * - Not the bios reserved area  		*/ -		if ((start+size >= __pa_symbol(_text) +		if ((start + size > __pa_symbol(_text)  				&& start <= __pa_symbol(_end)) ||  			!e820_all_mapped(start, start+size, E820_RAM) ||  			memblock_is_region_reserved(start, size)) {  			/* Could not reserve, skip it */  			md->num_pages = 0; -			memblock_dbg("Could not reserve boot range " -					"[0x%010llx-0x%010llx]\n", -						start, start+size-1); +			memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n", +				     start, start+size-1);  		} else  			memblock_reserve(start, size);  	} @@ -456,7 +425,7 @@ void __init efi_reserve_boot_services(void)  void __init efi_unmap_memmap(void)  { -	clear_bit(EFI_MEMMAP, &x86_efi_facility); +	clear_bit(EFI_MEMMAP, &efi.flags);  	if (memmap.map) {  		early_iounmap(memmap.map, 
memmap.nr_map * memmap.desc_size);  		memmap.map = NULL; @@ -467,9 +436,6 @@ void __init efi_free_boot_services(void)  {  	void *p; -	if (!efi_is_native()) -		return; -  	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {  		efi_memory_desc_t *md = p;  		unsigned long long start = md->phys_addr; @@ -493,18 +459,27 @@ static int __init efi_systab_init(void *phys)  {  	if (efi_enabled(EFI_64BIT)) {  		efi_system_table_64_t *systab64; +		struct efi_setup_data *data = NULL;  		u64 tmp = 0; +		if (efi_setup) { +			data = early_memremap(efi_setup, sizeof(*data)); +			if (!data) +				return -ENOMEM; +		}  		systab64 = early_ioremap((unsigned long)phys,  					 sizeof(*systab64));  		if (systab64 == NULL) {  			pr_err("Couldn't map the system table!\n"); +			if (data) +				early_iounmap(data, sizeof(*data));  			return -ENOMEM;  		}  		efi_systab.hdr = systab64->hdr; -		efi_systab.fw_vendor = systab64->fw_vendor; -		tmp |= systab64->fw_vendor; +		efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : +					      systab64->fw_vendor; +		tmp |= data ? data->fw_vendor : systab64->fw_vendor;  		efi_systab.fw_revision = systab64->fw_revision;  		efi_systab.con_in_handle = systab64->con_in_handle;  		tmp |= systab64->con_in_handle; @@ -518,15 +493,20 @@ static int __init efi_systab_init(void *phys)  		tmp |= systab64->stderr_handle;  		efi_systab.stderr = systab64->stderr;  		tmp |= systab64->stderr; -		efi_systab.runtime = (void *)(unsigned long)systab64->runtime; -		tmp |= systab64->runtime; +		efi_systab.runtime = data ? +				     (void *)(unsigned long)data->runtime : +				     (void *)(unsigned long)systab64->runtime; +		tmp |= data ? data->runtime : systab64->runtime;  		efi_systab.boottime = (void *)(unsigned long)systab64->boottime;  		tmp |= systab64->boottime;  		efi_systab.nr_tables = systab64->nr_tables; -		efi_systab.tables = systab64->tables; -		tmp |= systab64->tables; +		efi_systab.tables = data ? 
(unsigned long)data->tables : +					   systab64->tables; +		tmp |= data ? data->tables : systab64->tables;  		early_iounmap(systab64, sizeof(*systab64)); +		if (data) +			early_iounmap(data, sizeof(*data));  #ifdef CONFIG_X86_32  		if (tmp >> 32) {  			pr_err("EFI data located above 4GB, disabling EFI.\n"); @@ -570,119 +550,82 @@ static int __init efi_systab_init(void *phys)  		return -EINVAL;  	}  	if ((efi.systab->hdr.revision >> 16) == 0) -		pr_err("Warning: System table version " -		       "%d.%02d, expected 1.00 or greater!\n", +		pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n",  		       efi.systab->hdr.revision >> 16,  		       efi.systab->hdr.revision & 0xffff); +	set_bit(EFI_SYSTEM_TABLES, &efi.flags); +  	return 0;  } -static int __init efi_config_init(u64 tables, int nr_tables) +static int __init efi_runtime_init32(void)  { -	void *config_tables, *tablep; -	int i, sz; - -	if (efi_enabled(EFI_64BIT)) -		sz = sizeof(efi_config_table_64_t); -	else -		sz = sizeof(efi_config_table_32_t); +	efi_runtime_services_32_t *runtime; -	/* -	 * Let's see what config tables the firmware passed to us. -	 */ -	config_tables = early_ioremap(tables, nr_tables * sz); -	if (config_tables == NULL) { -		pr_err("Could not map Configuration table!\n"); +	runtime = early_ioremap((unsigned long)efi.systab->runtime, +			sizeof(efi_runtime_services_32_t)); +	if (!runtime) { +		pr_err("Could not map the runtime service table!\n");  		return -ENOMEM;  	} -	tablep = config_tables; -	pr_info(""); -	for (i = 0; i < efi.systab->nr_tables; i++) { -		efi_guid_t guid; -		unsigned long table; +	/* +	 * We will only need *early* access to the following two +	 * EFI runtime services before set_virtual_address_map +	 * is invoked. 
+	 */ +	efi_phys.set_virtual_address_map = +			(efi_set_virtual_address_map_t *) +			(unsigned long)runtime->set_virtual_address_map; +	early_iounmap(runtime, sizeof(efi_runtime_services_32_t)); -		if (efi_enabled(EFI_64BIT)) { -			u64 table64; -			guid = ((efi_config_table_64_t *)tablep)->guid; -			table64 = ((efi_config_table_64_t *)tablep)->table; -			table = table64; -#ifdef CONFIG_X86_32 -			if (table64 >> 32) { -				pr_cont("\n"); -				pr_err("Table located above 4GB, disabling EFI.\n"); -				early_iounmap(config_tables, -					      efi.systab->nr_tables * sz); -				return -EINVAL; -			} -#endif -		} else { -			guid = ((efi_config_table_32_t *)tablep)->guid; -			table = ((efi_config_table_32_t *)tablep)->table; -		} -		if (!efi_guidcmp(guid, MPS_TABLE_GUID)) { -			efi.mps = table; -			pr_cont(" MPS=0x%lx ", table); -		} else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) { -			efi.acpi20 = table; -			pr_cont(" ACPI 2.0=0x%lx ", table); -		} else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) { -			efi.acpi = table; -			pr_cont(" ACPI=0x%lx ", table); -		} else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) { -			efi.smbios = table; -			pr_cont(" SMBIOS=0x%lx ", table); -#ifdef CONFIG_X86_UV -		} else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) { -			efi.uv_systab = table; -			pr_cont(" UVsystab=0x%lx ", table); -#endif -		} else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) { -			efi.hcdp = table; -			pr_cont(" HCDP=0x%lx ", table); -		} else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) { -			efi.uga = table; -			pr_cont(" UGA=0x%lx ", table); -		} -		tablep += sz; -	} -	pr_cont("\n"); -	early_iounmap(config_tables, efi.systab->nr_tables * sz);  	return 0;  } -static int __init efi_runtime_init(void) +static int __init efi_runtime_init64(void)  { -	efi_runtime_services_t *runtime; +	efi_runtime_services_64_t *runtime; -	/* -	 * Check out the runtime services table. 
We need to map -	 * the runtime services table so that we can grab the physical -	 * address of several of the EFI runtime functions, needed to -	 * set the firmware into virtual mode. -	 */  	runtime = early_ioremap((unsigned long)efi.systab->runtime, -				sizeof(efi_runtime_services_t)); +			sizeof(efi_runtime_services_64_t));  	if (!runtime) {  		pr_err("Could not map the runtime service table!\n");  		return -ENOMEM;  	} +  	/* -	 * We will only need *early* access to the following -	 * two EFI runtime services before set_virtual_address_map +	 * We will only need *early* access to the following two +	 * EFI runtime services before set_virtual_address_map  	 * is invoked.  	 */ -	efi_phys.get_time = (efi_get_time_t *)runtime->get_time;  	efi_phys.set_virtual_address_map = -		(efi_set_virtual_address_map_t *) -		runtime->set_virtual_address_map; +			(efi_set_virtual_address_map_t *) +			(unsigned long)runtime->set_virtual_address_map; +	early_iounmap(runtime, sizeof(efi_runtime_services_64_t)); + +	return 0; +} + +static int __init efi_runtime_init(void) +{ +	int rv; +  	/* -	 * Make efi_get_time can be called before entering -	 * virtual mode. +	 * Check out the runtime services table. We need to map +	 * the runtime services table so that we can grab the physical +	 * address of several of the EFI runtime functions, needed to +	 * set the firmware into virtual mode.  	 */ -	efi.get_time = phys_efi_get_time; -	early_iounmap(runtime, sizeof(efi_runtime_services_t)); +	if (efi_enabled(EFI_64BIT)) +		rv = efi_runtime_init64(); +	else +		rv = efi_runtime_init32(); + +	if (rv) +		return rv; + +	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);  	return 0;  } @@ -701,9 +644,67 @@ static int __init efi_memmap_init(void)  	if (add_efi_memmap)  		do_add_efi_memmap(); +	set_bit(EFI_MEMMAP, &efi.flags); +  	return 0;  } +/* + * A number of config table entries get remapped to virtual addresses + * after entering EFI virtual mode. 
However, the kexec kernel requires + * their physical addresses therefore we pass them via setup_data and + * correct those entries to their respective physical addresses here. + * + * Currently only handles smbios which is necessary for some firmware + * implementation. + */ +static int __init efi_reuse_config(u64 tables, int nr_tables) +{ +	int i, sz, ret = 0; +	void *p, *tablep; +	struct efi_setup_data *data; + +	if (!efi_setup) +		return 0; + +	if (!efi_enabled(EFI_64BIT)) +		return 0; + +	data = early_memremap(efi_setup, sizeof(*data)); +	if (!data) { +		ret = -ENOMEM; +		goto out; +	} + +	if (!data->smbios) +		goto out_memremap; + +	sz = sizeof(efi_config_table_64_t); + +	p = tablep = early_memremap(tables, nr_tables * sz); +	if (!p) { +		pr_err("Could not map Configuration table!\n"); +		ret = -ENOMEM; +		goto out_memremap; +	} + +	for (i = 0; i < efi.systab->nr_tables; i++) { +		efi_guid_t guid; + +		guid = ((efi_config_table_64_t *)p)->guid; + +		if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) +			((efi_config_table_64_t *)p)->table = data->smbios; +		p += sz; +	} +	early_iounmap(tablep, nr_tables * sz); + +out_memremap: +	early_iounmap(data, sizeof(*data)); +out: +	return ret; +} +  void __init efi_init(void)  {  	efi_char16_t *c16; @@ -727,7 +728,11 @@ void __init efi_init(void)  	if (efi_systab_init(efi_phys.systab))  		return; -	set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); +	set_bit(EFI_SYSTEM_TABLES, &efi.flags); + +	efi.config_table = (unsigned long)efi.systab->tables; +	efi.fw_vendor	 = (unsigned long)efi.systab->fw_vendor; +	efi.runtime	 = (unsigned long)efi.systab->runtime;  	/*  	 * Show what we know for posterity @@ -745,39 +750,29 @@ void __init efi_init(void)  		efi.systab->hdr.revision >> 16,  		efi.systab->hdr.revision & 0xffff, vendor); -	if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) +	if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables))  		return; -	set_bit(EFI_CONFIG_TABLES, &x86_efi_facility); +	if 
(efi_config_init(arch_tables)) +		return;  	/*  	 * Note: We currently don't support runtime services on an EFI  	 * that doesn't match the kernel 32/64-bit mode.  	 */ -	if (!efi_is_native()) +	if (!efi_runtime_supported())  		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");  	else {  		if (disable_runtime || efi_runtime_init())  			return; -		set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);  	} -  	if (efi_memmap_init())  		return; -	set_bit(EFI_MEMMAP, &x86_efi_facility); +	set_bit(EFI_MEMMAP, &efi.flags); -#ifdef CONFIG_X86_32 -	if (efi_is_native()) { -		x86_platform.get_wallclock = efi_get_time; -		x86_platform.set_wallclock = efi_set_rtc_mmss; -	} -#endif - -#if EFI_DEBUG  	print_efi_memmap(); -#endif  }  void __init efi_late_init(void) @@ -800,7 +795,7 @@ void __init efi_set_executable(efi_memory_desc_t *md, bool executable)  		set_memory_nx(addr, npages);  } -static void __init runtime_code_page_mkexec(void) +void __init runtime_code_page_mkexec(void)  {  	efi_memory_desc_t *md;  	void *p; @@ -816,34 +811,6 @@ static void __init runtime_code_page_mkexec(void)  	}  } -/* - * We can't ioremap data in EFI boot services RAM, because we've already mapped - * it as RAM.  So, look it up in the existing EFI memory map instead.  Only - * callable after efi_enter_virtual_mode and before efi_free_boot_services. 
- */ -void __iomem *efi_lookup_mapped_addr(u64 phys_addr) -{ -	void *p; -	if (WARN_ON(!memmap.map)) -		return NULL; -	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { -		efi_memory_desc_t *md = p; -		u64 size = md->num_pages << EFI_PAGE_SHIFT; -		u64 end = md->phys_addr + size; -		if (!(md->attribute & EFI_MEMORY_RUNTIME) && -		    md->type != EFI_BOOT_SERVICES_CODE && -		    md->type != EFI_BOOT_SERVICES_DATA) -			continue; -		if (!md->virt_addr) -			continue; -		if (phys_addr >= md->phys_addr && phys_addr < end) { -			phys_addr += md->virt_addr - md->phys_addr; -			return (__force void __iomem *)(unsigned long)phys_addr; -		} -	} -	return NULL; -} -  void efi_memory_uc(u64 addr, unsigned long size)  {  	unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; @@ -854,36 +821,54 @@ void efi_memory_uc(u64 addr, unsigned long size)  	set_memory_uc(addr, npages);  } -/* - * This function will switch the EFI runtime services to virtual mode. - * Essentially, look through the EFI memmap and map every region that - * has the runtime attribute bit set in its memory descriptor and update - * that memory descriptor with the virtual address obtained from ioremap(). - * This enables the runtime services to be called without having to - * thunk back into physical mode for every invocation. 
- */ -void __init efi_enter_virtual_mode(void) +void __init old_map_region(efi_memory_desc_t *md)  { -	efi_memory_desc_t *md, *prev_md = NULL; -	efi_status_t status; +	u64 start_pfn, end_pfn, end;  	unsigned long size; -	u64 end, systab, start_pfn, end_pfn; -	void *p, *va, *new_memmap = NULL; -	int count = 0; +	void *va; -	efi.systab = NULL; +	start_pfn = PFN_DOWN(md->phys_addr); +	size	  = md->num_pages << PAGE_SHIFT; +	end	  = md->phys_addr + size; +	end_pfn   = PFN_UP(end); -	/* -	 * We don't do virtual mode, since we don't do runtime services, on -	 * non-native EFI -	 */ +	if (pfn_range_is_mapped(start_pfn, end_pfn)) { +		va = __va(md->phys_addr); -	if (!efi_is_native()) { -		efi_unmap_memmap(); -		return; -	} +		if (!(md->attribute & EFI_MEMORY_WB)) +			efi_memory_uc((u64)(unsigned long)va, size); +	} else +		va = efi_ioremap(md->phys_addr, size, +				 md->type, md->attribute); + +	md->virt_addr = (u64) (unsigned long) va; +	if (!va) +		pr_err("ioremap of 0x%llX failed!\n", +		       (unsigned long long)md->phys_addr); +} + +static void native_runtime_setup(void) +{ +	efi.get_time = virt_efi_get_time; +	efi.set_time = virt_efi_set_time; +	efi.get_wakeup_time = virt_efi_get_wakeup_time; +	efi.set_wakeup_time = virt_efi_set_wakeup_time; +	efi.get_variable = virt_efi_get_variable; +	efi.get_next_variable = virt_efi_get_next_variable; +	efi.set_variable = virt_efi_set_variable; +	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; +	efi.reset_system = virt_efi_reset_system; +	efi.query_variable_info = virt_efi_query_variable_info; +	efi.update_capsule = virt_efi_update_capsule; +	efi.query_capsule_caps = virt_efi_query_capsule_caps; +} + +/* Merge contiguous regions of the same type and attribute */ +static void __init efi_merge_regions(void) +{ +	void *p; +	efi_memory_desc_t *md, *prev_md = NULL; -	/* Merge contiguous regions of the same type and attribute */  	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {  		u64 prev_size;  		
md = p; @@ -909,6 +894,87 @@ void __init efi_enter_virtual_mode(void)  		}  		prev_md = md;  	} +} + +static void __init get_systab_virt_addr(efi_memory_desc_t *md) +{ +	unsigned long size; +	u64 end, systab; + +	size = md->num_pages << EFI_PAGE_SHIFT; +	end = md->phys_addr + size; +	systab = (u64)(unsigned long)efi_phys.systab; +	if (md->phys_addr <= systab && systab < end) { +		systab += md->virt_addr - md->phys_addr; +		efi.systab = (efi_system_table_t *)(unsigned long)systab; +	} +} + +static void __init save_runtime_map(void) +{ +#ifdef CONFIG_KEXEC +	efi_memory_desc_t *md; +	void *tmp, *p, *q = NULL; +	int count = 0; + +	if (efi_enabled(EFI_OLD_MEMMAP)) +		return; + +	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { +		md = p; + +		if (!(md->attribute & EFI_MEMORY_RUNTIME) || +		    (md->type == EFI_BOOT_SERVICES_CODE) || +		    (md->type == EFI_BOOT_SERVICES_DATA)) +			continue; +		tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL); +		if (!tmp) +			goto out; +		q = tmp; + +		memcpy(q + count * memmap.desc_size, md, memmap.desc_size); +		count++; +	} + +	efi_runtime_map_setup(q, count, memmap.desc_size); +	return; + +out: +	kfree(q); +	pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); +#endif +} + +static void *realloc_pages(void *old_memmap, int old_shift) +{ +	void *ret; + +	ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1); +	if (!ret) +		goto out; + +	/* +	 * A first-time allocation doesn't have anything to copy. +	 */ +	if (!old_memmap) +		return ret; + +	memcpy(ret, old_memmap, PAGE_SIZE << old_shift); + +out: +	free_pages((unsigned long)old_memmap, old_shift); +	return ret; +} + +/* + * Map the efi memory ranges of the runtime services and update new_mmap with + * virtual addresses. 
+ */ +static void * __init efi_map_regions(int *count, int *pg_shift) +{ +	void *p, *new_memmap = NULL; +	unsigned long left = 0; +	efi_memory_desc_t *md;  	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {  		md = p; @@ -920,52 +986,150 @@ void __init efi_enter_virtual_mode(void)  				continue;  		} -		size = md->num_pages << EFI_PAGE_SHIFT; -		end = md->phys_addr + size; +		efi_map_region(md); +		get_systab_virt_addr(md); -		start_pfn = PFN_DOWN(md->phys_addr); -		end_pfn = PFN_UP(end); -		if (pfn_range_is_mapped(start_pfn, end_pfn)) { -			va = __va(md->phys_addr); +		if (left < memmap.desc_size) { +			new_memmap = realloc_pages(new_memmap, *pg_shift); +			if (!new_memmap) +				return NULL; -			if (!(md->attribute & EFI_MEMORY_WB)) -				efi_memory_uc((u64)(unsigned long)va, size); -		} else -			va = efi_ioremap(md->phys_addr, size, -					 md->type, md->attribute); - -		md->virt_addr = (u64) (unsigned long) va; - -		if (!va) { -			pr_err("ioremap of 0x%llX failed!\n", -			       (unsigned long long)md->phys_addr); -			continue; +			left += PAGE_SIZE << *pg_shift; +			(*pg_shift)++;  		} -		systab = (u64) (unsigned long) efi_phys.systab; -		if (md->phys_addr <= systab && systab < end) { -			systab += md->virt_addr - md->phys_addr; -			efi.systab = (efi_system_table_t *) (unsigned long) systab; -		} -		new_memmap = krealloc(new_memmap, -				      (count + 1) * memmap.desc_size, -				      GFP_KERNEL); -		memcpy(new_memmap + (count * memmap.desc_size), md, +		memcpy(new_memmap + (*count * memmap.desc_size), md,  		       memmap.desc_size); -		count++; + +		left -= memmap.desc_size; +		(*count)++; +	} + +	return new_memmap; +} + +static void __init kexec_enter_virtual_mode(void) +{ +#ifdef CONFIG_KEXEC +	efi_memory_desc_t *md; +	void *p; + +	efi.systab = NULL; + +	/* +	 * We don't do virtual mode, since we don't do runtime services, on +	 * non-native EFI +	 */ +	if (!efi_is_native()) { +		efi_unmap_memmap(); +		return; +	} + +	/* +	* Map efi regions 
which were passed via setup_data. The virt_addr is a +	* fixed addr which was used in first kernel of a kexec boot. +	*/ +	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { +		md = p; +		efi_map_region_fixed(md); /* FIXME: add error handling */ +		get_systab_virt_addr(md);  	} +	save_runtime_map(); +  	BUG_ON(!efi.systab); -	status = phys_efi_set_virtual_address_map( -		memmap.desc_size * count, -		memmap.desc_size, -		memmap.desc_version, -		(efi_memory_desc_t *)__pa(new_memmap)); +	efi_sync_low_kernel_mappings(); + +	/* +	 * Now that EFI is in virtual mode, update the function +	 * pointers in the runtime service table to the new virtual addresses. +	 * +	 * Call EFI services through wrapper functions. +	 */ +	efi.runtime_version = efi_systab.hdr.revision; + +	native_runtime_setup(); + +	efi.set_virtual_address_map = NULL; + +	if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) +		runtime_code_page_mkexec(); + +	/* clean DUMMY object */ +	efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, +			 EFI_VARIABLE_NON_VOLATILE | +			 EFI_VARIABLE_BOOTSERVICE_ACCESS | +			 EFI_VARIABLE_RUNTIME_ACCESS, +			 0, NULL); +#endif +} + +/* + * This function will switch the EFI runtime services to virtual mode. + * Essentially, we look through the EFI memmap and map every region that + * has the runtime attribute bit set in its memory descriptor into the + * ->trampoline_pgd page table using a top-down VA allocation scheme. + * + * The old method which used to update that memory descriptor with the + * virtual address obtained from ioremap() is still supported when the + * kernel is booted with efi=old_map on its command line. Same old + * method enabled the runtime services to be called without having to + * thunk back into physical mode for every invocation. + * + * The new method does a pagetable switch in a preemption-safe manner + * so that we're in a different address space when calling a runtime + * function. 
For function arguments passing we do copy the PGDs of the + * kernel page table into ->trampoline_pgd prior to each call. + * + * Specially for kexec boot, efi runtime maps in previous kernel should + * be passed in via setup_data. In that case runtime ranges will be mapped + * to the same virtual addresses as the first kernel, see + * kexec_enter_virtual_mode(). + */ +static void __init __efi_enter_virtual_mode(void) +{ +	int count = 0, pg_shift = 0; +	void *new_memmap = NULL; +	efi_status_t status; + +	efi.systab = NULL; + +	efi_merge_regions(); +	new_memmap = efi_map_regions(&count, &pg_shift); +	if (!new_memmap) { +		pr_err("Error reallocating memory, EFI runtime non-functional!\n"); +		return; +	} + +	save_runtime_map(); + +	BUG_ON(!efi.systab); + +	if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) +		return; + +	efi_sync_low_kernel_mappings(); +	efi_dump_pagetable(); + +	if (efi_is_native()) { +		status = phys_efi_set_virtual_address_map( +				memmap.desc_size * count, +				memmap.desc_size, +				memmap.desc_version, +				(efi_memory_desc_t *)__pa(new_memmap)); +	} else { +		status = efi_thunk_set_virtual_address_map( +				efi_phys.set_virtual_address_map, +				memmap.desc_size * count, +				memmap.desc_size, +				memmap.desc_version, +				(efi_memory_desc_t *)__pa(new_memmap)); +	}  	if (status != EFI_SUCCESS) { -		pr_alert("Unable to switch EFI into virtual mode " -			 "(status=%lx)!\n", status); +		pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", +			 status);  		panic("EFI call to SetVirtualAddressMap() failed!");  	} @@ -976,23 +1140,43 @@ void __init efi_enter_virtual_mode(void)  	 * Call EFI services through wrapper functions.  	 
*/  	efi.runtime_version = efi_systab.hdr.revision; -	efi.get_time = virt_efi_get_time; -	efi.set_time = virt_efi_set_time; -	efi.get_wakeup_time = virt_efi_get_wakeup_time; -	efi.set_wakeup_time = virt_efi_set_wakeup_time; -	efi.get_variable = virt_efi_get_variable; -	efi.get_next_variable = virt_efi_get_next_variable; -	efi.set_variable = virt_efi_set_variable; -	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; -	efi.reset_system = virt_efi_reset_system; + +	if (efi_is_native()) +		native_runtime_setup(); +	else +		efi_thunk_runtime_setup(); +  	efi.set_virtual_address_map = NULL; -	efi.query_variable_info = virt_efi_query_variable_info; -	efi.update_capsule = virt_efi_update_capsule; -	efi.query_capsule_caps = virt_efi_query_capsule_caps; -	if (__supported_pte_mask & _PAGE_NX) -		runtime_code_page_mkexec(); -	kfree(new_memmap); +	efi_runtime_mkexec(); + +	/* +	 * We mapped the descriptor array into the EFI pagetable above but we're +	 * not unmapping it here. Here's why: +	 * +	 * We're copying select PGDs from the kernel page table to the EFI page +	 * table and when we do so and make changes to those PGDs like unmapping +	 * stuff from them, those changes appear in the kernel page table and we +	 * go boom. +	 * +	 * From setup_real_mode(): +	 * +	 * ... +	 * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; +	 * +	 * In this particular case, our allocation is in PGD 0 of the EFI page +	 * table but we've copied that PGD from PGD[272] of the kernel page table: +	 * +	 *	pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 +	 * +	 * where the direct memory mapping in kernel space is. +	 * +	 * new_memmap's VA comes from that direct mapping and thus clearing it, +	 * it would get cleared in the kernel page table too. 
+	 * +	 * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); +	 */ +	free_pages((unsigned long)new_memmap, pg_shift);  	/* clean DUMMY object */  	efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, @@ -1002,6 +1186,14 @@ void __init efi_enter_virtual_mode(void)  			 0, NULL);  } +void __init efi_enter_virtual_mode(void) +{ +	if (efi_setup) +		kexec_enter_virtual_mode(); +	else +		__efi_enter_virtual_mode(); +} +  /*   * Convenience functions to obtain memory types and attributes   */ @@ -1039,9 +1231,8 @@ u64 efi_mem_attributes(unsigned long phys_addr)  }  /* - * Some firmware has serious problems when using more than 50% of the EFI - * variable store, i.e. it triggers bugs that can brick machines. Ensure that - * we never use more than this safe limit. + * Some firmware implementations refuse to boot if there's insufficient space + * in the variable store. Ensure that we never use more than a safe limit.   *   * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable   * store. @@ -1060,10 +1251,9 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)  		return status;  	/* -	 * Some firmware implementations refuse to boot if there's insufficient -	 * space in the variable store. We account for that by refusing the -	 * write if permitting it would reduce the available space to under -	 * 5KB. This figure was provided by Samsung, so should be safe. +	 * We account for that by refusing the write if permitting it would +	 * reduce the available space to under 5KB. This figure was provided by +	 * Samsung, so should be safe.  	 
*/  	if ((remaining_size - size < EFI_MIN_RESERVE) &&  		!efi_no_storage_paranoia) { @@ -1119,3 +1309,34 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)  	return EFI_SUCCESS;  }  EXPORT_SYMBOL_GPL(efi_query_variable_store); + +static int __init parse_efi_cmdline(char *str) +{ +	if (*str == '=') +		str++; + +	if (!strncmp(str, "old_map", 7)) +		set_bit(EFI_OLD_MEMMAP, &efi.flags); + +	return 0; +} +early_param("efi", parse_efi_cmdline); + +void __init efi_apply_memmap_quirks(void) +{ +	/* +	 * Once setup is done earlier, unmap the EFI memory map on mismatched +	 * firmware/kernel architectures since there is no support for runtime +	 * services. +	 */ +	if (!efi_runtime_supported()) { +		pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); +		efi_unmap_memmap(); +	} + +	/* +	 * UV doesn't support the new EFI pagetable mapping yet. +	 */ +	if (is_uv_system()) +		set_bit(EFI_OLD_MEMMAP, &efi.flags); +} diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e446941dd..9ee3491e31f 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -37,9 +37,24 @@   * claim EFI runtime service handler exclusively and to duplicate a memory in   * low memory space say 0 - 3G.   
*/ -  static unsigned long efi_rt_eflags; +void efi_sync_low_kernel_mappings(void) {} +void __init efi_dump_pagetable(void) {} +int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ +	return 0; +} +void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {} + +void __init efi_map_region(efi_memory_desc_t *md) +{ +	old_map_region(md); +} + +void __init efi_map_region_fixed(efi_memory_desc_t *md) {} +void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} +  void efi_call_phys_prelog(void)  {  	struct desc_ptr gdt_descr; @@ -67,3 +82,9 @@ void efi_call_phys_epilog(void)  	local_irq_restore(efi_rt_eflags);  } + +void __init efi_runtime_mkexec(void) +{ +	if (__supported_pte_mask & _PAGE_NX) +		runtime_code_page_mkexec(); +} diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 39a0e7f1f0a..290d397e1dd 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -38,10 +38,30 @@  #include <asm/efi.h>  #include <asm/cacheflush.h>  #include <asm/fixmap.h> +#include <asm/realmode.h> +#include <asm/time.h>  static pgd_t *save_pgd __initdata;  static unsigned long efi_flags __initdata; +/* + * We allocate runtime services regions top-down, starting from -4G, i.e. + * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. 
+ */ +static u64 efi_va	= -4 * (1UL << 30); +#define EFI_VA_END	(-68 * (1UL << 30)) + +/* + * Scratch space used for switching the pagetable in the EFI stub + */ +struct efi_scratch { +	u64 r15; +	u64 prev_cr3; +	pgd_t *efi_pgt; +	bool use_pgd; +	u64 phys_stack; +} __packed; +  static void __init early_code_mapping_set_exec(int executable)  {  	efi_memory_desc_t *md; @@ -65,6 +85,9 @@ void __init efi_call_phys_prelog(void)  	int pgd;  	int n_pgds; +	if (!efi_enabled(EFI_OLD_MEMMAP)) +		return; +  	early_code_mapping_set_exec(1);  	local_irq_save(efi_flags); @@ -86,6 +109,10 @@ void __init efi_call_phys_epilog(void)  	 */  	int pgd;  	int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + +	if (!efi_enabled(EFI_OLD_MEMMAP)) +		return; +  	for (pgd = 0; pgd < n_pgds; pgd++)  		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);  	kfree(save_pgd); @@ -94,6 +121,158 @@ void __init efi_call_phys_epilog(void)  	early_code_mapping_set_exec(0);  } +/* + * Add low kernel mappings for passing arguments to EFI functions. + */ +void efi_sync_low_kernel_mappings(void) +{ +	unsigned num_pgds; +	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + +	if (efi_enabled(EFI_OLD_MEMMAP)) +		return; + +	num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); + +	memcpy(pgd + pgd_index(PAGE_OFFSET), +		init_mm.pgd + pgd_index(PAGE_OFFSET), +		sizeof(pgd_t) * num_pgds); +} + +int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ +	unsigned long text; +	struct page *page; +	unsigned npages; +	pgd_t *pgd; + +	if (efi_enabled(EFI_OLD_MEMMAP)) +		return 0; + +	efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; +	pgd = __va(efi_scratch.efi_pgt); + +	/* +	 * It can happen that the physical address of new_memmap lands in memory +	 * which is not mapped in the EFI page table. Therefore we need to go +	 * and ident-map those pages containing the map before calling +	 * phys_efi_set_virtual_address_map(). 
+	 */ +	if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) { +		pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); +		return 1; +	} + +	efi_scratch.use_pgd = true; + +	/* +	 * When making calls to the firmware everything needs to be 1:1 +	 * mapped and addressable with 32-bit pointers. Map the kernel +	 * text and allocate a new stack because we can't rely on the +	 * stack pointer being < 4GB. +	 */ +	if (!IS_ENABLED(CONFIG_EFI_MIXED)) +		return 0; + +	page = alloc_page(GFP_KERNEL|__GFP_DMA32); +	if (!page) +		panic("Unable to allocate EFI runtime stack < 4GB\n"); + +	efi_scratch.phys_stack = virt_to_phys(page_address(page)); +	efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + +	npages = (_end - _text) >> PAGE_SHIFT; +	text = __pa(_text); + +	if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) { +		pr_err("Failed to map kernel text 1:1\n"); +		return 1; +	} + +	return 0; +} + +void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ +	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + +	kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); +} + +static void __init __map_region(efi_memory_desc_t *md, u64 va) +{ +	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); +	unsigned long pf = 0; + +	if (!(md->attribute & EFI_MEMORY_WB)) +		pf |= _PAGE_PCD; + +	if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) +		pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", +			   md->phys_addr, va); +} + +void __init efi_map_region(efi_memory_desc_t *md) +{ +	unsigned long size = md->num_pages << PAGE_SHIFT; +	u64 pa = md->phys_addr; + +	if (efi_enabled(EFI_OLD_MEMMAP)) +		return old_map_region(md); + +	/* +	 * Make sure the 1:1 mappings are present as a catch-all for b0rked +	 * firmware which doesn't update all internal pointers after switching +	 * to virtual mode and would otherwise crap on us. 
+	 */ +	__map_region(md, md->phys_addr); + +	/* +	 * Enforce the 1:1 mapping as the default virtual address when +	 * booting in EFI mixed mode, because even though we may be +	 * running a 64-bit kernel, the firmware may only be 32-bit. +	 */ +	if (!efi_is_native () && IS_ENABLED(CONFIG_EFI_MIXED)) { +		md->virt_addr = md->phys_addr; +		return; +	} + +	efi_va -= size; + +	/* Is PA 2M-aligned? */ +	if (!(pa & (PMD_SIZE - 1))) { +		efi_va &= PMD_MASK; +	} else { +		u64 pa_offset = pa & (PMD_SIZE - 1); +		u64 prev_va = efi_va; + +		/* get us the same offset within this 2M page */ +		efi_va = (efi_va & PMD_MASK) + pa_offset; + +		if (efi_va > prev_va) +			efi_va -= PMD_SIZE; +	} + +	if (efi_va < EFI_VA_END) { +		pr_warn(FW_WARN "VA address range overflow!\n"); +		return; +	} + +	/* Do the VA map */ +	__map_region(md, efi_va); +	md->virt_addr = efi_va; +} + +/* + * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges. + * md->virt_addr is the original virtual address which had been mapped in kexec + * 1st kernel. 
+ */ +void __init efi_map_region_fixed(efi_memory_desc_t *md) +{ +	__map_region(md, md->virt_addr); +} +  void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,  				 u32 type, u64 attribute)  { @@ -113,3 +292,313 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,  	return (void __iomem *)__va(phys_addr);  } + +void __init parse_efi_setup(u64 phys_addr, u32 data_len) +{ +	efi_setup = phys_addr + sizeof(struct setup_data); +} + +void __init efi_runtime_mkexec(void) +{ +	if (!efi_enabled(EFI_OLD_MEMMAP)) +		return; + +	if (__supported_pte_mask & _PAGE_NX) +		runtime_code_page_mkexec(); +} + +void __init efi_dump_pagetable(void) +{ +#ifdef CONFIG_EFI_PGT_DUMP +	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + +	ptdump_walk_pgd_level(NULL, pgd); +#endif +} + +#ifdef CONFIG_EFI_MIXED +extern efi_status_t efi64_thunk(u32, ...); + +#define runtime_service32(func)						 \ +({									 \ +	u32 table = (u32)(unsigned long)efi.systab;			 \ +	u32 *rt, *___f;							 \ +									 \ +	rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime));	 \ +	___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \ +	*___f;								 \ +}) + +/* + * Switch to the EFI page tables early so that we can access the 1:1 + * runtime services mappings which are not mapped in any other page + * tables. This function must be called before runtime_service32(). + * + * Also, disable interrupts because the IDT points to 64-bit handlers, + * which aren't going to function correctly when we switch to 32-bit. + */ +#define efi_thunk(f, ...)						
\ +({									\ +	efi_status_t __s;						\ +	unsigned long flags;						\ +	u32 func;							\ +									\ +	efi_sync_low_kernel_mappings();					\ +	local_irq_save(flags);						\ +									\ +	efi_scratch.prev_cr3 = read_cr3();				\ +	write_cr3((unsigned long)efi_scratch.efi_pgt);			\ +	__flush_tlb_all();						\ +									\ +	func = runtime_service32(f);					\ +	__s = efi64_thunk(func, __VA_ARGS__);			\ +									\ +	write_cr3(efi_scratch.prev_cr3);				\ +	__flush_tlb_all();						\ +	local_irq_restore(flags);					\ +									\ +	__s;								\ +}) + +efi_status_t efi_thunk_set_virtual_address_map( +	void *phys_set_virtual_address_map, +	unsigned long memory_map_size, +	unsigned long descriptor_size, +	u32 descriptor_version, +	efi_memory_desc_t *virtual_map) +{ +	efi_status_t status; +	unsigned long flags; +	u32 func; + +	efi_sync_low_kernel_mappings(); +	local_irq_save(flags); + +	efi_scratch.prev_cr3 = read_cr3(); +	write_cr3((unsigned long)efi_scratch.efi_pgt); +	__flush_tlb_all(); + +	func = (u32)(unsigned long)phys_set_virtual_address_map; +	status = efi64_thunk(func, memory_map_size, descriptor_size, +			     descriptor_version, virtual_map); + +	write_cr3(efi_scratch.prev_cr3); +	__flush_tlb_all(); +	local_irq_restore(flags); + +	return status; +} + +static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) +{ +	efi_status_t status; +	u32 phys_tm, phys_tc; + +	spin_lock(&rtc_lock); + +	phys_tm = virt_to_phys(tm); +	phys_tc = virt_to_phys(tc); + +	status = efi_thunk(get_time, phys_tm, phys_tc); + +	spin_unlock(&rtc_lock); + +	return status; +} + +static efi_status_t efi_thunk_set_time(efi_time_t *tm) +{ +	efi_status_t status; +	u32 phys_tm; + +	spin_lock(&rtc_lock); + +	phys_tm = virt_to_phys(tm); + +	status = efi_thunk(set_time, phys_tm); + +	spin_unlock(&rtc_lock); + +	return status; +} + +static efi_status_t +efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, +			  efi_time_t *tm) +{ +	efi_status_t status; +	u32 phys_enabled, 
phys_pending, phys_tm; + +	spin_lock(&rtc_lock); + +	phys_enabled = virt_to_phys(enabled); +	phys_pending = virt_to_phys(pending); +	phys_tm = virt_to_phys(tm); + +	status = efi_thunk(get_wakeup_time, phys_enabled, +			     phys_pending, phys_tm); + +	spin_unlock(&rtc_lock); + +	return status; +} + +static efi_status_t +efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +{ +	efi_status_t status; +	u32 phys_tm; + +	spin_lock(&rtc_lock); + +	phys_tm = virt_to_phys(tm); + +	status = efi_thunk(set_wakeup_time, enabled, phys_tm); + +	spin_unlock(&rtc_lock); + +	return status; +} + + +static efi_status_t +efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, +		       u32 *attr, unsigned long *data_size, void *data) +{ +	efi_status_t status; +	u32 phys_name, phys_vendor, phys_attr; +	u32 phys_data_size, phys_data; + +	phys_data_size = virt_to_phys(data_size); +	phys_vendor = virt_to_phys(vendor); +	phys_name = virt_to_phys(name); +	phys_attr = virt_to_phys(attr); +	phys_data = virt_to_phys(data); + +	status = efi_thunk(get_variable, phys_name, phys_vendor, +			   phys_attr, phys_data_size, phys_data); + +	return status; +} + +static efi_status_t +efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, +		       u32 attr, unsigned long data_size, void *data) +{ +	u32 phys_name, phys_vendor, phys_data; +	efi_status_t status; + +	phys_name = virt_to_phys(name); +	phys_vendor = virt_to_phys(vendor); +	phys_data = virt_to_phys(data); + +	/* If data_size is > sizeof(u32) we've got problems */ +	status = efi_thunk(set_variable, phys_name, phys_vendor, +			   attr, data_size, phys_data); + +	return status; +} + +static efi_status_t +efi_thunk_get_next_variable(unsigned long *name_size, +			    efi_char16_t *name, +			    efi_guid_t *vendor) +{ +	efi_status_t status; +	u32 phys_name_size, phys_name, phys_vendor; + +	phys_name_size = virt_to_phys(name_size); +	phys_vendor = virt_to_phys(vendor); +	phys_name = virt_to_phys(name); + +	status = 
efi_thunk(get_next_variable, phys_name_size, +			   phys_name, phys_vendor); + +	return status; +} + +static efi_status_t +efi_thunk_get_next_high_mono_count(u32 *count) +{ +	efi_status_t status; +	u32 phys_count; + +	phys_count = virt_to_phys(count); +	status = efi_thunk(get_next_high_mono_count, phys_count); + +	return status; +} + +static void +efi_thunk_reset_system(int reset_type, efi_status_t status, +		       unsigned long data_size, efi_char16_t *data) +{ +	u32 phys_data; + +	phys_data = virt_to_phys(data); + +	efi_thunk(reset_system, reset_type, status, data_size, phys_data); +} + +static efi_status_t +efi_thunk_update_capsule(efi_capsule_header_t **capsules, +			 unsigned long count, unsigned long sg_list) +{ +	/* +	 * To properly support this function we would need to repackage +	 * 'capsules' because the firmware doesn't understand 64-bit +	 * pointers. +	 */ +	return EFI_UNSUPPORTED; +} + +static efi_status_t +efi_thunk_query_variable_info(u32 attr, u64 *storage_space, +			      u64 *remaining_space, +			      u64 *max_variable_size) +{ +	efi_status_t status; +	u32 phys_storage, phys_remaining, phys_max; + +	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) +		return EFI_UNSUPPORTED; + +	phys_storage = virt_to_phys(storage_space); +	phys_remaining = virt_to_phys(remaining_space); +	phys_max = virt_to_phys(max_variable_size); + +	status = efi_thunk(query_variable_info, attr, phys_storage, +			   phys_remaining, phys_max); + +	return status; +} + +static efi_status_t +efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules, +			     unsigned long count, u64 *max_size, +			     int *reset_type) +{ +	/* +	 * To properly support this function we would need to repackage +	 * 'capsules' because the firmware doesn't understand 64-bit +	 * pointers. 
+	 */ +	return EFI_UNSUPPORTED; +} + +void efi_thunk_runtime_setup(void) +{ +	efi.get_time = efi_thunk_get_time; +	efi.set_time = efi_thunk_set_time; +	efi.get_wakeup_time = efi_thunk_get_wakeup_time; +	efi.set_wakeup_time = efi_thunk_set_wakeup_time; +	efi.get_variable = efi_thunk_get_variable; +	efi.get_next_variable = efi_thunk_get_next_variable; +	efi.set_variable = efi_thunk_set_variable; +	efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count; +	efi.reset_system = efi_thunk_reset_system; +	efi.query_variable_info = efi_thunk_query_variable_info; +	efi.update_capsule = efi_thunk_update_capsule; +	efi.query_capsule_caps = efi_thunk_query_capsule_caps; +} +#endif /* CONFIG_EFI_MIXED */ diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 4c07ccab814..5fcda727255 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -7,6 +7,10 @@   */  #include <linux/linkage.h> +#include <asm/segment.h> +#include <asm/msr.h> +#include <asm/processor-flags.h> +#include <asm/page_types.h>  #define SAVE_XMM			\  	mov %rsp, %rax;			\ @@ -34,72 +38,42 @@  	mov %rsi, %cr0;			\  	mov (%rsp), %rsp -ENTRY(efi_call0) -	SAVE_XMM -	subq $32, %rsp -	call *%rdi -	addq $32, %rsp -	RESTORE_XMM -	ret -ENDPROC(efi_call0) +	/* stolen from gcc */ +	.macro FLUSH_TLB_ALL +	movq %r15, efi_scratch(%rip) +	movq %r14, efi_scratch+8(%rip) +	movq %cr4, %r15 +	movq %r15, %r14 +	andb $0x7f, %r14b +	movq %r14, %cr4 +	movq %r15, %cr4 +	movq efi_scratch+8(%rip), %r14 +	movq efi_scratch(%rip), %r15 +	.endm -ENTRY(efi_call1) -	SAVE_XMM -	subq $32, %rsp -	mov  %rsi, %rcx -	call *%rdi -	addq $32, %rsp -	RESTORE_XMM -	ret -ENDPROC(efi_call1) +	.macro SWITCH_PGT +	cmpb $0, efi_scratch+24(%rip) +	je 1f +	movq %r15, efi_scratch(%rip)		# r15 +	# save previous CR3 +	movq %cr3, %r15 +	movq %r15, efi_scratch+8(%rip)		# prev_cr3 +	movq efi_scratch+16(%rip), %r15		# EFI pgt +	movq %r15, %cr3 +	1: +	.endm -ENTRY(efi_call2) -	SAVE_XMM -	
subq $32, %rsp -	mov  %rsi, %rcx -	call *%rdi -	addq $32, %rsp -	RESTORE_XMM -	ret -ENDPROC(efi_call2) +	.macro RESTORE_PGT +	cmpb $0, efi_scratch+24(%rip) +	je 2f +	movq efi_scratch+8(%rip), %r15 +	movq %r15, %cr3 +	movq efi_scratch(%rip), %r15 +	FLUSH_TLB_ALL +	2: +	.endm -ENTRY(efi_call3) -	SAVE_XMM -	subq $32, %rsp -	mov  %rcx, %r8 -	mov  %rsi, %rcx -	call *%rdi -	addq $32, %rsp -	RESTORE_XMM -	ret -ENDPROC(efi_call3) - -ENTRY(efi_call4) -	SAVE_XMM -	subq $32, %rsp -	mov %r8, %r9 -	mov %rcx, %r8 -	mov %rsi, %rcx -	call *%rdi -	addq $32, %rsp -	RESTORE_XMM -	ret -ENDPROC(efi_call4) - -ENTRY(efi_call5) -	SAVE_XMM -	subq $48, %rsp -	mov %r9, 32(%rsp) -	mov %r8, %r9 -	mov %rcx, %r8 -	mov %rsi, %rcx -	call *%rdi -	addq $48, %rsp -	RESTORE_XMM -	ret -ENDPROC(efi_call5) - -ENTRY(efi_call6) +ENTRY(efi_call)  	SAVE_XMM  	mov (%rsp), %rax  	mov 8(%rax), %rax @@ -109,8 +83,177 @@ ENTRY(efi_call6)  	mov %r8, %r9  	mov %rcx, %r8  	mov %rsi, %rcx +	SWITCH_PGT  	call *%rdi +	RESTORE_PGT  	addq $48, %rsp  	RESTORE_XMM  	ret -ENDPROC(efi_call6) +ENDPROC(efi_call) + +#ifdef CONFIG_EFI_MIXED + +/* + * We run this function from the 1:1 mapping. + * + * This function must be invoked with a 1:1 mapped stack. + */ +ENTRY(__efi64_thunk) +	movl	%ds, %eax +	push	%rax +	movl	%es, %eax +	push	%rax +	movl	%ss, %eax +	push	%rax + +	subq	$32, %rsp +	movl	%esi, 0x0(%rsp) +	movl	%edx, 0x4(%rsp) +	movl	%ecx, 0x8(%rsp) +	movq	%r8, %rsi +	movl	%esi, 0xc(%rsp) +	movq	%r9, %rsi +	movl	%esi,  0x10(%rsp) + +	sgdt	save_gdt(%rip) + +	leaq	1f(%rip), %rbx +	movq	%rbx, func_rt_ptr(%rip) + +	/* Switch to gdt with 32-bit segments */ +	movl	64(%rsp), %eax +	lgdt	(%rax) + +	leaq	efi_enter32(%rip), %rax +	pushq	$__KERNEL_CS +	pushq	%rax +	lretq + +1:	addq	$32, %rsp + +	lgdt	save_gdt(%rip) + +	pop	%rbx +	movl	%ebx, %ss +	pop	%rbx +	movl	%ebx, %es +	pop	%rbx +	movl	%ebx, %ds + +	/* +	 * Convert 32-bit status code into 64-bit. 
+	 */ +	test	%rax, %rax +	jz	1f +	movl	%eax, %ecx +	andl	$0x0fffffff, %ecx +	andl	$0xf0000000, %eax +	shl	$32, %rax +	or	%rcx, %rax +1: +	ret +ENDPROC(__efi64_thunk) + +ENTRY(efi_exit32) +	movq	func_rt_ptr(%rip), %rax +	push	%rax +	mov	%rdi, %rax +	ret +ENDPROC(efi_exit32) + +	.code32 +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +ENTRY(efi_enter32) +	movl	$__KERNEL_DS, %eax +	movl	%eax, %ds +	movl	%eax, %es +	movl	%eax, %ss + +	/* Reload pgtables */ +	movl	%cr3, %eax +	movl	%eax, %cr3 + +	/* Disable paging */ +	movl	%cr0, %eax +	btrl	$X86_CR0_PG_BIT, %eax +	movl	%eax, %cr0 + +	/* Disable long mode via EFER */ +	movl	$MSR_EFER, %ecx +	rdmsr +	btrl	$_EFER_LME, %eax +	wrmsr + +	call	*%edi + +	/* We must preserve return value */ +	movl	%eax, %edi + +	/* +	 * Some firmware will return with interrupts enabled. Be sure to +	 * disable them before we switch GDTs. +	 */ +	cli + +	movl	68(%esp), %eax +	movl	%eax, 2(%eax) +	lgdtl	(%eax) + +	movl	%cr4, %eax +	btsl	$(X86_CR4_PAE_BIT), %eax +	movl	%eax, %cr4 + +	movl	%cr3, %eax +	movl	%eax, %cr3 + +	movl	$MSR_EFER, %ecx +	rdmsr +	btsl	$_EFER_LME, %eax +	wrmsr + +	xorl	%eax, %eax +	lldt	%ax + +	movl	72(%esp), %eax +	pushl	$__KERNEL_CS +	pushl	%eax + +	/* Enable paging */ +	movl	%cr0, %eax +	btsl	$X86_CR0_PG_BIT, %eax +	movl	%eax, %cr0 +	lret +ENDPROC(efi_enter32) + +	.data +	.balign	8 +	.global	efi32_boot_gdt +efi32_boot_gdt:	.word	0 +		.quad	0 + +save_gdt:	.word	0 +		.quad	0 +func_rt_ptr:	.quad	0 + +	.global efi_gdt64 +efi_gdt64: +	.word	efi_gdt64_end - efi_gdt64 +	.long	0			/* Filled out by user */ +	.word	0 +	.quad	0x0000000000000000	/* NULL descriptor */ +	.quad	0x00af9a000000ffff	/* __KERNEL_CS */ +	.quad	0x00cf92000000ffff	/* __KERNEL_DS */ +	.quad	0x0080890000000000	/* TS descriptor */ +	.quad   0x0000000000000000	/* TS continued */ +efi_gdt64_end: +#endif /* CONFIG_EFI_MIXED */ + +	.data +ENTRY(efi_scratch) +	.fill 3,8,0 +	.byte 0 +	.quad 0 diff --git 
a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S new file mode 100644 index 00000000000..8806fa73e6e --- /dev/null +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2014 Intel Corporation; author Matt Fleming + */ + +#include <linux/linkage.h> +#include <asm/page_types.h> + +	.text +	.code64 +ENTRY(efi64_thunk) +	push	%rbp +	push	%rbx + +	/* +	 * Switch to 1:1 mapped 32-bit stack pointer. +	 */ +	movq	%rsp, efi_saved_sp(%rip) +	movq	efi_scratch+25(%rip), %rsp + +	/* +	 * Calculate the physical address of the kernel text. +	 */ +	movq	$__START_KERNEL_map, %rax +	subq	phys_base(%rip), %rax + +	/* +	 * Push some physical addresses onto the stack. This is easier +	 * to do now in a code64 section while the assembler can address +	 * 64-bit values. Note that all the addresses on the stack are +	 * 32-bit. +	 */ +	subq	$16, %rsp +	leaq	efi_exit32(%rip), %rbx +	subq	%rax, %rbx +	movl	%ebx, 8(%rsp) +	leaq	efi_gdt64(%rip), %rbx +	subq	%rax, %rbx +	movl	%ebx, 2(%ebx) +	movl	%ebx, 4(%rsp) +	leaq	efi_gdt32(%rip), %rbx +	subq	%rax, %rbx +	movl	%ebx, 2(%ebx) +	movl	%ebx, (%rsp) + +	leaq	__efi64_thunk(%rip), %rbx +	subq	%rax, %rbx +	call	*%rbx + +	movq	efi_saved_sp(%rip), %rsp +	pop	%rbx +	pop	%rbp +	retq +ENDPROC(efi64_thunk) + +	.data +efi_gdt32: +	.word 	efi_gdt32_end - efi_gdt32 +	.long	0			/* Filled out above */ +	.word	0 +	.quad	0x0000000000000000	/* NULL descriptor */ +	.quad	0x00cf9a000000ffff	/* __KERNEL_CS */ +	.quad	0x00cf93000000ffff	/* __KERNEL_DS */ +efi_gdt32_end: + +efi_saved_sp:		.quad 0  | 
