aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-02-21 18:06:55 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2013-02-21 18:06:55 -0800
commit2ef14f465b9e096531343f5b734cffc5f759f4a6 (patch)
tree07b504d7105842a4b1a74cf1e153023a02fb9c1e
parentcb715a836642e0ec69350670d1c2f800f3e2d2e4 (diff)
parent0da3e7f526fde7a6522a3038b7ce609fc50f6707 (diff)
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm changes from Peter Anvin: "This is a huge set of several partly interrelated (and concurrently developed) changes, which is why the branch history is messier than one would like. The *really* big items are two humonguous patchsets mostly developed by Yinghai Lu at my request, which completely revamps the way we create initial page tables. In particular, rather than estimating how much memory we will need for page tables and then build them into that memory -- a calculation that has shown to be incredibly fragile -- we now build them (on 64 bits) with the aid of a "pseudo-linear mode" -- a #PF handler which creates temporary page tables on demand. This has several advantages: 1. It makes it much easier to support things that need access to data very early (a followon patchset uses this to load microcode way early in the kernel startup). 2. It allows the kernel and all the kernel data objects to be invoked from above the 4 GB limit. This allows kdump to work on very large systems. 3. It greatly reduces the difference between Xen and native (Xen's equivalent of the #PF handler are the temporary page tables created by the domain builder), eliminating a bunch of fragile hooks. The patch series also gets us a bit closer to W^X. Additional work in this pull is the 64-bit get_user() work which you were also involved with, and a bunch of cleanups/speedups to __phys_addr()/__pa()." * 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (105 commits) x86, mm: Move reserving low memory later in initialization x86, doc: Clarify the use of asm("%edx") in uaccess.h x86, mm: Redesign get_user with a __builtin_choose_expr hack x86: Be consistent with data size in getuser.S x86, mm: Use a bitfield to mask nuisance get_user() warnings x86/kvm: Fix compile warning in kvm_register_steal_time() x86-32: Add support for 64bit get_user() x86-32, mm: Remove reference to alloc_remap() x86-32, mm: Remove reference to resume_map_numa_kva() x86-32, mm: Rip out x86_32 NUMA remapping code x86/numa: Use __pa_nodebug() instead x86: Don't panic if can not alloc buffer for swiotlb mm: Add alloc_bootmem_low_pages_nopanic() x86, 64bit, mm: hibernate use generic mapping_init x86, 64bit, mm: Mark data/bss/brk to nx x86: Merge early kernel reserve for 32bit and 64bit x86: Add Crash kernel low reservation x86, kdump: Remove crashkernel range find limit for 64bit memblock: Add memblock_mem_size() x86, boot: Not need to check setup_header version for setup_data ...
-rw-r--r--Documentation/kernel-parameters.txt3
-rw-r--r--Documentation/x86/boot.txt38
-rw-r--r--arch/mips/cavium-octeon/dma-octeon.c3
-rw-r--r--arch/sparc/mm/init_64.c24
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/boot/boot.h18
-rw-r--r--arch/x86/boot/cmdline.c12
-rw-r--r--arch/x86/boot/compressed/cmdline.c12
-rw-r--r--arch/x86/boot/compressed/head_64.S48
-rw-r--r--arch/x86/boot/header.S10
-rw-r--r--arch/x86/include/asm/init.h28
-rw-r--r--arch/x86/include/asm/kexec.h6
-rw-r--r--arch/x86/include/asm/mmzone_32.h6
-rw-r--r--arch/x86/include/asm/numa.h2
-rw-r--r--arch/x86/include/asm/numa_64.h6
-rw-r--r--arch/x86/include/asm/page.h7
-rw-r--r--arch/x86/include/asm/page_32.h1
-rw-r--r--arch/x86/include/asm/page_64.h36
-rw-r--r--arch/x86/include/asm/page_64_types.h22
-rw-r--r--arch/x86/include/asm/page_types.h2
-rw-r--r--arch/x86/include/asm/pgtable.h17
-rw-r--r--arch/x86/include/asm/pgtable_64.h5
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h4
-rw-r--r--arch/x86/include/asm/pgtable_types.h4
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/realmode.h3
-rw-r--r--arch/x86/include/asm/uaccess.h55
-rw-r--r--arch/x86/include/asm/x86_init.h12
-rw-r--r--arch/x86/kernel/acpi/boot.c1
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/amd_gart_64.c5
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c1
-rw-r--r--arch/x86/kernel/cpu/amd.c9
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/e820.c16
-rw-r--r--arch/x86/kernel/ftrace.c4
-rw-r--r--arch/x86/kernel/head32.c20
-rw-r--r--arch/x86/kernel/head64.c131
-rw-r--r--arch/x86/kernel/head_64.S210
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c1
-rw-r--r--arch/x86/kernel/kvm.c11
-rw-r--r--arch/x86/kernel/kvmclock.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c171
-rw-r--r--arch/x86/kernel/setup.c260
-rw-r--r--arch/x86/kernel/traps.c9
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c3
-rw-r--r--arch/x86/kernel/x86_init.c4
-rw-r--r--arch/x86/lguest/boot.c3
-rw-r--r--arch/x86/lib/getuser.S43
-rw-r--r--arch/x86/mm/init.c459
-rw-r--r--arch/x86/mm/init_32.c106
-rw-r--r--arch/x86/mm/init_64.c255
-rw-r--r--arch/x86/mm/mm_internal.h19
-rw-r--r--arch/x86/mm/numa.c32
-rw-r--r--arch/x86/mm/numa_32.c161
-rw-r--r--arch/x86/mm/numa_64.c13
-rw-r--r--arch/x86/mm/numa_internal.h6
-rw-r--r--arch/x86/mm/pageattr.c66
-rw-r--r--arch/x86/mm/pat.c4
-rw-r--r--arch/x86/mm/pgtable.c7
-rw-r--r--arch/x86/mm/physaddr.c60
-rw-r--r--arch/x86/platform/efi/efi.c11
-rw-r--r--arch/x86/power/hibernate_32.c2
-rw-r--r--arch/x86/power/hibernate_64.c66
-rw-r--r--arch/x86/realmode/init.c49
-rw-r--r--arch/x86/xen/mmu.c28
-rw-r--r--drivers/xen/swiotlb-xen.c4
-rw-r--r--include/linux/bootmem.h5
-rw-r--r--include/linux/kexec.h3
-rw-r--r--include/linux/memblock.h1
-rw-r--r--include/linux/mm.h1
-rw-r--r--include/linux/swiotlb.h2
-rw-r--r--kernel/kexec.c34
-rw-r--r--lib/swiotlb.c47
-rw-r--r--mm/bootmem.c8
-rw-r--r--mm/memblock.c17
-rw-r--r--mm/nobootmem.c23
77 files changed, 1542 insertions, 1247 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4c5b3f993bb..9aa8ff3e54d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -594,6 +594,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
is selected automatically. Check
Documentation/kdump/kdump.txt for further details.
+ crashkernel_low=size[KMG]
+ [KNL, x86] parts under 4G.
+
crashkernel=range1:size1[,range2:size2,...][@offset]
[KNL] Same as above, but depends on the memory
in the running system. The syntax of range is
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index b443f1de0e5..3840b6f28af 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -1055,6 +1055,44 @@ must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
must be __BOOT_DS; interrupt must be disabled; %esi must hold the base
address of the struct boot_params; %ebp, %edi and %ebx must be zero.
+**** 64-bit BOOT PROTOCOL
+
+For machine with 64bit cpus and 64bit kernel, we could use 64bit bootloader
+and we need a 64-bit boot protocol.
+
+In 64-bit boot protocol, the first step in loading a Linux kernel
+should be to setup the boot parameters (struct boot_params,
+traditionally known as "zero page"). The memory for struct boot_params
+could be allocated anywhere (even above 4G) and initialized to all zero.
+Then, the setup header at offset 0x01f1 of kernel image on should be
+loaded into struct boot_params and examined. The end of setup header
+can be calculated as follows:
+
+ 0x0202 + byte value at offset 0x0201
+
+In addition to read/modify/write the setup header of the struct
+boot_params as that of 16-bit boot protocol, the boot loader should
+also fill the additional fields of the struct boot_params as described
+in zero-page.txt.
+
+After setting up the struct boot_params, the boot loader can load
+64-bit kernel in the same way as that of 16-bit boot protocol, but
+kernel could be loaded above 4G.
+
+In 64-bit boot protocol, the kernel is started by jumping to the
+64-bit kernel entry point, which is the start address of loaded
+64-bit kernel plus 0x200.
+
+At entry, the CPU must be in 64-bit mode with paging enabled.
+The range with setup_header.init_size from start address of loaded
+kernel and zero page and command line buffer get ident mapping;
+a GDT must be loaded with the descriptors for selectors
+__BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat
+segment; __BOOT_CS must have execute/read permission, and __BOOT_DS
+must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
+must be __BOOT_DS; interrupt must be disabled; %rsi must hold the base
+address of the struct boot_params.
+
**** EFI HANDOVER PROTOCOL
This protocol allows boot loaders to defer initialisation to the EFI
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index 41dd0088497..02f24447520 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -317,7 +317,8 @@ void __init plat_swiotlb_setup(void)
octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize);
- swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1);
+ if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1) == -ENOMEM)
+ panic("Cannot allocate SWIOTLB buffer");
mips_dma_map_ops = &octeon_linear_dma_map_ops.dma_map_ops;
}
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 82bbf048a5b..5c2c6e61fac 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2027,6 +2027,16 @@ static void __init patch_tlb_miss_handler_bitmap(void)
flushi(&valid_addr_bitmap_insn[0]);
}
+static void __init register_page_bootmem_info(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ int i;
+
+ for_each_online_node(i)
+ if (NODE_DATA(i)->node_spanned_pages)
+ register_page_bootmem_info_node(NODE_DATA(i));
+#endif
+}
void __init mem_init(void)
{
unsigned long codepages, datapages, initpages;
@@ -2044,20 +2054,8 @@ void __init mem_init(void)
high_memory = __va(last_valid_pfn << PAGE_SHIFT);
-#ifdef CONFIG_NEED_MULTIPLE_NODES
- {
- int i;
- for_each_online_node(i) {
- if (NODE_DATA(i)->node_spanned_pages != 0) {
- totalram_pages +=
- free_all_bootmem_node(NODE_DATA(i));
- }
- }
- totalram_pages += free_low_memory_core_early(MAX_NUMNODES);
- }
-#else
+ register_page_bootmem_info();
totalram_pages = free_all_bootmem();
-#endif
/* We subtract one to account for the mem_map_zero page
* allocated below.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b44c0b50e56..ff0e5f3c844 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1277,10 +1277,6 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
-config HAVE_ARCH_ALLOC_REMAP
- def_bool y
- depends on X86_32 && NUMA
-
config ARCH_HAVE_MEMORY_PRESENT
def_bool y
depends on X86_32 && DISCONTIGMEM
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 18997e5a105..5b7531966b8 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -285,16 +285,26 @@ struct biosregs {
void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
/* cmdline.c */
-int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize);
-int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option);
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize);
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option);
static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
{
- return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize);
+ unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+ if (cmd_line_ptr >= 0x100000)
+ return -1; /* inaccessible */
+
+ return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize);
}
static inline int cmdline_find_option_bool(const char *option)
{
- return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option);
+ unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+ if (cmd_line_ptr >= 0x100000)
+ return -1; /* inaccessible */
+
+ return __cmdline_find_option_bool(cmd_line_ptr, option);
}
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index 6b3b6f708c0..625d21b0cd3 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -27,7 +27,7 @@ static inline int myisspace(u8 c)
* Returns the length of the argument (regardless of if it was
* truncated to fit in the buffer), or -1 on not found.
*/
-int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize)
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize)
{
addr_t cptr;
char c;
@@ -41,8 +41,8 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
st_bufcpy /* Copying this to buffer */
} state = st_wordstart;
- if (!cmdline_ptr || cmdline_ptr >= 0x100000)
- return -1; /* No command line, or inaccessible */
+ if (!cmdline_ptr)
+ return -1; /* No command line */
cptr = cmdline_ptr & 0xf;
set_fs(cmdline_ptr >> 4);
@@ -99,7 +99,7 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
* Returns the position of that option (starts counting with 1)
* or 0 on not found
*/
-int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
{
addr_t cptr;
char c;
@@ -111,8 +111,8 @@ int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
st_wordskip, /* Miscompare, skip */
} state = st_wordstart;
- if (!cmdline_ptr || cmdline_ptr >= 0x100000)
- return -1; /* No command line, or inaccessible */
+ if (!cmdline_ptr)
+ return -1; /* No command line */
cptr = cmdline_ptr & 0xf;
set_fs(cmdline_ptr >> 4);
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index 10f6b1178c6..bffd73b45b1 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -13,13 +13,21 @@ static inline char rdfs8(addr_t addr)
return *((char *)(fs + addr));
}
#include "../cmdline.c"
+static unsigned long get_cmd_line_ptr(void)
+{
+ unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr;
+
+ cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32;
+
+ return cmd_line_ptr;
+}
int cmdline_find_option(const char *option, char *buffer, int bufsize)
{
- return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize);
+ return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize);
}
int cmdline_find_option_bool(const char *option)
{
- return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
+ return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
}
#endif
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f5d1aaa0dec..c1d383d1fb7 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -37,6 +37,12 @@
__HEAD
.code32
ENTRY(startup_32)
+ /*
+ * 32bit entry is 0 and it is ABI so immutable!
+ * If we come here directly from a bootloader,
+ * kernel(text+data+bss+brk) ramdisk, zero_page, command line
+ * all need to be under the 4G limit.
+ */
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -154,6 +160,12 @@ ENTRY(startup_32)
btsl $_EFER_LME, %eax
wrmsr
+ /* After gdt is loaded */
+ xorl %eax, %eax
+ lldt %ax
+ movl $0x20, %eax
+ ltr %ax
+
/*
* Setup for the jump to 64bit mode
*
@@ -176,28 +188,18 @@ ENTRY(startup_32)
lret
ENDPROC(startup_32)
-no_longmode:
- /* This isn't an x86-64 CPU so hang */
-1:
- hlt
- jmp 1b
-
-#include "../../kernel/verify_cpu.S"
-
- /*
- * Be careful here startup_64 needs to be at a predictable
- * address so I can export it in an ELF header. Bootloaders
- * should look at the ELF header to find this address, as
- * it may change in the future.
- */
.code64
.org 0x200
ENTRY(startup_64)
/*
+ * 64bit entry is 0x200 and it is ABI so immutable!
* We come here either from startup_32 or directly from a
- * 64bit bootloader. If we come here from a bootloader we depend on
- * an identity mapped page table being provied that maps our
- * entire text+data+bss and hopefully all of memory.
+ * 64bit bootloader.
+ * If we come here from a bootloader, kernel(text+data+bss+brk),
+ * ramdisk, zero_page, command line could be above 4G.
+ * We depend on an identity mapped page table being provided
+ * that maps our entire kernel(text+data+bss+brk), zero page
+ * and command line.
*/
#ifdef CONFIG_EFI_STUB
/*
@@ -247,9 +249,6 @@ preferred_addr:
movl %eax, %ss
movl %eax, %fs
movl %eax, %gs
- lldt %ax
- movl $0x20, %eax
- ltr %ax
/*
* Compute the decompressed kernel start address. It is where
@@ -349,6 +348,15 @@ relocated:
*/
jmp *%rbp
+ .code32
+no_longmode:
+ /* This isn't an x86-64 CPU so hang */
+1:
+ hlt
+ jmp 1b
+
+#include "../../kernel/verify_cpu.S"
+
.data
gdt:
.word gdt_end - gdt
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 944ce595f76..9ec06a1f6d6 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -374,6 +374,14 @@ xloadflags:
#else
# define XLF0 0
#endif
+
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
+ /* kernel/boot_param/ramdisk could be loaded above 4g */
+# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
+#else
+# define XLF1 0
+#endif
+
#ifdef CONFIG_EFI_STUB
# ifdef CONFIG_X86_64
# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */
@@ -383,7 +391,7 @@ xloadflags:
#else
# define XLF23 0
#endif
- .word XLF0 | XLF23
+ .word XLF0 | XLF1 | XLF23
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
#added with boot protocol
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index adcc0ae73d0..223042086f4 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -1,20 +1,14 @@
-#ifndef _ASM_X86_INIT_32_H
-#define _ASM_X86_INIT_32_H
+#ifndef _ASM_X86_INIT_H
+#define _ASM_X86_INIT_H
-#ifdef CONFIG_X86_32
-extern void __init early_ioremap_page_table_range_init(void);
-#endif
+struct x86_mapping_info {
+ void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
+ void *context; /* context for alloc_pgt_page */
+ unsigned long pmd_flag; /* page flag for PMD entry */
+ bool kernel_mapping; /* kernel mapping or ident mapping */
+};
-extern void __init zone_sizes_init(void);
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+ unsigned long addr, unsigned long end);
-extern unsigned long __init
-kernel_physical_mapping_init(unsigned long start,
- unsigned long end,
- unsigned long page_size_mask);
-
-
-extern unsigned long __initdata pgt_buf_start;
-extern unsigned long __meminitdata pgt_buf_end;
-extern unsigned long __meminitdata pgt_buf_top;
-
-#endif /* _ASM_X86_INIT_32_H */
+#endif /* _ASM_X86_INIT_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 6080d2694ba..17483a492f1 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h