Diffstat (limited to 'arch/x86/include/asm')
123 files changed, 2161 insertions, 1656 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 7f669853317..3ca9762e164 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,3 +5,6 @@ genhdr-y += unistd_64.h  genhdr-y += unistd_x32.h  generic-y += clkdev.h +generic-y += early_ioremap.h +generic-y += cputime.h +generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h new file mode 100644 index 00000000000..66873297e9f --- /dev/null +++ b/arch/x86/include/asm/acenv.h @@ -0,0 +1,49 @@ +/* + * X86 specific ACPICA environments and implementation + * + * Copyright (C) 2014, Intel Corporation + *   Author: Lv Zheng <lv.zheng@intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_X86_ACENV_H +#define _ASM_X86_ACENV_H + +#include <asm/special_insns.h> + +/* Asm macros */ + +#define ACPI_FLUSH_CPU_CACHE()	wbinvd() + +#ifdef CONFIG_ACPI + +int __acpi_acquire_global_lock(unsigned int *lock); +int __acpi_release_global_lock(unsigned int *lock); + +#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ +	((Acq) = __acpi_acquire_global_lock(&facs->global_lock)) + +#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ +	((Acq) = __acpi_release_global_lock(&facs->global_lock)) + +/* + * Math helper asm macros + */ +#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \ +	asm("divl %2;"				     \ +	    : "=a"(q32), "=d"(r32)		     \ +	    : "r"(d32),				     \ +	     "0"(n_lo), "1"(n_hi)) + +#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \ +	asm("shrl   $1,%2	;"	\ +	    "rcrl   $1,%3;"		\ +	    : "=r"(n_hi), "=r"(n_lo)	\ +	    : "0"(n_hi), "1"(n_lo)) + +#endif + +#endif /* _ASM_X86_ACENV_H */ diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index b1977bad543..e06225eda63 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -26,56 +26,12 @@  #include <acpi/pdc_intel.h>  #include <asm/numa.h> +#include <asm/fixmap.h>  #include <asm/processor.h>  #include <asm/mmu.h>  #include <asm/mpspec.h>  #include <asm/realmode.h> -#define COMPILER_DEPENDENT_INT64   long long -#define COMPILER_DEPENDENT_UINT64  unsigned long long - -/* - * Calling conventions: - * - * ACPI_SYSTEM_XFACE        - Interfaces to host OS (handlers, threads) - * ACPI_EXTERNAL_XFACE      - External ACPI interfaces - * ACPI_INTERNAL_XFACE      - Internal ACPI interfaces - * ACPI_INTERNAL_VAR_XFACE  - Internal variable-parameter list interfaces - */ -#define ACPI_SYSTEM_XFACE -#define ACPI_EXTERNAL_XFACE -#define ACPI_INTERNAL_XFACE -#define ACPI_INTERNAL_VAR_XFACE - -/* Asm macros */ - -#define ACPI_FLUSH_CPU_CACHE()	wbinvd() - -int __acpi_acquire_global_lock(unsigned int *lock); -int __acpi_release_global_lock(unsigned int *lock); - -#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ -	((Acq) = __acpi_acquire_global_lock(&facs->global_lock)) - -#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ -	((Acq) = __acpi_release_global_lock(&facs->global_lock)) - -/* - * Math helper asm macros - */ -#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \ -	asm("divl %2;"				     \ -	    : "=a"(q32), "=d"(r32)		     \ -	    : "r"(d32),				     \ -	     "0"(n_lo), "1"(n_hi)) - - -#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \ -	asm("shrl   $1,%2	;"	\ -	    "rcrl   $1,%3;"		\ -	    : "=r"(n_hi), "=r"(n_lo)	\ -	    : "0"(n_hi), "1"(n_lo)) -  #ifdef CONFIG_ACPI  extern int acpi_lapic;  extern int acpi_ioapic; diff --git 
a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index a54ee1d054d..aaac3b2fb74 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -19,7 +19,7 @@ extern int amd_cache_northbridges(void);  extern void amd_flush_garts(void);  extern int amd_numa_init(void);  extern int amd_get_subcaches(int); -extern int amd_set_subcaches(int, int); +extern int amd_set_subcaches(int, unsigned long);  struct amd_l3_cache {  	unsigned indices; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1d2091a226b..19b0ebafcd3 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -93,9 +93,6 @@ static inline int is_vsmp_box(void)  	return 0;  }  #endif -extern void xapic_wait_icr_idle(void); -extern u32 safe_xapic_wait_icr_idle(void); -extern void xapic_icr_write(u32, u32);  extern int setup_profiling_timer(unsigned int);  static inline void native_apic_mem_write(u32 reg, u32 v) @@ -184,7 +181,6 @@ extern int x2apic_phys;  extern int x2apic_preenabled;  extern void check_x2apic(void);  extern void enable_x2apic(void); -extern void x2apic_icr_write(u32 low, u32 id);  static inline int x2apic_enabled(void)  {  	u64 msr; @@ -221,7 +217,6 @@ static inline void x2apic_force_phys(void)  {  } -#define	nox2apic	0  #define	x2apic_preenabled 0  #define	x2apic_supported()	0  #endif @@ -351,7 +346,7 @@ struct apic {  	int trampoline_phys_low;  	int trampoline_phys_high; -	void (*wait_for_init_deassert)(atomic_t *deassert); +	bool wait_for_init_deassert;  	void (*smp_callin_clear_local_apic)(void);  	void (*inquire_remote_apic)(int apicid); @@ -517,13 +512,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu);  extern int default_check_phys_apicid_present(int phys_apicid);  #endif -static inline void default_wait_for_init_deassert(atomic_t *deassert) -{ -	while (!atomic_read(deassert)) -		cpu_relax(); -	return; -} -  extern void generic_bigsmp_probe(void); diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 0d9ec770f2f..69f1366f1aa 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -1,7 +1,7 @@  /*   * This file is part of the Linux kernel.   * - * Copyright (c) 2011, Intel Corporation + * Copyright (c) 2011-2014, Intel Corporation   * Authors: Fenghua Yu <fenghua.yu@intel.com>,   *          H. Peter Anvin <hpa@linux.intel.com>   * @@ -31,14 +31,41 @@  #define RDRAND_RETRY_LOOPS	10  #define RDRAND_INT	".byte 0x0f,0xc7,0xf0" +#define RDSEED_INT	".byte 0x0f,0xc7,0xf8"  #ifdef CONFIG_X86_64  # define RDRAND_LONG	".byte 0x48,0x0f,0xc7,0xf0" +# define RDSEED_LONG	".byte 0x48,0x0f,0xc7,0xf8"  #else  # define RDRAND_LONG	RDRAND_INT +# define RDSEED_LONG	RDSEED_INT  #endif  #ifdef CONFIG_ARCH_RANDOM +/* Instead of arch_get_random_long() when alternatives haven't run. 
*/ +static inline int rdrand_long(unsigned long *v) +{ +	int ok; +	asm volatile("1: " RDRAND_LONG "\n\t" +		     "jc 2f\n\t" +		     "decl %0\n\t" +		     "jnz 1b\n\t" +		     "2:" +		     : "=r" (ok), "=a" (*v) +		     : "0" (RDRAND_RETRY_LOOPS)); +	return ok; +} + +/* A single attempt at RDSEED */ +static inline bool rdseed_long(unsigned long *v) +{ +	unsigned char ok; +	asm volatile(RDSEED_LONG "\n\t" +		     "setc %0" +		     : "=qm" (ok), "=a" (*v)); +	return ok; +} +  #define GET_RANDOM(name, type, rdrand, nop)			\  static inline int name(type *v)					\  {								\ @@ -56,18 +83,52 @@ static inline int name(type *v)					\  	return ok;						\  } +#define GET_SEED(name, type, rdseed, nop)			\ +static inline int name(type *v)					\ +{								\ +	unsigned char ok;					\ +	alternative_io("movb $0, %0\n\t"			\ +		       nop,					\ +		       rdseed "\n\t"				\ +		       "setc %0",				\ +		       X86_FEATURE_RDSEED,                      \ +		       ASM_OUTPUT2("=q" (ok), "=a" (*v)));	\ +	return ok;						\ +} +  #ifdef CONFIG_X86_64  GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5);  GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); +GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5); +GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); +  #else  GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3);  GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); +GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4); +GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); +  #endif /* CONFIG_X86_64 */ +#define arch_has_random()	static_cpu_has(X86_FEATURE_RDRAND) +#define arch_has_random_seed()	static_cpu_has(X86_FEATURE_RDSEED) + +#else + +static inline int rdrand_long(unsigned long *v) +{ +	return 0; +} + +static inline bool rdseed_long(unsigned long *v) +{ +	return 0; +} +  #endif  /* CONFIG_ARCH_RANDOM */  extern void x86_init_rdrand(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 4582e8e1cd1..7730c1c5c83 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -57,6 +57,12 @@  	.long (from) - . ;					\  	.long (to) - . + 0x7ffffff0 ;				\  	.popsection + +# define _ASM_NOKPROBE(entry)					\ +	.pushsection "_kprobe_blacklist","aw" ;			\ +	_ASM_ALIGN ;						\ +	_ASM_PTR (entry);					\ +	.popsection  #else  # define _ASM_EXTABLE(from,to)					\  	" .pushsection \"__ex_table\",\"a\"\n"			\ @@ -71,6 +77,7 @@  	" .long (" #from ") - .\n"				\  	" .long (" #to ") - . + 0x7ffffff0\n"			\  	" .popsection\n" +/* For C file, we already have NOKPROBE_SYMBOL macro */  #endif  #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 722aa3b0462..6dd1c7dd047 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -6,6 +6,8 @@  #include <asm/processor.h>  #include <asm/alternative.h>  #include <asm/cmpxchg.h> +#include <asm/rmwcc.h> +#include <asm/barrier.h>  /*   * Atomic operations that C can't guarantee us.  
Useful for @@ -76,12 +78,7 @@ static inline void atomic_sub(int i, atomic_t *v)   */  static inline int atomic_sub_and_test(int i, atomic_t *v)  { -	unsigned char c; - -	asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" -		     : "+m" (v->counter), "=qm" (c) -		     : "ir" (i) : "memory"); -	return c; +	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e");  }  /** @@ -118,12 +115,7 @@ static inline void atomic_dec(atomic_t *v)   */  static inline int atomic_dec_and_test(atomic_t *v)  { -	unsigned char c; - -	asm volatile(LOCK_PREFIX "decl %0; sete %1" -		     : "+m" (v->counter), "=qm" (c) -		     : : "memory"); -	return c != 0; +	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");  }  /** @@ -136,12 +128,7 @@ static inline int atomic_dec_and_test(atomic_t *v)   */  static inline int atomic_inc_and_test(atomic_t *v)  { -	unsigned char c; - -	asm volatile(LOCK_PREFIX "incl %0; sete %1" -		     : "+m" (v->counter), "=qm" (c) -		     : : "memory"); -	return c != 0; +	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");  }  /** @@ -155,12 +142,7 @@ static inline int atomic_inc_and_test(atomic_t *v)   */  static inline int atomic_add_negative(int i, atomic_t *v)  { -	unsigned char c; - -	asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" -		     : "+m" (v->counter), "=qm" (c) -		     : "ir" (i) : "memory"); -	return c; +	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s");  }  /** @@ -262,12 +244,6 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2)  		     : : "r" ((unsigned)(mask)), "m" (*(addr))	\  		     : "memory") -/* Atomic operations are already serializing on x86 */ -#define smp_mb__before_atomic_dec()	barrier() -#define smp_mb__after_atomic_dec()	barrier() -#define smp_mb__before_atomic_inc()	barrier() -#define smp_mb__after_atomic_inc()	barrier() -  #ifdef CONFIG_X86_32  # include <asm/atomic64_32.h>  #else diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 0e1cbfc8ee0..46e9052bbd2 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -72,12 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)   */  static inline int atomic64_sub_and_test(long i, atomic64_t *v)  { -	unsigned char c; - -	asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" -		     : "=m" (v->counter), "=qm" (c) -		     : "er" (i), "m" (v->counter) : "memory"); -	return c; +	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e");  }  /** @@ -116,12 +111,7 @@ static inline void atomic64_dec(atomic64_t *v)   */  static inline int atomic64_dec_and_test(atomic64_t *v)  { -	unsigned char c; - -	asm volatile(LOCK_PREFIX "decq %0; sete %1" -		     : "=m" (v->counter), "=qm" (c) -		     : "m" (v->counter) : "memory"); -	return c != 0; +	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e");  }  /** @@ -134,12 +124,7 @@ static inline int atomic64_dec_and_test(atomic64_t *v)   */  static inline int atomic64_inc_and_test(atomic64_t *v)  { -	unsigned char c; - -	asm volatile(LOCK_PREFIX "incq %0; sete %1" -		     : "=m" (v->counter), "=qm" (c) -		     : "m" (v->counter) : "memory"); -	return c != 0; +	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e");  }  /** @@ -153,12 +138,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v)   */  static inline int atomic64_add_negative(long i, atomic64_t *v)  { -	unsigned char c; - -	asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" -		     : "=m" (v->counter), "=qm" (c) -		     : "er" (i), "m" (v->counter) : "memory"); 
-	return c; +	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s");  }  /** diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index c6cd358a1ee..5c7198cca5e 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -85,21 +85,62 @@  #else  # define smp_rmb()	barrier()  #endif -#ifdef CONFIG_X86_OOSTORE -# define smp_wmb() 	wmb() -#else -# define smp_wmb()	barrier() -#endif +#define smp_wmb()	barrier()  #define smp_read_barrier_depends()	read_barrier_depends()  #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) -#else +#else /* !SMP */  #define smp_mb()	barrier()  #define smp_rmb()	barrier()  #define smp_wmb()	barrier()  #define smp_read_barrier_depends()	do { } while (0)  #define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif /* SMP */ + +#if defined(CONFIG_X86_PPRO_FENCE) + +/* + * For either of these options x86 doesn't have a strong TSO memory + * model and we should fall back to full barriers. + */ + +#define smp_store_release(p, v)						\ +do {									\ +	compiletime_assert_atomic_type(*p);				\ +	smp_mb();							\ +	ACCESS_ONCE(*p) = (v);						\ +} while (0) + +#define smp_load_acquire(p)						\ +({									\ +	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\ +	compiletime_assert_atomic_type(*p);				\ +	smp_mb();							\ +	___p1;								\ +}) + +#else /* regular x86 TSO memory ordering */ + +#define smp_store_release(p, v)						\ +do {									\ +	compiletime_assert_atomic_type(*p);				\ +	barrier();							\ +	ACCESS_ONCE(*p) = (v);						\ +} while (0) + +#define smp_load_acquire(p)						\ +({									\ +	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\ +	compiletime_assert_atomic_type(*p);				\ +	barrier();							\ +	___p1;								\ +}) +  #endif +/* Atomic operations are already serializing on x86 */ +#define smp_mb__before_atomic()	barrier() +#define smp_mb__after_atomic()	barrier() +  /*   * Stop RDTSC speculation. This is needed when you need to use RDTSC   * (or get_cycles or vread that possibly accesses the TSC) in a defined diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 41639ce8fd6..afcd35d331d 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -14,6 +14,8 @@  #include <linux/compiler.h>  #include <asm/alternative.h> +#include <asm/rmwcc.h> +#include <asm/barrier.h>  #if BITS_PER_LONG == 32  # define _BITOPS_LONG_SHIFT 5 @@ -101,7 +103,7 @@ static inline void __set_bit(long nr, volatile unsigned long *addr)   *   * clear_bit() is atomic and may not be reordered.  However, it does   * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()   * in order to ensure changes are visible on other processors.   
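The smp_mb__before_clear_bit()/smp_mb__after_clear_bit() pair is folded into the generic smp_mb__before_atomic()/smp_mb__after_atomic() names above, and barrier.h also gains smp_store_release()/smp_load_acquire(), all of which reduce to a compiler barrier() on regular TSO x86. As a minimal sketch of the new acquire/release pair (illustrative only, not part of this patch; the struct and function names are made up):

struct msg {
	int payload;
	int ready;
};

static void publish(struct msg *m, int val)
{
	m->payload = val;
	smp_store_release(&m->ready, 1);	/* order the payload before the flag */
}

static int consume(struct msg *m)
{
	if (smp_load_acquire(&m->ready))	/* pairs with the release above */
		return m->payload;
	return -1;
}
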
*/  static __always_inline void @@ -155,9 +157,6 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)  	__clear_bit(nr, addr);  } -#define smp_mb__before_clear_bit()	barrier() -#define smp_mb__after_clear_bit()	barrier() -  /**   * __change_bit - Toggle a bit in memory   * @nr: the bit to change @@ -204,12 +203,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)   */  static inline int test_and_set_bit(long nr, volatile unsigned long *addr)  { -	int oldbit; - -	asm volatile(LOCK_PREFIX "bts %2,%1\n\t" -		     "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); - -	return oldbit; +	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");  }  /** @@ -255,13 +249,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)   */  static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)  { -	int oldbit; - -	asm volatile(LOCK_PREFIX "btr %2,%1\n\t" -		     "sbb %0,%0" -		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); - -	return oldbit; +	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");  }  /** @@ -314,13 +302,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)   */  static inline int test_and_change_bit(long nr, volatile unsigned long *addr)  { -	int oldbit; - -	asm volatile(LOCK_PREFIX "btc %2,%1\n\t" -		     "sbb %0,%0" -		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); - -	return oldbit; +	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");  }  static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 2f03ff018d3..ba38ebbaced 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -1,7 +1,6 @@  #ifndef _ASM_X86_BUG_H  #define _ASM_X86_BUG_H -#ifdef CONFIG_BUG  #define HAVE_ARCH_BUG  #ifdef CONFIG_DEBUG_BUGVERBOSE @@ -33,8 +32,6 @@ do {								\  } while (0)  #endif -#endif /* !CONFIG_BUG */ -  #include <asm-generic/bug.h>  #endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 0fa67503391..cb4c73bfeb4 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -48,6 +48,8 @@ For 32-bit we have the following conventions - kernel is built with  #include <asm/dwarf2.h> +#ifdef CONFIG_X86_64 +  /*   * 64-bit system call stack frame layout defines and helpers,   * for assembly code: @@ -192,3 +194,51 @@ For 32-bit we have the following conventions - kernel is built with  	.macro icebp  	.byte 0xf1  	.endm + +#else /* CONFIG_X86_64 */ + +/* + * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These + * are different from the entry_32.S versions in not changing the segment + * registers. So only suitable for in kernel use, not when transitioning + * from or to user space. The resulting stack frame is not a standard + * pt_regs frame. The main use case is calling C code from assembler + * when all the registers need to be preserved. 
+ */ + +	.macro SAVE_ALL +	pushl_cfi %eax +	CFI_REL_OFFSET eax, 0 +	pushl_cfi %ebp +	CFI_REL_OFFSET ebp, 0 +	pushl_cfi %edi +	CFI_REL_OFFSET edi, 0 +	pushl_cfi %esi +	CFI_REL_OFFSET esi, 0 +	pushl_cfi %edx +	CFI_REL_OFFSET edx, 0 +	pushl_cfi %ecx +	CFI_REL_OFFSET ecx, 0 +	pushl_cfi %ebx +	CFI_REL_OFFSET ebx, 0 +	.endm + +	.macro RESTORE_ALL +	popl_cfi %ebx +	CFI_RESTORE ebx +	popl_cfi %ecx +	CFI_RESTORE ecx +	popl_cfi %edx +	CFI_RESTORE edx +	popl_cfi %esi +	CFI_RESTORE esi +	popl_cfi %edi +	CFI_RESTORE edi +	popl_cfi %ebp +	CFI_RESTORE ebp +	popl_cfi %eax +	CFI_RESTORE eax +	.endm + +#endif /* CONFIG_X86_64 */ + diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h index e6fd8a026c7..cd00e177449 100644 --- a/arch/x86/include/asm/checksum_64.h +++ b/arch/x86/include/asm/checksum_64.h @@ -184,8 +184,15 @@ static inline unsigned add32_with_carry(unsigned a, unsigned b)  	asm("addl %2,%0\n\t"  	    "adcl $0,%0"  	    : "=r" (a) -	    : "0" (a), "r" (b)); +	    : "0" (a), "rm" (b));  	return a;  } +#define HAVE_ARCH_CSUM_ADD +static inline __wsum csum_add(__wsum csum, __wsum addend) +{ +	return (__force __wsum)add32_with_carry((__force unsigned)csum, +						(__force unsigned)addend); +} +  #endif /* _ASM_X86_CHECKSUM_64_H */ diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index 16a57f4ed64..eda81dc0f4a 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h @@ -3,8 +3,6 @@  #ifndef _ASM_X86_CLOCKSOURCE_H  #define _ASM_X86_CLOCKSOURCE_H -#ifdef CONFIG_X86_64 -  #define VCLOCK_NONE 0  /* No vDSO clock available.	*/  #define VCLOCK_TSC  1  /* vDSO should use vread_tsc.	*/  #define VCLOCK_HPET 2  /* vDSO should use vread_hpet.	*/ @@ -14,6 +12,4 @@ struct arch_clocksource_data {  	int vclock_mode;  }; -#endif /* CONFIG_X86_64 */ -  #endif /* _ASM_X86_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h new file mode 100644 index 00000000000..e01f7f7ccb0 --- /dev/null +++ b/arch/x86/include/asm/cmdline.h @@ -0,0 +1,6 @@ +#ifndef _ASM_X86_CMDLINE_H +#define _ASM_X86_CMDLINE_H + +int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); + +#endif /* _ASM_X86_CMDLINE_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index d3f5c63078d..e265ff95d16 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -37,7 +37,7 @@  #define X86_FEATURE_PAT		(0*32+16) /* Page Attribute Table */  #define X86_FEATURE_PSE36	(0*32+17) /* 36-bit PSEs */  #define X86_FEATURE_PN		(0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLSH	(0*32+19) /* "clflush" CLFLUSH instruction */ +#define X86_FEATURE_CLFLUSH	(0*32+19) /* CLFLUSH instruction */  #define X86_FEATURE_DS		(0*32+21) /* "dts" Debug Store */  #define X86_FEATURE_ACPI	(0*32+22) /* ACPI via MSR */  #define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */ @@ -216,9 +216,15 @@  #define X86_FEATURE_ERMS	(9*32+ 9) /* Enhanced REP MOVSB/STOSB */  #define X86_FEATURE_INVPCID	(9*32+10) /* Invalidate Processor Context ID */  #define X86_FEATURE_RTM		(9*32+11) /* Restricted Transactional Memory */ +#define X86_FEATURE_MPX		(9*32+14) /* Memory Protection Extension */ +#define X86_FEATURE_AVX512F	(9*32+16) /* AVX-512 Foundation */  #define X86_FEATURE_RDSEED	(9*32+18) /* The RDSEED instruction */  #define X86_FEATURE_ADX		(9*32+19) /* The ADCX and ADOX instructions */  #define X86_FEATURE_SMAP	(9*32+20) /* Supervisor Mode Access 
Prevention */ +#define X86_FEATURE_CLFLUSHOPT	(9*32+23) /* CLFLUSHOPT instruction */ +#define X86_FEATURE_AVX512PF	(9*32+26) /* AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER	(9*32+27) /* AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD	(9*32+28) /* AVX-512 Conflict Detection */  /*   * BUG word(s) @@ -312,7 +318,7 @@ extern const char * const x86_power_flags[32];  #define cpu_has_pmm_enabled	boot_cpu_has(X86_FEATURE_PMM_EN)  #define cpu_has_ds		boot_cpu_has(X86_FEATURE_DS)  #define cpu_has_pebs		boot_cpu_has(X86_FEATURE_PEBS) -#define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLSH) +#define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLUSH)  #define cpu_has_bts		boot_cpu_has(X86_FEATURE_BTS)  #define cpu_has_gbpages		boot_cpu_has(X86_FEATURE_GBPAGES)  #define cpu_has_arch_perfmon	boot_cpu_has(X86_FEATURE_ARCH_PERFMON) @@ -374,7 +380,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)  		 * Catch too early usage of this before alternatives  		 * have run.  		 */ -		asm goto("1: jmp %l[t_warn]\n" +		asm_volatile_goto("1: jmp %l[t_warn]\n"  			 "2:\n"  			 ".section .altinstructions,\"a\"\n"  			 " .long 1b - .\n" @@ -388,7 +394,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)  #endif -		asm goto("1: jmp %l[t_no]\n" +		asm_volatile_goto("1: jmp %l[t_no]\n"  			 "2:\n"  			 ".section .altinstructions,\"a\"\n"  			 " .long 1b - .\n" @@ -453,7 +459,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)   * have. Thus, we force the jump to the widest, 4-byte, signed relative   * offset even though the last would often fit in less bytes.   */ -		asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" +		asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"  			 "2:\n"  			 ".section .altinstructions,\"a\"\n"  			 " .long 1b - .\n"		/* src offset */ @@ -540,6 +546,13 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)  #define static_cpu_has_bug(bit)	static_cpu_has((bit))  #define boot_cpu_has_bug(bit)	cpu_has_bug(&boot_cpu_data, (bit)) +#define MAX_CPU_FEATURES	(NCAPINTS * 32) +#define cpu_have_feature	boot_cpu_has + +#define CPU_FEATURE_TYPEFMT	"x86,ven%04Xfam%04Xmod%04X" +#define CPU_FEATURE_TYPEVAL	boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ +				boot_cpu_data.x86_model +  #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */  #endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/arch/x86/include/asm/cputime.h b/arch/x86/include/asm/cputime.h deleted file mode 100644 index 6d68ad7e0ea..00000000000 --- a/arch/x86/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/cputime.h> diff --git a/arch/x86/include/asm/crypto/ablk_helper.h b/arch/x86/include/asm/crypto/ablk_helper.h deleted file mode 100644 index 4f93df50c23..00000000000 --- a/arch/x86/include/asm/crypto/ablk_helper.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Shared async block cipher helpers - */ - -#ifndef _CRYPTO_ABLK_HELPER_H -#define _CRYPTO_ABLK_HELPER_H - -#include <linux/crypto.h> -#include <linux/kernel.h> -#include <crypto/cryptd.h> - -struct async_helper_ctx { -	struct cryptd_ablkcipher *cryptd_tfm; -}; - -extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, -			unsigned int key_len); - -extern int __ablk_encrypt(struct ablkcipher_request *req); - -extern int ablk_encrypt(struct ablkcipher_request *req); - -extern int ablk_decrypt(struct ablkcipher_request *req); - -extern void ablk_exit(struct crypto_tfm *tfm); - -extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name); - -extern 
int ablk_init(struct crypto_tfm *tfm); - -#endif /* _CRYPTO_ABLK_HELPER_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index b90e5dfeee4..50d033a8947 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -327,10 +327,25 @@ static inline void write_trace_idt_entry(int entry, const gate_desc *gate)  {  	write_idt_entry(trace_idt_table, entry, gate);  } + +static inline void _trace_set_gate(int gate, unsigned type, void *addr, +				   unsigned dpl, unsigned ist, unsigned seg) +{ +	gate_desc s; + +	pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); +	/* +	 * does not need to be atomic because it is only done once at +	 * setup time +	 */ +	write_trace_idt_entry(gate, &s); +}  #else  static inline void write_trace_idt_entry(int entry, const gate_desc *gate)  {  } + +#define _trace_set_gate(gate, type, addr, dpl, ist, seg)  #endif  static inline void _set_gate(int gate, unsigned type, void *addr, @@ -353,11 +368,14 @@ static inline void _set_gate(int gate, unsigned type, void *addr,   * Pentium F0 0F bugfix can have resulted in the mapped   * IDT being write-protected.   */ -static inline void set_intr_gate(unsigned int n, void *addr) -{ -	BUG_ON((unsigned)n > 0xFF); -	_set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); -} +#define set_intr_gate(n, addr)						\ +	do {								\ +		BUG_ON((unsigned)n > 0xFF);				\ +		_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0,	\ +			  __KERNEL_CS);					\ +		_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\ +				0, 0, __KERNEL_CS);			\ +	} while (0)  extern int first_system_vector;  /* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ @@ -374,37 +392,10 @@ static inline void alloc_system_vector(int vector)  	}  } -#ifdef CONFIG_TRACING -static inline void trace_set_intr_gate(unsigned int gate, void *addr) -{ -	gate_desc s; - -	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS); -	write_idt_entry(trace_idt_table, gate, &s); -} - -static inline void __trace_alloc_intr_gate(unsigned int n, void *addr) -{ -	trace_set_intr_gate(n, addr); -} -#else -static inline void trace_set_intr_gate(unsigned int gate, void *addr) -{ -} - -#define __trace_alloc_intr_gate(n, addr) -#endif - -static inline void __alloc_intr_gate(unsigned int n, void *addr) -{ -	set_intr_gate(n, addr); -} -  #define alloc_intr_gate(n, addr)				\  	do {							\  		alloc_system_vector(n);				\ -		__alloc_intr_gate(n, addr);			\ -		__trace_alloc_intr_gate(n, trace_##addr);	\ +		set_intr_gate(n, addr);				\  	} while (0)  /* diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index fd8f9e2ca35..535192f6bfa 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h @@ -13,7 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len)  }  /* Use early IO mappings for DMI because it's initialized early */ -#define dmi_ioremap early_ioremap -#define dmi_iounmap early_iounmap +#define dmi_early_remap		early_ioremap +#define dmi_early_unmap		early_iounmap +#define dmi_remap		ioremap +#define dmi_unmap		iounmap  #endif /* _ASM_X86_DMI_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0062a012504..1eb5f6433ad 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -1,39 +1,56 @@  #ifndef _ASM_X86_EFI_H  #define _ASM_X86_EFI_H +#include <asm/i387.h> +/* + * We map the EFI regions needed for runtime services non-contiguously, + * with preserved alignment on virtual addresses starting from -4G down + * for a total max 
space of 64G. This way, we provide for stable runtime + * services addresses across kernels so that a kexec'd kernel can still + * use them. + * + * This is the main reason why we're doing stable VA mappings for RT + * services. + * + * This flag is used in conjuction with a chicken bit called + * "efi=old_map" which can be used as a fallback to the old runtime + * services mapping method in case there's some b0rkage with a + * particular EFI implementation (haha, it is hard to hold up the + * sarcasm here...). + */ +#define EFI_OLD_MEMMAP		EFI_ARCH_1 + +#define EFI32_LOADER_SIGNATURE	"EL32" +#define EFI64_LOADER_SIGNATURE	"EL64" +  #ifdef CONFIG_X86_32 -#define EFI_LOADER_SIGNATURE	"EL32"  extern unsigned long asmlinkage efi_call_phys(void *, ...); -#define efi_call_phys0(f)		efi_call_phys(f) -#define efi_call_phys1(f, a1)		efi_call_phys(f, a1) -#define efi_call_phys2(f, a1, a2)	efi_call_phys(f, a1, a2) -#define efi_call_phys3(f, a1, a2, a3)	efi_call_phys(f, a1, a2, a3) -#define efi_call_phys4(f, a1, a2, a3, a4)	\ -	efi_call_phys(f, a1, a2, a3, a4) -#define efi_call_phys5(f, a1, a2, a3, a4, a5)	\ -	efi_call_phys(f, a1, a2, a3, a4, a5) -#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6)	\ -	efi_call_phys(f, a1, a2, a3, a4, a5, a6)  /*   * Wrap all the virtual calls in a way that forces the parameters on the stack.   */ +/* Use this macro if your virtual returns a non-void value */  #define efi_call_virt(f, args...) \ -	((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args) - -#define efi_call_virt0(f)		efi_call_virt(f) -#define efi_call_virt1(f, a1)		efi_call_virt(f, a1) -#define efi_call_virt2(f, a1, a2)	efi_call_virt(f, a1, a2) -#define efi_call_virt3(f, a1, a2, a3)	efi_call_virt(f, a1, a2, a3) -#define efi_call_virt4(f, a1, a2, a3, a4)	\ -	efi_call_virt(f, a1, a2, a3, a4) -#define efi_call_virt5(f, a1, a2, a3, a4, a5)	\ -	efi_call_virt(f, a1, a2, a3, a4, a5) -#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)	\ -	efi_call_virt(f, a1, a2, a3, a4, a5, a6) +({									\ +	efi_status_t __s;						\ +	kernel_fpu_begin();						\ +	__s = ((efi_##f##_t __attribute__((regparm(0)))*)		\ +		efi.systab->runtime->f)(args);				\ +	kernel_fpu_end();						\ +	__s;								\ +}) + +/* Use this macro if your virtual call does not return any value */ +#define __efi_call_virt(f, args...) 
\ +({									\ +	kernel_fpu_begin();						\ +	((efi_##f##_t __attribute__((regparm(0)))*)			\ +		efi.systab->runtime->f)(args);				\ +	kernel_fpu_end();						\ +})  #define efi_ioremap(addr, size, type, attr)	ioremap_cache(addr, size) @@ -41,52 +58,28 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);  #define EFI_LOADER_SIGNATURE	"EL64" -extern u64 efi_call0(void *fp); -extern u64 efi_call1(void *fp, u64 arg1); -extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); -extern u64 efi_call3(void *fp, u64 arg1, u64 arg2, u64 arg3); -extern u64 efi_call4(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4); -extern u64 efi_call5(void *fp, u64 arg1, u64 arg2, u64 arg3, -		     u64 arg4, u64 arg5); -extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, -		     u64 arg4, u64 arg5, u64 arg6); - -#define efi_call_phys0(f)			\ -	efi_call0((f)) -#define efi_call_phys1(f, a1)			\ -	efi_call1((f), (u64)(a1)) -#define efi_call_phys2(f, a1, a2)			\ -	efi_call2((f), (u64)(a1), (u64)(a2)) -#define efi_call_phys3(f, a1, a2, a3)				\ -	efi_call3((f), (u64)(a1), (u64)(a2), (u64)(a3)) -#define efi_call_phys4(f, a1, a2, a3, a4)				\ -	efi_call4((f), (u64)(a1), (u64)(a2), (u64)(a3),		\ -		  (u64)(a4)) -#define efi_call_phys5(f, a1, a2, a3, a4, a5)				\ -	efi_call5((f), (u64)(a1), (u64)(a2), (u64)(a3),		\ -		  (u64)(a4), (u64)(a5)) -#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6)			\ -	efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3),		\ -		  (u64)(a4), (u64)(a5), (u64)(a6)) - -#define efi_call_virt0(f)				\ -	efi_call0((efi.systab->runtime->f)) -#define efi_call_virt1(f, a1)					\ -	efi_call1((efi.systab->runtime->f), (u64)(a1)) -#define efi_call_virt2(f, a1, a2)					\ -	efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2)) -#define efi_call_virt3(f, a1, a2, a3)					\ -	efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ -		  (u64)(a3)) -#define efi_call_virt4(f, a1, a2, a3, a4)				\ -	efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ -		  (u64)(a3), (u64)(a4)) -#define efi_call_virt5(f, a1, a2, a3, a4, a5)				\ -	efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ -		  (u64)(a3), (u64)(a4), (u64)(a5)) -#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)			\ -	efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ -		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) +extern u64 asmlinkage efi_call(void *fp, ...); + +#define efi_call_phys(f, args...)		efi_call((f), args) + +#define efi_call_virt(f, ...)						\ +({									\ +	efi_status_t __s;						\ +									\ +	efi_sync_low_kernel_mappings();					\ +	preempt_disable();						\ +	__kernel_fpu_begin();						\ +	__s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__);	\ +	__kernel_fpu_end();						\ +	preempt_enable();						\ +	__s;								\ +}) + +/* + * All X86_64 virt calls return non-void values. Thus, use non-void call for + * virt calls that would be void on X86_32. + */ +#define __efi_call_virt(f, args...) 
efi_call_virt(f, args)  extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,  				 u32 type, u64 attribute); @@ -94,13 +87,33 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,  #endif /* CONFIG_X86_32 */  extern int add_efi_memmap; -extern unsigned long x86_efi_facility; +extern struct efi_scratch efi_scratch;  extern void efi_set_executable(efi_memory_desc_t *md, bool executable);  extern int efi_memblock_x86_reserve_range(void);  extern void efi_call_phys_prelog(void);  extern void efi_call_phys_epilog(void);  extern void efi_unmap_memmap(void);  extern void efi_memory_uc(u64 addr, unsigned long size); +extern void __init efi_map_region(efi_memory_desc_t *md); +extern void __init efi_map_region_fixed(efi_memory_desc_t *md); +extern void efi_sync_low_kernel_mappings(void); +extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); +extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); +extern void __init old_map_region(efi_memory_desc_t *md); +extern void __init runtime_code_page_mkexec(void); +extern void __init efi_runtime_mkexec(void); +extern void __init efi_dump_pagetable(void); +extern void __init efi_apply_memmap_quirks(void); + +struct efi_setup_data { +	u64 fw_vendor; +	u64 runtime; +	u64 tables; +	u64 smbios; +	u64 reserved[8]; +}; + +extern u64 efi_setup;  #ifdef CONFIG_EFI @@ -109,6 +122,40 @@ static inline bool efi_is_native(void)  	return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);  } +static inline bool efi_runtime_supported(void) +{ +	if (efi_is_native()) +		return true; + +	if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) +		return true; + +	return false; +} + +extern struct console early_efi_console; +extern void parse_efi_setup(u64 phys_addr, u32 data_len); + +#ifdef CONFIG_EFI_MIXED +extern void efi_thunk_runtime_setup(void); +extern efi_status_t efi_thunk_set_virtual_address_map( +	void *phys_set_virtual_address_map, +	unsigned long memory_map_size, +	unsigned long descriptor_size, +	u32 descriptor_version, +	efi_memory_desc_t *virtual_map); +#else +static inline void efi_thunk_runtime_setup(void) {} +static inline efi_status_t efi_thunk_set_virtual_address_map( +	void *phys_set_virtual_address_map, +	unsigned long memory_map_size, +	unsigned long descriptor_size, +	u32 descriptor_version, +	efi_memory_desc_t *virtual_map) +{ +	return EFI_SUCCESS; +} +#endif /* CONFIG_EFI_MIXED */  #else  /*   * IF EFI is not configured, have the EFI calls return -ENOSYS. @@ -120,6 +167,7 @@ static inline bool efi_is_native(void)  #define efi_call4(_f, _a1, _a2, _a3, _a4)		(-ENOSYS)  #define efi_call5(_f, _a1, _a2, _a3, _a4, _a5)		(-ENOSYS)  #define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6)	(-ENOSYS) +static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}  #endif /* CONFIG_EFI */  #endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 9c999c1674f..1a055c81d86 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -75,7 +75,12 @@ typedef struct user_fxsr_struct elf_fpxregset_t;  #include <asm/vdso.h> -extern unsigned int vdso_enabled; +#ifdef CONFIG_X86_64 +extern unsigned int vdso64_enabled; +#endif +#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) +extern unsigned int vdso32_enabled; +#endif  /*   * This is used to ensure we don't load something for the wrong architecture. 
@@ -269,9 +274,9 @@ extern int force_personality32;  struct task_struct; -#define	ARCH_DLINFO_IA32(vdso_enabled)					\ +#define	ARCH_DLINFO_IA32						\  do {									\ -	if (vdso_enabled) {						\ +	if (vdso32_enabled) {						\  		NEW_AUX_ENT(AT_SYSINFO,	VDSO_ENTRY);			\  		NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);	\  	}								\ @@ -281,31 +286,28 @@ do {									\  #define STACK_RND_MASK (0x7ff) -#define VDSO_HIGH_BASE		(__fix_to_virt(FIX_VDSO)) - -#define ARCH_DLINFO		ARCH_DLINFO_IA32(vdso_enabled) +#define ARCH_DLINFO		ARCH_DLINFO_IA32  /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */  #else /* CONFIG_X86_32 */ -#define VDSO_HIGH_BASE		0xffffe000U /* CONFIG_COMPAT_VDSO address */ -  /* 1GB for 64bit, 8MB for 32bit */  #define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)  #define ARCH_DLINFO							\  do {									\ -	if (vdso_enabled)						\ +	if (vdso64_enabled)						\  		NEW_AUX_ENT(AT_SYSINFO_EHDR,				\ -			    (unsigned long)current->mm->context.vdso);	\ +			    (unsigned long __force)current->mm->context.vdso); \  } while (0) +/* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */  #define ARCH_DLINFO_X32							\  do {									\ -	if (vdso_enabled)						\ +	if (vdso64_enabled)						\  		NEW_AUX_ENT(AT_SYSINFO_EHDR,				\ -			    (unsigned long)current->mm->context.vdso);	\ +			    (unsigned long __force)current->mm->context.vdso); \  } while (0)  #define AT_SYSINFO		32 @@ -314,7 +316,7 @@ do {									\  if (test_thread_flag(TIF_X32))						\  	ARCH_DLINFO_X32;						\  else									\ -	ARCH_DLINFO_IA32(sysctl_vsyscall32) +	ARCH_DLINFO_IA32  #define COMPAT_ELF_ET_DYN_BASE	(TASK_UNMAPPED_BASE + 0x1000000) @@ -323,18 +325,17 @@ else									\  #define VDSO_CURRENT_BASE	((unsigned long)current->mm->context.vdso)  #define VDSO_ENTRY							\ -	((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) +	((unsigned long)current->mm->context.vdso +			\ +	 selected_vdso32->sym___kernel_vsyscall)  struct linux_binprm;  #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1  extern int arch_setup_additional_pages(struct linux_binprm *bprm,  				       int uses_interp); -extern int x32_setup_additional_pages(struct linux_binprm *bprm, -				      int uses_interp); - -extern int syscall32_setup_pages(struct linux_binprm *, int exstack); -#define compat_arch_setup_additional_pages	syscall32_setup_pages +extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, +					      int uses_interp); +#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages  extern unsigned long arch_randomize_brk(struct mm_struct *mm);  #define arch_randomize_brk arch_randomize_brk diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h new file mode 100644 index 00000000000..99efebb2f69 --- /dev/null +++ b/arch/x86/include/asm/espfix.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_ESPFIX_H +#define _ASM_X86_ESPFIX_H + +#ifdef CONFIG_X86_64 + +#include <asm/percpu.h> + +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); + +extern void init_espfix_bsp(void); +extern void init_espfix_ap(void); + +#endif /* CONFIG_X86_64 */ + +#endif /* _ASM_X86_ESPFIX_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index e846225265e..b0910f97a3e 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -24,7 +24,7 @@  #include <linux/threads.h>  #include <asm/kmap_types.h>  #else -#include <asm/vsyscall.h> 
+#include <uapi/asm/vsyscall.h>  #endif  /* @@ -40,15 +40,9 @@   */  extern unsigned long __FIXADDR_TOP;  #define FIXADDR_TOP	((unsigned long)__FIXADDR_TOP) - -#define FIXADDR_USER_START     __fix_to_virt(FIX_VDSO) -#define FIXADDR_USER_END       __fix_to_virt(FIX_VDSO - 1)  #else -#define FIXADDR_TOP	(VSYSCALL_END-PAGE_SIZE) - -/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ -#define FIXADDR_USER_START	((unsigned long)VSYSCALL32_VSYSCALL) -#define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE) +#define FIXADDR_TOP	(round_up(VSYSCALL_ADDR + PAGE_SIZE, 1<<PMD_SHIFT) - \ +			 PAGE_SIZE)  #endif @@ -74,13 +68,8 @@ extern unsigned long __FIXADDR_TOP;  enum fixed_addresses {  #ifdef CONFIG_X86_32  	FIX_HOLE, -	FIX_VDSO,  #else -	VSYSCALL_LAST_PAGE, -	VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE -			    + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, -	VVAR_PAGE, -	VSYSCALL_HPET, +	VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,  #ifdef CONFIG_PARAVIRT_CLOCK  	PVCLOCK_FIXMAP_BEGIN,  	PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1, @@ -98,12 +87,6 @@ enum fixed_addresses {  	FIX_IO_APIC_BASE_0,  	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,  #endif -#ifdef CONFIG_X86_VISWS_APIC -	FIX_CO_CPU,	/* Cobalt timer */ -	FIX_CO_APIC,	/* Cobalt APIC Redirection Table */ -	FIX_LI_PCIA,	/* Lithium PCI Bridge A */ -	FIX_LI_PCIB,	/* Lithium PCI Bridge B */ -#endif  	FIX_RO_IDT,	/* Virtual mapping for read-only IDT */  #ifdef CONFIG_X86_32  	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */ @@ -175,64 +158,13 @@ static inline void __set_fixmap(enum fixed_addresses idx,  }  #endif -#define set_fixmap(idx, phys)				\ -	__set_fixmap(idx, phys, PAGE_KERNEL) - -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys)			\ -	__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) - -#define clear_fixmap(idx)			\ -	__set_fixmap(idx, 0, __pgprot(0)) - -#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without translation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static __always_inline unsigned long fix_to_virt(const unsigned int idx) -{ -	/* -	 * this branch gets completely eliminated after inlining, -	 * except when someone tries to use fixaddr indices in an -	 * illegal way. (such as mixing up address types or using -	 * out-of-range indices). -	 * -	 * If it doesn't get removed, the linker will complain -	 * loudly with a reasonably clear error message.. 
-	 */ -	if (idx >= __end_of_fixed_addresses) -		__this_fixmap_does_not_exist(); - -	return __fix_to_virt(idx); -} - -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ -	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); -	return __virt_to_fix(vaddr); -} - -/* Return an pointer with offset calculated */ -static __always_inline unsigned long -__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) -{ -	__set_fixmap(idx, phys, flags); -	return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); -} +#include <asm-generic/fixmap.h> -#define set_fixmap_offset(idx, phys)			\ -	__set_fixmap_offset(idx, phys, PAGE_KERNEL) +#define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags) +#define __late_clear_fixmap(idx) __set_fixmap(idx, 0, __pgprot(0)) -#define set_fixmap_offset_nocache(idx, phys)			\ -	__set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE) +void __early_set_fixmap(enum fixed_addresses idx, +			phys_addr_t phys, pgprot_t flags);  #endif /* !__ASSEMBLY__ */  #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index d3d74698dce..1c7eefe3250 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -145,10 +145,10 @@ static int fd_request_irq(void)  {  	if (can_use_virtual_dma)  		return request_irq(FLOPPY_IRQ, floppy_hardint, -				   IRQF_DISABLED, "floppy", NULL); +				   0, "floppy", NULL);  	else  		return request_irq(FLOPPY_IRQ, floppy_interrupt, -				   IRQF_DISABLED, "floppy", NULL); +				   0, "floppy", NULL);  }  static unsigned long dma_mem_alloc(unsigned long size) diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 4d0bda7b11e..115e3689cd5 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -87,22 +87,22 @@ static inline int is_x32_frame(void)  static __always_inline __pure bool use_eager_fpu(void)  { -	return static_cpu_has(X86_FEATURE_EAGER_FPU); +	return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);  }  static __always_inline __pure bool use_xsaveopt(void)  { -	return static_cpu_has(X86_FEATURE_XSAVEOPT); +	return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);  }  static __always_inline __pure bool use_xsave(void)  { -	return static_cpu_has(X86_FEATURE_XSAVE); +	return static_cpu_has_safe(X86_FEATURE_XSAVE);  }  static __always_inline __pure bool use_fxsr(void)  { -        return static_cpu_has(X86_FEATURE_FXSR); +	return static_cpu_has_safe(X86_FEATURE_FXSR);  }  static inline void fx_finit(struct i387_fxsave_struct *fx) @@ -293,12 +293,13 @@ static inline int restore_fpu_checking(struct task_struct *tsk)  	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception  	   is pending.  Clear the x87 state here by setting it to fixed  	   values. "m" is a random variable that should be in L1 */ -	alternative_input( -		ASM_NOP8 ASM_NOP2, -		"emms\n\t"		/* clear stack tags */ -		"fildl %P[addr]",	/* set F?P to defined value */ -		X86_FEATURE_FXSAVE_LEAK, -		[addr] "m" (tsk->thread.fpu.has_fpu)); +	if (unlikely(static_cpu_has_safe(X86_FEATURE_FXSAVE_LEAK))) { +		asm volatile( +			"fnclex\n\t" +			"emms\n\t" +			"fildl %P[addr]"	/* set F?P to defined value */ +			: : [addr] "m" (tsk->thread.fpu.has_fpu)); +	}  	return fpu_restore_checking(&tsk->thread.fpu);  } @@ -365,7 +366,7 @@ static inline void drop_fpu(struct task_struct *tsk)  	 * Forget coprocessor state..  	 
*/  	preempt_disable(); -	tsk->fpu_counter = 0; +	tsk->thread.fpu_counter = 0;  	__drop_fpu(tsk);  	clear_used_math();  	preempt_enable(); @@ -424,7 +425,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta  	 * or if the past 5 consecutive context-switches used math.  	 */  	fpu.preload = tsk_used_math(new) && (use_eager_fpu() || -					     new->fpu_counter > 5); +					     new->thread.fpu_counter > 5);  	if (__thread_has_fpu(old)) {  		if (!__save_init_fpu(old))  			cpu = ~0; @@ -433,16 +434,16 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta  		/* Don't change CR0.TS if we just switch! */  		if (fpu.preload) { -			new->fpu_counter++; +			new->thread.fpu_counter++;  			__thread_set_has_fpu(new);  			prefetch(new->thread.fpu.state);  		} else if (!use_eager_fpu())  			stts();  	} else { -		old->fpu_counter = 0; +		old->thread.fpu_counter = 0;  		old->thread.fpu.last_cpu = ~0;  		if (fpu.preload) { -			new->fpu_counter++; +			new->thread.fpu_counter++;  			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))  				fpu.preload = 0;  			else diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index be27ba1e947..b4c1f545343 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -110,26 +110,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)  static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,  						u32 oldval, u32 newval)  { -	int ret = 0; - -	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) -		return -EFAULT; - -	asm volatile("\t" ASM_STAC "\n" -		     "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" -		     "2:\t" ASM_CLAC "\n" -		     "\t.section .fixup, \"ax\"\n" -		     "3:\tmov     %3, %0\n" -		     "\tjmp     2b\n" -		     "\t.previous\n" -		     _ASM_EXTABLE(1b, 3b) -		     : "+r" (ret), "=a" (oldval), "+m" (*uaddr) -		     : "i" (-EFAULT), "r" (newval), "1" (oldval) -		     : "memory" -	); - -	*uval = oldval; -	return ret; +	return user_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval);  }  #endif diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index ab0ae1aa6d0..230853da4ec 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -33,6 +33,9 @@ typedef struct {  #ifdef CONFIG_X86_MCE_THRESHOLD  	unsigned int irq_threshold_count;  #endif +#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) +	unsigned int irq_hv_callback_count; +#endif  } ____cacheline_aligned irq_cpustat_t;  DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); diff --git a/arch/x86/include/asm/hash.h b/arch/x86/include/asm/hash.h new file mode 100644 index 00000000000..e8c58f88b1d --- /dev/null +++ b/arch/x86/include/asm/hash.h @@ -0,0 +1,7 @@ +#ifndef _ASM_X86_HASH_H +#define _ASM_X86_HASH_H + +struct fast_hash_ops; +extern void setup_arch_fast_hash(struct fast_hash_ops *ops); + +#endif /* _ASM_X86_HASH_H */ diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index b18df579c0e..36f7125945e 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -63,6 +63,7 @@  /* hpet memory map physical address */  extern unsigned long hpet_address;  extern unsigned long force_hpet_address; +extern int boot_hpet_disable;  extern u8 hpet_blockid;  extern int hpet_force_user;  extern u8 hpet_msi_disable; diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index a8091216963..68c05398bba 100644 --- a/arch/x86/include/asm/hugetlb.h +++ 
b/arch/x86/include/asm/hugetlb.h @@ -52,6 +52,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,  static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,  					 unsigned long addr, pte_t *ptep)  { +	ptep_clear_flush(vma, addr, ptep);  }  static inline int huge_pte_none(pte_t pte) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 92b3bae08b7..4615906d83d 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -98,7 +98,6 @@ extern void trace_call_function_single_interrupt(void);  #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))  extern unsigned long io_apic_irqs; -extern void init_VISWS_APIC_irqs(void);  extern void setup_IO_APIC(void);  extern void disable_IO_APIC(void); @@ -187,6 +186,12 @@ extern __visible void smp_invalidate_interrupt(struct pt_regs *);  #endif  extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); +#ifdef CONFIG_TRACING +#define trace_interrupt interrupt +#endif + +#define VECTOR_UNDEFINED	(-1) +#define VECTOR_RETRIGGERED	(-2)  typedef int vector_irq_t[NR_VECTORS];  DECLARE_PER_CPU(vector_irq_t, vector_irq); diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h new file mode 100644 index 00000000000..e34e097b6f9 --- /dev/null +++ b/arch/x86/include/asm/intel-mid.h @@ -0,0 +1,157 @@ +/* + * intel-mid.h: Intel MID specific setup code + * + * (C) Copyright 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#ifndef _ASM_X86_INTEL_MID_H +#define _ASM_X86_INTEL_MID_H + +#include <linux/sfi.h> +#include <linux/platform_device.h> + +extern int intel_mid_pci_init(void); +extern int get_gpio_by_name(const char *name); +extern void intel_scu_device_register(struct platform_device *pdev); +extern int __init sfi_parse_mrtc(struct sfi_table_header *table); +extern int __init sfi_parse_mtmr(struct sfi_table_header *table); +extern int sfi_mrtc_num; +extern struct sfi_rtc_table_entry sfi_mrtc_array[]; + +/* + * Here defines the array of devices platform data that IAFW would export + * through SFI "DEVS" table, we use name and type to match the device and + * its platform data. + */ +struct devs_id { +	char name[SFI_NAME_LEN + 1]; +	u8 type; +	u8 delay; +	void *(*get_platform_data)(void *info); +	/* Custom handler for devices */ +	void (*device_handler)(struct sfi_device_table_entry *pentry, +				struct devs_id *dev); +}; + +#define sfi_device(i)   \ +	static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ +	__attribute__((__section__(".x86_intel_mid_dev.init"))) = &i + +/* + * Medfield is the follow-up of Moorestown, it combines two chip solution into + * one. Other than that it also added always-on and constant tsc and lapic + * timers. Medfield is the platform name, and the chip name is called Penwell + * we treat Medfield/Penwell as a variant of Moorestown. Penwell can be + * identified via MSRs. 
+ */ +enum intel_mid_cpu_type { +	/* 1 was Moorestown */ +	INTEL_MID_CPU_CHIP_PENWELL = 2, +	INTEL_MID_CPU_CHIP_CLOVERVIEW, +	INTEL_MID_CPU_CHIP_TANGIER, +}; + +extern enum intel_mid_cpu_type __intel_mid_cpu_chip; + +/** + * struct intel_mid_ops - Interface between intel-mid & sub archs + * @arch_setup: arch_setup function to re-initialize platform + *             structures (x86_init, x86_platform_init) + * + * This structure can be extended if any new interface is required + * between intel-mid & its sub arch files. + */ +struct intel_mid_ops { +	void (*arch_setup)(void); +}; + +/* Helper API's for INTEL_MID_OPS_INIT */ +#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid)	\ +				[cpuid] = get_##cpuname##_ops + +/* Maximum number of CPU ops */ +#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *)) + +/* + * For every new cpu addition, a weak get_<cpuname>_ops() function needs be + * declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h. + */ +#define INTEL_MID_OPS_INIT {\ +	DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \ +	DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \ +	DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \ +}; + +#ifdef CONFIG_X86_INTEL_MID + +static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void) +{ +	return __intel_mid_cpu_chip; +} + +static inline bool intel_mid_has_msic(void) +{ +	return (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL); +} + +#else /* !CONFIG_X86_INTEL_MID */ + +#define intel_mid_identify_cpu()    (0) +#define intel_mid_has_msic()    (0) + +#endif /* !CONFIG_X86_INTEL_MID */ + +enum intel_mid_timer_options { +	INTEL_MID_TIMER_DEFAULT, +	INTEL_MID_TIMER_APBT_ONLY, +	INTEL_MID_TIMER_LAPIC_APBT, +}; + +extern enum intel_mid_timer_options intel_mid_timer_options; + +/* + * Penwell uses spread spectrum clock, so the freq number is not exactly + * the same as reported by MSR based on SDM. 
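For orientation, board code registers platform-data providers through the devs_id/sfi_device() machinery declared earlier in this header. A hypothetical sketch (device name and callback are made up; SFI_DEV_TYPE_IPC comes from <linux/sfi.h>):

static void *fake_audio_platform_data(void *info)
{
	/* return board-specific platform data for this SFI entry */
	return NULL;
}

static const struct devs_id fake_audio_dev_id __initconst = {
	.name = "fake_audio",
	.type = SFI_DEV_TYPE_IPC,
	.delay = 1,
	.get_platform_data = &fake_audio_platform_data,
};

sfi_device(fake_audio_dev_id);
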
+ */ +#define FSB_FREQ_83SKU	83200 +#define FSB_FREQ_100SKU	99840 +#define FSB_FREQ_133SKU	133000 + +#define FSB_FREQ_167SKU	167000 +#define FSB_FREQ_200SKU	200000 +#define FSB_FREQ_267SKU	267000 +#define FSB_FREQ_333SKU	333000 +#define FSB_FREQ_400SKU	400000 + +/* Bus Select SoC Fuse value */ +#define BSEL_SOC_FUSE_MASK	0x7 +#define BSEL_SOC_FUSE_001	0x1 /* FSB 133MHz */ +#define BSEL_SOC_FUSE_101	0x5 /* FSB 100MHz */ +#define BSEL_SOC_FUSE_111	0x7 /* FSB 83MHz */ + +#define SFI_MTMR_MAX_NUM 8 +#define SFI_MRTC_MAX	8 + +extern struct console early_mrst_console; +extern void mrst_early_console_init(void); + +extern struct console early_hsu_console; +extern void hsu_early_console_init(const char *); + +extern void intel_scu_devices_create(void); +extern void intel_scu_devices_destroy(void); + +/* VRTC timer */ +#define MRST_VRTC_MAP_SZ	(1024) +/*#define MRST_VRTC_PGOFFSET	(0xc00) */ + +extern void intel_mid_rtc_init(void); + +/* the offset for the mapping of global gpio pin to irq */ +#define INTEL_MID_IRQ_OFFSET 0x100 + +#endif /* _ASM_X86_INTEL_MID_H */ diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/intel_mid_vrtc.h index 1e69a75412a..86ff4685c40 100644 --- a/arch/x86/include/asm/mrst-vrtc.h +++ b/arch/x86/include/asm/intel_mid_vrtc.h @@ -1,5 +1,5 @@ -#ifndef _MRST_VRTC_H -#define _MRST_VRTC_H +#ifndef _INTEL_MID_VRTC_H +#define _INTEL_MID_VRTC_H  extern unsigned char vrtc_cmos_read(unsigned char reg);  extern void vrtc_cmos_write(unsigned char val, unsigned char reg); diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 34f69cb9350..b8237d8a1e0 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -39,6 +39,7 @@  #include <linux/string.h>  #include <linux/compiler.h>  #include <asm/page.h> +#include <asm/early_ioremap.h>  #define build_mmio_read(name, size, type, reg, barrier) \  static inline type name(const volatile void __iomem *addr) \ @@ -237,7 +238,7 @@ memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)  static inline void flush_write_buffers(void)  { -#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) +#if defined(CONFIG_X86_PPRO_FENCE)  	asm volatile("lock; addl $0,0(%%esp)": : :"memory");  #endif  } @@ -316,19 +317,6 @@ extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,  				unsigned long prot_val);  extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); -/* - * early_ioremap() and early_iounmap() are for temporary early boot-time - * mappings, before the real ioremap() is functional. - * A boot-time mapping is currently limited to at most 16 pages. 
- */ -extern void early_ioremap_init(void); -extern void early_ioremap_reset(void); -extern void __iomem *early_ioremap(resource_size_t phys_addr, -				   unsigned long size); -extern void __iomem *early_memremap(resource_size_t phys_addr, -				    unsigned long size); -extern void early_iounmap(void __iomem *addr, unsigned long size); -extern void fixup_early_ioremap(void);  extern bool is_early_ioremap_ptep(pte_t *ptep);  #ifdef CONFIG_XEN diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 459e50a424d..90f97b4b934 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -168,8 +168,6 @@ extern int save_ioapic_entries(void);  extern void mask_ioapic_entries(void);  extern int restore_ioapic_entries(void); -extern int get_nr_irqs_gsi(void); -  extern void setup_ioapic_ids_from_mpc(void);  extern void setup_ioapic_ids_from_mpc_nocheck(void); diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h new file mode 100644 index 00000000000..57995f0596a --- /dev/null +++ b/arch/x86/include/asm/iosf_mbi.h @@ -0,0 +1,145 @@ +/* + * iosf_mbi.h: Intel OnChip System Fabric MailBox access support + */ + +#ifndef IOSF_MBI_SYMS_H +#define IOSF_MBI_SYMS_H + +#define MBI_MCR_OFFSET		0xD0 +#define MBI_MDR_OFFSET		0xD4 +#define MBI_MCRX_OFFSET		0xD8 + +#define MBI_RD_MASK		0xFEFFFFFF +#define MBI_WR_MASK		0X01000000 + +#define MBI_MASK_HI		0xFFFFFF00 +#define MBI_MASK_LO		0x000000FF +#define MBI_ENABLE		0xF0 + +/* Baytrail available units */ +#define BT_MBI_UNIT_AUNIT	0x00 +#define BT_MBI_UNIT_SMC		0x01 +#define BT_MBI_UNIT_CPU		0x02 +#define BT_MBI_UNIT_BUNIT	0x03 +#define BT_MBI_UNIT_PMC		0x04 +#define BT_MBI_UNIT_GFX		0x06 +#define BT_MBI_UNIT_SMI		0x0C +#define BT_MBI_UNIT_USB		0x43 +#define BT_MBI_UNIT_SATA	0xA3 +#define BT_MBI_UNIT_PCIE	0xA6 + +/* Baytrail read/write opcodes */ +#define BT_MBI_AUNIT_READ	0x10 +#define BT_MBI_AUNIT_WRITE	0x11 +#define BT_MBI_SMC_READ		0x10 +#define BT_MBI_SMC_WRITE	0x11 +#define BT_MBI_CPU_READ		0x10 +#define BT_MBI_CPU_WRITE	0x11 +#define BT_MBI_BUNIT_READ	0x10 +#define BT_MBI_BUNIT_WRITE	0x11 +#define BT_MBI_PMC_READ		0x06 +#define BT_MBI_PMC_WRITE	0x07 +#define BT_MBI_GFX_READ		0x00 +#define BT_MBI_GFX_WRITE	0x01 +#define BT_MBI_SMIO_READ	0x06 +#define BT_MBI_SMIO_WRITE	0x07 +#define BT_MBI_USB_READ		0x06 +#define BT_MBI_USB_WRITE	0x07 +#define BT_MBI_SATA_READ	0x00 +#define BT_MBI_SATA_WRITE	0x01 +#define BT_MBI_PCIE_READ	0x00 +#define BT_MBI_PCIE_WRITE	0x01 + +/* Quark available units */ +#define QRK_MBI_UNIT_HBA	0x00 +#define QRK_MBI_UNIT_HB	0x03 +#define QRK_MBI_UNIT_RMU	0x04 +#define QRK_MBI_UNIT_MM	0x05 +#define QRK_MBI_UNIT_MMESRAM	0x05 +#define QRK_MBI_UNIT_SOC	0x31 + +/* Quark read/write opcodes */ +#define QRK_MBI_HBA_READ	0x10 +#define QRK_MBI_HBA_WRITE	0x11 +#define QRK_MBI_HB_READ	0x10 +#define QRK_MBI_HB_WRITE	0x11 +#define QRK_MBI_RMU_READ	0x10 +#define QRK_MBI_RMU_WRITE	0x11 +#define QRK_MBI_MM_READ	0x10 +#define QRK_MBI_MM_WRITE	0x11 +#define QRK_MBI_MMESRAM_READ	0x12 +#define QRK_MBI_MMESRAM_WRITE	0x13 +#define QRK_MBI_SOC_READ	0x06 +#define QRK_MBI_SOC_WRITE	0x07 + +#if IS_ENABLED(CONFIG_IOSF_MBI) + +bool iosf_mbi_available(void); + +/** + * iosf_mbi_read() - MailBox Interface read command + * @port:	port indicating subunit being accessed + * @opcode:	port specific read or write opcode + * @offset:	register address offset + * @mdr:	register data to be read + * + * Locking is handled by spinlock - cannot sleep. 
+ * Return: Nonzero on error + */ +int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr); + +/** + * iosf_mbi_write() - MailBox unmasked write command + * @port:	port indicating subunit being accessed + * @opcode:	port specific read or write opcode + * @offset:	register address offset + * @mdr:	register data to be written + * + * Locking is handled by spinlock - cannot sleep. + * Return: Nonzero on error + */ +int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr); + +/** + * iosf_mbi_modify() - MailBox masked write command + * @port:	port indicating subunit being accessed + * @opcode:	port specific read or write opcode + * @offset:	register address offset + * @mdr:	register data being modified + * @mask:	mask indicating bits in mdr to be modified + * + * Locking is handled by spinlock - cannot sleep. + * Return: Nonzero on error + */ +int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask); + +#else /* CONFIG_IOSF_MBI is not enabled */ +static inline +bool iosf_mbi_available(void) +{ +	return false; +} + +static inline +int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr) +{ +	WARN(1, "IOSF_MBI driver not available"); +	return -EPERM; +} + +static inline +int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr) +{ +	WARN(1, "IOSF_MBI driver not available"); +	return -EPERM; +} + +static inline +int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask) +{ +	WARN(1, "IOSF_MBI driver not available"); +	return -EPERM; +} +#endif /* CONFIG_IOSF_MBI */ + +#endif /* IOSF_MBI_SYMS_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 0ea10f27d61..a80cbb88ea9 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -25,6 +25,7 @@ extern void irq_ctx_init(int cpu);  #ifdef CONFIG_HOTPLUG_CPU  #include <linux/cpumask.h> +extern int check_irq_vectors_for_cpu_disable(void);  extern void fixup_irqs(void);  extern void irq_force_complete_move(int);  #endif @@ -42,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector);  extern void init_ISA_irqs(void);  #ifdef CONFIG_X86_LOCAL_APIC -void arch_trigger_all_cpu_backtrace(void); +void arch_trigger_all_cpu_backtrace(bool);  #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace  #endif diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index d806b228d2c..b7747c4c2cf 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -103,4 +103,7 @@ static inline bool setup_remapped_irq(int irq,  }  #endif /* CONFIG_IRQ_REMAP */ +#define dmar_alloc_hwirq()	irq_alloc_hwirq(-1) +#define dmar_free_hwirq		irq_free_hwirq +  #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index bba3cf88e62..0a8b519226b 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void)  #define PARAVIRT_ADJUST_EXCEPTION_FRAME	/*  */ -#define INTERRUPT_RETURN	iretq +#define INTERRUPT_RETURN	jmp native_iret  #define USERGS_SYSRET64				\  	swapgs;					\  	sysretq; diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 64507f35800..6a2cefb4395 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -18,7 +18,7 @@  static __always_inline bool arch_static_branch(struct static_key *key)  { -	asm goto("1:" +	asm_volatile_goto("1:"  		".byte " __stringify(STATIC_KEY_INIT_NOP) 
"\n\t"  		".pushsection __jump_table,  \"aw\" \n\t"  		_ASM_ALIGN "\n\t" diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 2c37aadcbc3..32ce71375b2 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -21,7 +21,7 @@ enum die_val {  	DIE_NMIUNKNOWN,  }; -extern void printk_address(unsigned long address, int reliable); +extern void printk_address(unsigned long address);  extern void die(const char *, struct pt_regs *,long);  extern int __must_check __die(const char *, struct pt_regs *, long);  extern void show_trace(struct task_struct *t, struct pt_regs *regs, diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 9454c167629..53cdfb2857a 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -116,4 +116,6 @@ struct kprobe_ctlblk {  extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);  extern int kprobe_exceptions_notify(struct notifier_block *self,  				    unsigned long val, void *data); +extern int kprobe_int3_handler(struct pt_regs *regs); +extern int kprobe_debug_handler(struct pt_regs *regs);  #endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 15f960c06ff..a04fe4eb237 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -189,7 +189,6 @@ struct x86_emulate_ops {  	void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);  	ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);  	int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); -	void (*set_rflags)(struct x86_emulate_ctxt *ctxt, ulong val);  	int (*cpl)(struct x86_emulate_ctxt *ctxt);  	int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);  	int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); @@ -274,13 +273,17 @@ struct x86_emulate_ctxt {  	bool guest_mode; /* guest running a nested guest */  	bool perm_ok; /* do not check permissions if true */ -	bool only_vendor_specific_insn; +	bool ud;	/* inject an #UD if host doesn't support insn */  	bool have_exception;  	struct x86_exception exception; -	/* decode cache */ -	u8 twobyte; +	/* +	 * decode cache +	 */ + +	/* current opcode length in bytes */ +	u8 opcode_len;  	u8 b;  	u8 intercept;  	u8 lock_prefix; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c76ff74a98f..49205d01b9a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -50,17 +50,13 @@  			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \  			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) -#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) -#define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL -#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS |	\ -				  0xFFFFFF0000000000ULL) +#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL  #define CR4_RESERVED_BITS                                               \  	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\  			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \  			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \  			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ -			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) +			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))  #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -79,6 +75,13 @@  #define 
KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))  #define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE) +static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) +{ +	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ +	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - +		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); +} +  #define SELECTOR_TI_MASK (1 << 2)  #define SELECTOR_RPL_MASK 0x03 @@ -92,7 +95,7 @@  #define KVM_REFILL_PAGES 25  #define KVM_MAX_CPUID_ENTRIES 80  #define KVM_NR_FIXED_MTRR_REGION 88 -#define KVM_NR_VAR_MTRR 8 +#define KVM_NR_VAR_MTRR 10  #define ASYNC_PF_PER_VCPU 64 @@ -127,7 +130,6 @@ enum kvm_reg_ex {  	VCPU_EXREG_PDPTR = NR_VCPU_REGS,  	VCPU_EXREG_CR3,  	VCPU_EXREG_RFLAGS, -	VCPU_EXREG_CPL,  	VCPU_EXREG_SEGMENTS,  }; @@ -253,7 +255,6 @@ struct kvm_pio_request {   * mode.   */  struct kvm_mmu { -	void (*new_cr3)(struct kvm_vcpu *vcpu);  	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);  	unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);  	u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); @@ -261,7 +262,6 @@ struct kvm_mmu {  			  bool prefault);  	void (*inject_page_fault)(struct kvm_vcpu *vcpu,  				  struct x86_exception *fault); -	void (*free)(struct kvm_vcpu *vcpu);  	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,  			    struct x86_exception *exception);  	gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); @@ -332,6 +332,11 @@ struct kvm_pmu {  	u64 reprogram_pmi;  }; +enum { +	KVM_DEBUGREG_BP_ENABLED = 1, +	KVM_DEBUGREG_WONT_EXIT = 2, +}; +  struct kvm_vcpu_arch {  	/*  	 * rip and regs accesses must go through @@ -389,6 +394,8 @@ struct kvm_vcpu_arch {  	struct fpu guest_fpu;  	u64 xcr0; +	u64 guest_supported_xcr0; +	u32 guest_xstate_size;  	struct kvm_pio_request pio;  	void *pio_data; @@ -437,7 +444,6 @@ struct kvm_vcpu_arch {  	} st;  	u64 last_guest_tsc; -	u64 last_kernel_ns;  	u64 last_host_tsc;  	u64 tsc_offset_adjustment;  	u64 this_tsc_nsec; @@ -455,9 +461,9 @@ struct kvm_vcpu_arch {  	bool nmi_injected;    /* Trying to inject an NMI this entry */  	struct mtrr_state_type mtrr_state; -	u32 pat; +	u64 pat; -	int switch_db_regs; +	unsigned switch_db_regs;  	unsigned long db[KVM_NR_DB_REGS];  	unsigned long dr6;  	unsigned long dr7; @@ -557,7 +563,9 @@ struct kvm_arch {  	struct list_head assigned_dev_head;  	struct iommu_domain *iommu_domain; -	int iommu_flags; +	bool iommu_noncoherent; +#define __KVM_HAVE_ARCH_NONCOHERENT_DMA +	atomic_t noncoherent_dma_count;  	struct kvm_pic *vpic;  	struct kvm_ioapic *vioapic;  	struct kvm_pit *vpit; @@ -590,12 +598,15 @@ struct kvm_arch {  	bool use_master_clock;  	u64 master_kernel_ns;  	cycle_t master_cycle_now; +	struct delayed_work kvmclock_update_work; +	struct delayed_work kvmclock_sync_work;  	struct kvm_xen_hvm_config xen_hvm_config;  	/* fields used by HYPER-V emulation */  	u64 hv_guest_os_id;  	u64 hv_hypercall; +	u64 hv_tsc_page;  	#ifdef CONFIG_KVM_MMU_AUDIT  	int audit_point; @@ -690,6 +701,9 @@ struct kvm_x86_ops {  	void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);  	void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);  	void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); +	u64 (*get_dr6)(struct kvm_vcpu *vcpu); +	void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); +	void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);  	void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);  	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);  	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); @@ 
-716,8 +730,8 @@ struct kvm_x86_ops {  	int (*nmi_allowed)(struct kvm_vcpu *vcpu);  	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);  	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); -	int (*enable_nmi_window)(struct kvm_vcpu *vcpu); -	int (*enable_irq_window)(struct kvm_vcpu *vcpu); +	void (*enable_nmi_window)(struct kvm_vcpu *vcpu); +	void (*enable_irq_window)(struct kvm_vcpu *vcpu);  	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);  	int (*vm_has_apicv)(struct kvm *kvm);  	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); @@ -753,6 +767,9 @@ struct kvm_x86_ops {  			       struct x86_instruction_info *info,  			       enum x86_intercept_stage stage);  	void (*handle_external_intr)(struct kvm_vcpu *vcpu); +	bool (*mpx_supported)(void); + +	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);  };  struct kvm_arch_async_pf { @@ -780,11 +797,11 @@ void kvm_mmu_module_exit(void);  void kvm_mmu_destroy(struct kvm_vcpu *vcpu);  int kvm_mmu_create(struct kvm_vcpu *vcpu); -int kvm_mmu_setup(struct kvm_vcpu *vcpu); +void kvm_mmu_setup(struct kvm_vcpu *vcpu);  void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,  		u64 dirty_mask, u64 nx_mask, u64 x_mask); -int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); +void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);  void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);  void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,  				     struct kvm_memory_slot *slot, @@ -922,13 +939,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,  		       void *insn, int insn_len);  void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); +void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);  void kvm_enable_tdp(void);  void kvm_disable_tdp(void); -int complete_pio(struct kvm_vcpu *vcpu); -bool kvm_check_iopl(struct kvm_vcpu *vcpu); -  static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)  {  	return gpa; diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 1df11590975..c7678e43465 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -85,28 +85,9 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,  	return ret;  } -static inline uint32_t kvm_cpuid_base(void) -{ -	if (boot_cpu_data.cpuid_level < 0) -		return 0;	/* So we don't blow up on old processors */ - -	if (cpu_has_hypervisor) -		return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0); - -	return 0; -} - -static inline bool kvm_para_available(void) -{ -	return kvm_cpuid_base() != 0; -} - -static inline unsigned int kvm_arch_para_features(void) -{ -	return cpuid_eax(KVM_CPUID_FEATURES); -} -  #ifdef CONFIG_KVM_GUEST +bool kvm_para_available(void); +unsigned int kvm_arch_para_features(void);  void __init kvm_guest_init(void);  void kvm_async_pf_task_wait(u32 token);  void kvm_async_pf_task_wake(u32 token); @@ -126,6 +107,16 @@ static inline void kvm_spinlock_init(void)  #define kvm_async_pf_task_wait(T) do {} while(0)  #define kvm_async_pf_task_wake(T) do {} while(0) +static inline bool kvm_para_available(void) +{ +	return 0; +} + +static inline unsigned int kvm_arch_para_features(void) +{ +	return 0; +} +  static inline u32 kvm_read_and_reset_pf_reason(void)  {  	return 0; diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 2d89e3980cb..4ad6560847b 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -52,12 +52,7 @@ 
static inline void local_sub(long i, local_t *l)   */  static inline int local_sub_and_test(long i, local_t *l)  { -	unsigned char c; - -	asm volatile(_ASM_SUB "%2,%0; sete %1" -		     : "+m" (l->a.counter), "=qm" (c) -		     : "ir" (i) : "memory"); -	return c; +	GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e");  }  /** @@ -70,12 +65,7 @@ static inline int local_sub_and_test(long i, local_t *l)   */  static inline int local_dec_and_test(local_t *l)  { -	unsigned char c; - -	asm volatile(_ASM_DEC "%0; sete %1" -		     : "+m" (l->a.counter), "=qm" (c) -		     : : "memory"); -	return c != 0; +	GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e");  }  /** @@ -88,12 +78,7 @@ static inline int local_dec_and_test(local_t *l)   */  static inline int local_inc_and_test(local_t *l)  { -	unsigned char c; - -	asm volatile(_ASM_INC "%0; sete %1" -		     : "+m" (l->a.counter), "=qm" (c) -		     : : "memory"); -	return c != 0; +	GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e");  }  /** @@ -107,12 +92,7 @@ static inline int local_inc_and_test(local_t *l)   */  static inline int local_add_negative(long i, local_t *l)  { -	unsigned char c; - -	asm volatile(_ASM_ADD "%2,%0; sets %1" -		     : "+m" (l->a.counter), "=qm" (c) -		     : "ir" (i) : "memory"); -	return c; +	GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s");  }  /** diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index cbe6b9e404c..958b90f761e 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -16,6 +16,7 @@  #define MCG_EXT_CNT_SHIFT	16  #define MCG_EXT_CNT(c)		(((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)  #define MCG_SER_P		(1ULL<<24)   /* MCA recovery/new status bits */ +#define MCG_ELOG_P		(1ULL<<26)   /* Extended error log supported */  /* MCG_STATUS register defines */  #define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */ @@ -117,7 +118,6 @@ extern void mce_register_decode_chain(struct notifier_block *nb);  extern void mce_unregister_decode_chain(struct notifier_block *nb);  #include <linux/percpu.h> -#include <linux/init.h>  #include <linux/atomic.h>  extern int mce_p5_enabled; @@ -176,8 +176,6 @@ int mce_available(struct cpuinfo_x86 *c);  DECLARE_PER_CPU(unsigned, mce_exception_count);  DECLARE_PER_CPU(unsigned, mce_poll_count); -extern atomic_t mce_entry; -  typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);  DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index f98bd662531..64dc362506b 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -1,6 +1,21 @@  #ifndef _ASM_X86_MICROCODE_H  #define _ASM_X86_MICROCODE_H +#define native_rdmsr(msr, val1, val2)			\ +do {							\ +	u64 __val = native_read_msr((msr));		\ +	(void)((val1) = (u32)__val);			\ +	(void)((val2) = (u32)(__val >> 32));		\ +} while (0) + +#define native_wrmsr(msr, low, high)			\ +	native_write_msr(msr, low, high) + +#define native_wrmsrl(msr, val)				\ +	native_write_msr((msr),				\ +			 (u32)((u64)(val)),		\ +			 (u32)((u64)(val) >> 32)) +  struct cpu_signature {  	unsigned int sig;  	unsigned int pf; @@ -10,6 +25,7 @@ struct cpu_signature {  struct device;  enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; +extern bool dis_ucode_ldr;  struct microcode_ops {  	enum ucode_state (*request_microcode_user) (int cpu, diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 4c019179a57..b7b10b82d3e 100644 --- a/arch/x86/include/asm/microcode_amd.h 
+++ b/arch/x86/include/asm/microcode_amd.h @@ -61,11 +61,10 @@ extern int __apply_microcode_amd(struct microcode_amd *mc_amd);  extern int apply_microcode_amd(int cpu);  extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); +#define PATCH_MAX_SIZE PAGE_SIZE +extern u8 amd_ucode_patch[PATCH_MAX_SIZE]; +  #ifdef CONFIG_MICROCODE_AMD_EARLY -#ifdef CONFIG_X86_32 -#define MPB_MAX_SIZE PAGE_SIZE -extern u8 amd_bsp_mpb[MPB_MAX_SIZE]; -#endif  extern void __init load_ucode_amd_bsp(void);  extern void load_ucode_amd_ap(void);  extern int __init save_microcode_in_initrd_amd(void); diff --git a/arch/x86/include/asm/misc.h b/arch/x86/include/asm/misc.h new file mode 100644 index 00000000000..475f5bbc7f5 --- /dev/null +++ b/arch/x86/include/asm/misc.h @@ -0,0 +1,6 @@ +#ifndef _ASM_X86_MISC_H +#define _ASM_X86_MISC_H + +int num_digits(int val); + +#endif /* _ASM_X86_MISC_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 5f55e696276..876e74e8eec 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -18,7 +18,7 @@ typedef struct {  #endif  	struct mutex lock; -	void *vdso; +	void __user *vdso;  } mm_context_t;  #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 8a9b3e288cb..1ec990bd7dc 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -11,9 +11,6 @@  #ifdef CONFIG_NUMA  extern struct pglist_data *node_data[];  #define NODE_DATA(nid)	(node_data[nid]) - -#include <asm/numaq.h> -  #endif /* CONFIG_NUMA */  #ifdef CONFIG_DISCONTIGMEM diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 626cf70082d..f5a61795673 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -1,7 +1,6 @@  #ifndef _ASM_X86_MPSPEC_H  #define _ASM_X86_MPSPEC_H -#include <linux/init.h>  #include <asm/mpspec_def.h>  #include <asm/x86_init.h> @@ -26,12 +25,6 @@ extern int pic_mode;  extern unsigned int def_to_bigsmp; -#ifdef CONFIG_X86_NUMAQ -extern int mp_bus_id_to_node[MAX_MP_BUSSES]; -extern int mp_bus_id_to_local[MAX_MP_BUSSES]; -extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; -#endif -  #else /* CONFIG_X86_64: */  #define MAX_MP_BUSSES		256 @@ -94,7 +87,7 @@ static inline void early_reserve_e820_mpc_new(void) { }  #define default_get_smp_config x86_init_uint_noop  #endif -void generic_processor_info(int apicid, int version); +int generic_processor_info(int apicid, int version);  #ifdef CONFIG_ACPI  extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);  extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h deleted file mode 100644 index fc18bf3ce7c..00000000000 --- a/arch/x86/include/asm/mrst.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * mrst.h: Intel Moorestown platform specific setup code - * - * (C) Copyright 2009 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ -#ifndef _ASM_X86_MRST_H -#define _ASM_X86_MRST_H - -#include <linux/sfi.h> - -extern int pci_mrst_init(void); -extern int __init sfi_parse_mrtc(struct sfi_table_header *table); -extern int sfi_mrtc_num; -extern struct sfi_rtc_table_entry sfi_mrtc_array[]; - -/* - * Medfield is the follow-up of Moorestown, it combines two chip solution into - * one. 
Other than that it also added always-on and constant tsc and lapic - * timers. Medfield is the platform name, and the chip name is called Penwell - * we treat Medfield/Penwell as a variant of Moorestown. Penwell can be - * identified via MSRs. - */ -enum mrst_cpu_type { -	/* 1 was Moorestown */ -	MRST_CPU_CHIP_PENWELL = 2, -}; - -extern enum mrst_cpu_type __mrst_cpu_chip; - -#ifdef CONFIG_X86_INTEL_MID - -static inline enum mrst_cpu_type mrst_identify_cpu(void) -{ -	return __mrst_cpu_chip; -} - -#else /* !CONFIG_X86_INTEL_MID */ - -#define mrst_identify_cpu()    (0) - -#endif /* !CONFIG_X86_INTEL_MID */ - -enum mrst_timer_options { -	MRST_TIMER_DEFAULT, -	MRST_TIMER_APBT_ONLY, -	MRST_TIMER_LAPIC_APBT, -}; - -extern enum mrst_timer_options mrst_timer_options; - -/* - * Penwell uses spread spectrum clock, so the freq number is not exactly - * the same as reported by MSR based on SDM. - */ -#define PENWELL_FSB_FREQ_83SKU         83200 -#define PENWELL_FSB_FREQ_100SKU        99840 - -#define SFI_MTMR_MAX_NUM 8 -#define SFI_MRTC_MAX	8 - -extern struct console early_mrst_console; -extern void mrst_early_console_init(void); - -extern struct console early_hsu_console; -extern void hsu_early_console_init(const char *); - -extern void intel_scu_devices_create(void); -extern void intel_scu_devices_destroy(void); - -/* VRTC timer */ -#define MRST_VRTC_MAP_SZ	(1024) -/*#define MRST_VRTC_PGOFFSET	(0xc00) */ - -extern void mrst_rtc_init(void); - -#endif /* _ASM_X86_MRST_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cd9c41938b8..c163215abb9 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -2,6 +2,7 @@  #define _ASM_X86_MSHYPER_H  #include <linux/types.h> +#include <linux/interrupt.h>  #include <asm/hyperv.h>  struct ms_hyperv_info { @@ -16,6 +17,7 @@ void hyperv_callback_vector(void);  #define trace_hyperv_callback_vector hyperv_callback_vector  #endif  void hyperv_vector_handler(struct pt_regs *regs); -void hv_register_vmbus_handler(int irq, irq_handler_t handler); +void hv_setup_vmbus_irq(void (*handler)(void)); +void hv_remove_vmbus_irq(void);  #endif diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index cb7502852ac..de36f22eb0b 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -214,14 +214,20 @@ do {                                                            \  struct msr *msrs_alloc(void);  void msrs_free(struct msr *msrs); +int msr_set_bit(u32 msr, u8 bit); +int msr_clear_bit(u32 msr, u8 bit);  #ifdef CONFIG_SMP  int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);  int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); +int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); +int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q);  void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);  void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);  int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);  int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); +int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); +int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q);  int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);  int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);  #else  /*  CONFIG_SMP  */ @@ -235,6 +241,16 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)  	wrmsr(msr_no, l, h);  	return 0;  } +static inline int 
rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) +{ +	rdmsrl(msr_no, *q); +	return 0; +} +static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) +{ +	wrmsrl(msr_no, q); +	return 0; +}  static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,  				struct msr *msrs)  { @@ -254,6 +270,14 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)  {  	return wrmsr_safe(msr_no, l, h);  } +static inline int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) +{ +	return rdmsrl_safe(msr_no, q); +} +static inline int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) +{ +	return wrmsrl_safe(msr_no, q); +}  static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])  {  	return rdmsr_safe_regs(regs); diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index e7e6751648e..07537a44216 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -20,7 +20,7 @@  static inline void __mutex_fastpath_lock(atomic_t *v,  					 void (*fail_fn)(atomic_t *))  { -	asm volatile goto(LOCK_PREFIX "   decl %0\n" +	asm_volatile_goto(LOCK_PREFIX "   decl %0\n"  			  "   jns %l[exit]\n"  			  : : "m" (v->counter)  			  : "memory", "cc" @@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)  static inline void __mutex_fastpath_unlock(atomic_t *v,  					   void (*fail_fn)(atomic_t *))  { -	asm volatile goto(LOCK_PREFIX "   incl %0\n" +	asm_volatile_goto(LOCK_PREFIX "   incl %0\n"  			  "   jg %l[exit]\n"  			  : : "m" (v->counter)  			  : "memory", "cc" diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 2f366d0ac6b..1da25a5f96f 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -1,6 +1,8 @@  #ifndef _ASM_X86_MWAIT_H  #define _ASM_X86_MWAIT_H +#include <linux/sched.h> +  #define MWAIT_SUBSTATE_MASK		0xf  #define MWAIT_CSTATE_MASK		0xf  #define MWAIT_SUBSTATE_SIZE		4 @@ -13,4 +15,45 @@  #define MWAIT_ECX_INTERRUPT_BREAK	0x1 +static inline void __monitor(const void *eax, unsigned long ecx, +			     unsigned long edx) +{ +	/* "monitor %eax, %ecx, %edx;" */ +	asm volatile(".byte 0x0f, 0x01, 0xc8;" +		     :: "a" (eax), "c" (ecx), "d"(edx)); +} + +static inline void __mwait(unsigned long eax, unsigned long ecx) +{ +	/* "mwait %eax, %ecx;" */ +	asm volatile(".byte 0x0f, 0x01, 0xc9;" +		     :: "a" (eax), "c" (ecx)); +} + +/* + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, + * which can obviate IPI to trigger checking of need_resched. + * We execute MONITOR against need_resched and enter optimized wait state + * through MWAIT. Whenever someone changes need_resched, we would be woken + * up from MWAIT (without an IPI). + * + * New with Core Duo processors, MWAIT can take some hints based on CPU + * capability. 
+ */ +static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +{ +	if (!current_set_polling_and_test()) { +		if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) { +			mb(); +			clflush((void *)&current_thread_info()->flags); +			mb(); +		} + +		__monitor((void *)&current_thread_info()->flags, 0, 0); +		if (!need_resched()) +			__mwait(eax, ecx); +	} +	current_clr_polling(); +} +  #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 86f9301903c..5f2fc4441b1 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -1,6 +1,7 @@  #ifndef _ASM_X86_NMI_H  #define _ASM_X86_NMI_H +#include <linux/irq_work.h>  #include <linux/pm.h>  #include <asm/irq.h>  #include <asm/io.h> @@ -38,6 +39,8 @@ typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *);  struct nmiaction {  	struct list_head	list;  	nmi_handler_t		handler; +	u64			max_duration; +	struct irq_work		irq_work;  	unsigned long		flags;  	const char		*name;  }; diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h deleted file mode 100644 index c3b3c322fd8..00000000000 --- a/arch/x86/include/asm/numaq.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Written by: Patricia Gaughen, IBM Corporation - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT.  See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to <gone@us.ibm.com> - */ - -#ifndef _ASM_X86_NUMAQ_H -#define _ASM_X86_NUMAQ_H - -#ifdef CONFIG_X86_NUMAQ - -extern int found_numaq; -extern int numaq_numa_init(void); -extern int pci_numaq_init(void); - -extern void *xquad_portio; - -#define XQUAD_PORTIO_BASE 0xfe400000 -#define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */ -#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) - -/* - * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the - */ -#define SYS_CFG_DATA_PRIV_ADDR		0x0009d000 /* place for scd in private -						      quad space */ - -/* - * Communication area for each processor on lynxer-processor tests. - * - * NOTE: If you change the size of this eachproc structure you need - *       to change the definition for EACH_QUAD_SIZE. - */ -struct eachquadmem { -	unsigned int	priv_mem_start;		/* Starting address of this */ -						/* quad's private memory. */ -						/* This is always 0. */ -						/* In MB. */ -	unsigned int	priv_mem_size;		/* Size of this quad's */ -						/* private memory. */ -						/* In MB. */ -	unsigned int	low_shrd_mem_strp_start;/* Starting address of this */ -						/* quad's low shared block */ -						/* (untranslated). */ -						/* In MB. */ -	unsigned int	low_shrd_mem_start;	/* Starting address of this */ -						/* quad's low shared memory */ -						/* (untranslated). */ -						/* In MB. 
*/ -	unsigned int	low_shrd_mem_size;	/* Size of this quad's low */ -						/* shared memory. */ -						/* In MB. */ -	unsigned int	lmmio_copb_start;	/* Starting address of this */ -						/* quad's local memory */ -						/* mapped I/O in the */ -						/* compatibility OPB. */ -						/* In MB. */ -	unsigned int	lmmio_copb_size;	/* Size of this quad's local */ -						/* memory mapped I/O in the */ -						/* compatibility OPB. */ -						/* In MB. */ -	unsigned int	lmmio_nopb_start;	/* Starting address of this */ -						/* quad's local memory */ -						/* mapped I/O in the */ -						/* non-compatibility OPB. */ -						/* In MB. */ -	unsigned int	lmmio_nopb_size;	/* Size of this quad's local */ -						/* memory mapped I/O in the */ -						/* non-compatibility OPB. */ -						/* In MB. */ -	unsigned int	io_apic_0_start;	/* Starting address of I/O */ -						/* APIC 0. */ -	unsigned int	io_apic_0_sz;		/* Size I/O APIC 0. */ -	unsigned int	io_apic_1_start;	/* Starting address of I/O */ -						/* APIC 1. */ -	unsigned int	io_apic_1_sz;		/* Size I/O APIC 1. */ -	unsigned int	hi_shrd_mem_start;	/* Starting address of this */ -						/* quad's high shared memory.*/ -						/* In MB. */ -	unsigned int	hi_shrd_mem_size;	/* Size of this quad's high */ -						/* shared memory. */ -						/* In MB. */ -	unsigned int	mps_table_addr;		/* Address of this quad's */ -						/* MPS tables from BIOS, */ -						/* in system space.*/ -	unsigned int	lcl_MDC_pio_addr;	/* Port-I/O address for */ -						/* local access of MDC. */ -	unsigned int	rmt_MDC_mmpio_addr;	/* MM-Port-I/O address for */ -						/* remote access of MDC. */ -	unsigned int	mm_port_io_start;	/* Starting address of this */ -						/* quad's memory mapped Port */ -						/* I/O space. */ -	unsigned int	mm_port_io_size;	/* Size of this quad's memory*/ -						/* mapped Port I/O space. */ -	unsigned int	mm_rmt_io_apic_start;	/* Starting address of this */ -						/* quad's memory mapped */ -						/* remote I/O APIC space. */ -	unsigned int	mm_rmt_io_apic_size;	/* Size of this quad's memory*/ -						/* mapped remote I/O APIC */ -						/* space. */ -	unsigned int	mm_isa_start;		/* Starting address of this */ -						/* quad's memory mapped ISA */ -						/* space (contains MDC */ -						/* memory space). */ -	unsigned int	mm_isa_size;		/* Size of this quad's memory*/ -						/* mapped ISA space (contains*/ -						/* MDC memory space). */ -	unsigned int	rmt_qmi_addr;		/* Remote addr to access QMI.*/ -	unsigned int	lcl_qmi_addr;		/* Local addr to access QMI. */ -}; - -/* - * Note: This structure must be NOT be changed unless the multiproc and - * OS are changed to reflect the new structure. - */ -struct sys_cfg_data { -	unsigned int	quad_id; -	unsigned int	bsp_proc_id; /* Boot Strap Processor in this quad. */ -	unsigned int	scd_version; /* Version number of this table. 
*/ -	unsigned int	first_quad_id; -	unsigned int	quads_present31_0; /* 1 bit for each quad */ -	unsigned int	quads_present63_32; /* 1 bit for each quad */ -	unsigned int	config_flags; -	unsigned int	boot_flags; -	unsigned int	csr_start_addr; /* Absolute value (not in MB) */ -	unsigned int	csr_size; /* Absolute value (not in MB) */ -	unsigned int	lcl_apic_start_addr; /* Absolute value (not in MB) */ -	unsigned int	lcl_apic_size; /* Absolute value (not in MB) */ -	unsigned int	low_shrd_mem_base; /* 0 or 512MB or 1GB */ -	unsigned int	low_shrd_mem_quad_offset; /* 0,128M,256M,512M,1G */ -					/* may not be totally populated */ -	unsigned int	split_mem_enbl; /* 0 for no low shared memory */ -	unsigned int	mmio_sz; /* Size of total system memory mapped I/O */ -				 /* (in MB). */ -	unsigned int	quad_spin_lock; /* Spare location used for quad */ -					/* bringup. */ -	unsigned int	nonzero55; /* For checksumming. */ -	unsigned int	nonzeroaa; /* For checksumming. */ -	unsigned int	scd_magic_number; -	unsigned int	system_type; -	unsigned int	checksum; -	/* -	 *	memory configuration area for each quad -	 */ -	struct		eachquadmem eq[MAX_NUMNODES];	/* indexed by quad id */ -}; - -void numaq_tsc_disable(void); - -#endif /* CONFIG_X86_NUMAQ */ -#endif /* _ASM_X86_NUMAQ_H */ - diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index c87892442e5..775873d3be5 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -71,6 +71,7 @@ extern bool __virt_addr_valid(unsigned long kaddr);  #include <asm-generic/getorder.h>  #define __HAVE_ARCH_GATE_AREA 1 +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA  #endif	/* __KERNEL__ */  #endif /* _ASM_X86_PAGE_H */ diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index 4d550d04b60..904f528cc8e 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -5,10 +5,6 @@  #ifndef __ASSEMBLY__ -#ifdef CONFIG_HUGETLB_PAGE -#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA -#endif -  #define __phys_addr_nodebug(x)	((x) - PAGE_OFFSET)  #ifdef CONFIG_DEBUG_VIRTUAL  extern unsigned long __phys_addr(unsigned long); diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 43dcd804ebd..678205195ae 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,7 +1,7 @@  #ifndef _ASM_X86_PAGE_64_DEFS_H  #define _ASM_X86_PAGE_64_DEFS_H -#define THREAD_SIZE_ORDER	1 +#define THREAD_SIZE_ORDER	2  #define THREAD_SIZE  (PAGE_SIZE << THREAD_SIZE_ORDER)  #define CURRENT_MASK (~(THREAD_SIZE - 1)) @@ -39,9 +39,18 @@  #define __VIRTUAL_MASK_SHIFT	47  /* - * Kernel image size is limited to 512 MB (see level2_kernel_pgt in - * arch/x86/kernel/head_64.S), and it is mapped here: + * Kernel image size is limited to 1GiB due to the fixmap living in the + * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use + * 512MiB by default, leaving 1.5GiB for modules once the page tables + * are fully set up. If kernel ASLR is configured, it can extend the + * kernel page table mapping, reducing the size of the modules area.   
*/ -#define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024) +#define KERNEL_IMAGE_SIZE_DEFAULT      (512 * 1024 * 1024) +#if defined(CONFIG_RANDOMIZE_BASE) && \ +	CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT +#define KERNEL_IMAGE_SIZE   CONFIG_RANDOMIZE_BASE_MAX_OFFSET +#else +#define KERNEL_IMAGE_SIZE      KERNEL_IMAGE_SIZE_DEFAULT +#endif  #endif /* _ASM_X86_PAGE_64_DEFS_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 401f350ef71..cd6e1610e29 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -781,9 +781,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,   */  #define PV_CALLEE_SAVE_REGS_THUNK(func)					\  	extern typeof(func) __raw_callee_save_##func;			\ -	static void *__##func##__ __used = func;			\  									\  	asm(".pushsection .text;"					\ +	    ".globl __raw_callee_save_" #func " ; "			\  	    "__raw_callee_save_" #func ": "				\  	    PV_SAVE_ALL_CALLER_REGS					\  	    "call " #func ";"						\ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index aab8f671b52..7549b8b369e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -388,10 +388,11 @@ extern struct pv_lock_ops pv_lock_ops;  	_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")  /* Simple instruction patching code. */ -#define DEF_NATIVE(ops, name, code) 					\ -	extern const char start_##ops##_##name[] __visible,		\ -			  end_##ops##_##name[] __visible;		\ -	asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") +#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t" + +#define DEF_NATIVE(ops, name, code)					\ +	__visible extern const char start_##ops##_##name[], end_##ops##_##name[];	\ +	asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))  unsigned paravirt_patch_nop(void);  unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 7d7443283a9..0892ea0e683 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -15,7 +15,7 @@ struct pci_sysdata {  	int		domain;		/* PCI domain */  	int		node;		/* NUMA node */  #ifdef CONFIG_ACPI -	void		*acpi;		/* ACPI-specific data */ +	struct acpi_device *companion;	/* ACPI companion device */  #endif  #ifdef CONFIG_X86_64  	void		*iommu;		/* IOMMU private data */ @@ -26,11 +26,6 @@ extern int pci_routeirq;  extern int noioapicquirk;  extern int noioapicreroute; -/* scan a bus after allocating a pci_sysdata for it */ -extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, -					    int node); -extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); -  #ifdef CONFIG_PCI  #ifdef CONFIG_PCI_DOMAINS @@ -70,10 +65,9 @@ extern unsigned long pci_mem_start;  extern int pcibios_enabled;  void pcibios_config_init(void); -struct pci_bus *pcibios_scan_root(int bus); +void pcibios_scan_root(int bus);  void pcibios_set_master(struct pci_dev *dev); -void pcibios_penalize_isa_irq(int irq, int active);  struct irq_routing_table *pcibios_get_irq_routing_table(void);  int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); @@ -104,7 +98,7 @@ extern void pci_iommu_alloc(void);  struct msi_desc;  int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);  void native_teardown_msi_irq(unsigned int irq); -void native_restore_msi_irqs(struct pci_dev *dev, int irq); +void 
native_restore_msi_irqs(struct pci_dev *dev);  int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,  		  unsigned int irq_base, unsigned int irq_offset);  #else @@ -125,7 +119,6 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,  /* generic pci stuff */  #include <asm-generic/pci.h> -#define PCIBIOS_MAX_MEM_32 0xffffffff  #ifdef CONFIG_NUMA  /* Returns the node based on pci bus */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0da5200ee79..851bcdc5db0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -52,7 +52,7 @@   * Compared to the generic __my_cpu_offset version, the following   * saves one instruction and avoids clobbering a temp register.   */ -#define __this_cpu_ptr(ptr)				\ +#define raw_cpu_ptr(ptr)				\  ({							\  	unsigned long tcp_ptr__;			\  	__verify_pcpu_ptr(ptr);				\ @@ -128,7 +128,8 @@ do {							\  do {									\  	typedef typeof(var) pao_T__;					\  	const int pao_ID__ = (__builtin_constant_p(val) &&		\ -			      ((val) == 1 || (val) == -1)) ? (val) : 0;	\ +			      ((val) == 1 || (val) == -1)) ?		\ +				(int)(val) : 0;				\  	if (0) {							\  		pao_T__ pao_tmp__;					\  		pao_tmp__ = (val);					\ @@ -361,28 +362,25 @@ do {									\   */  #define this_cpu_read_stable(var)	percpu_from_op("mov", var, "p" (&(var))) -#define __this_cpu_read_1(pcp)		percpu_from_op("mov", (pcp), "m"(pcp)) -#define __this_cpu_read_2(pcp)		percpu_from_op("mov", (pcp), "m"(pcp)) -#define __this_cpu_read_4(pcp)		percpu_from_op("mov", (pcp), "m"(pcp)) - -#define __this_cpu_write_1(pcp, val)	percpu_to_op("mov", (pcp), val) -#define __this_cpu_write_2(pcp, val)	percpu_to_op("mov", (pcp), val) -#define __this_cpu_write_4(pcp, val)	percpu_to_op("mov", (pcp), val) -#define __this_cpu_add_1(pcp, val)	percpu_add_op((pcp), val) -#define __this_cpu_add_2(pcp, val)	percpu_add_op((pcp), val) -#define __this_cpu_add_4(pcp, val)	percpu_add_op((pcp), val) -#define __this_cpu_and_1(pcp, val)	percpu_to_op("and", (pcp), val) -#define __this_cpu_and_2(pcp, val)	percpu_to_op("and", (pcp), val) -#define __this_cpu_and_4(pcp, val)	percpu_to_op("and", (pcp), val) -#define __this_cpu_or_1(pcp, val)	percpu_to_op("or", (pcp), val) -#define __this_cpu_or_2(pcp, val)	percpu_to_op("or", (pcp), val) -#define __this_cpu_or_4(pcp, val)	percpu_to_op("or", (pcp), val) -#define __this_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val) -#define __this_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val) -#define __this_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val) -#define __this_cpu_xchg_1(pcp, val)	percpu_xchg_op(pcp, val) -#define __this_cpu_xchg_2(pcp, val)	percpu_xchg_op(pcp, val) -#define __this_cpu_xchg_4(pcp, val)	percpu_xchg_op(pcp, val) +#define raw_cpu_read_1(pcp)		percpu_from_op("mov", (pcp), "m"(pcp)) +#define raw_cpu_read_2(pcp)		percpu_from_op("mov", (pcp), "m"(pcp)) +#define raw_cpu_read_4(pcp)		percpu_from_op("mov", (pcp), "m"(pcp)) + +#define raw_cpu_write_1(pcp, val)	percpu_to_op("mov", (pcp), val) +#define raw_cpu_write_2(pcp, val)	percpu_to_op("mov", (pcp), val) +#define raw_cpu_write_4(pcp, val)	percpu_to_op("mov", (pcp), val) +#define raw_cpu_add_1(pcp, val)		percpu_add_op((pcp), val) +#define raw_cpu_add_2(pcp, val)		percpu_add_op((pcp), val) +#define raw_cpu_add_4(pcp, val)		percpu_add_op((pcp), val) +#define raw_cpu_and_1(pcp, val)		percpu_to_op("and", (pcp), val) +#define raw_cpu_and_2(pcp, val)		percpu_to_op("and", (pcp), val) +#define raw_cpu_and_4(pcp, val)		percpu_to_op("and", (pcp), val) +#define 
raw_cpu_or_1(pcp, val)		percpu_to_op("or", (pcp), val) +#define raw_cpu_or_2(pcp, val)		percpu_to_op("or", (pcp), val) +#define raw_cpu_or_4(pcp, val)		percpu_to_op("or", (pcp), val) +#define raw_cpu_xchg_1(pcp, val)	percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_2(pcp, val)	percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_4(pcp, val)	percpu_xchg_op(pcp, val)  #define this_cpu_read_1(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))  #define this_cpu_read_2(pcp)		percpu_from_op("mov", (pcp), "m"(pcp)) @@ -399,23 +397,20 @@ do {									\  #define this_cpu_or_1(pcp, val)		percpu_to_op("or", (pcp), val)  #define this_cpu_or_2(pcp, val)		percpu_to_op("or", (pcp), val)  #define this_cpu_or_4(pcp, val)		percpu_to_op("or", (pcp), val) -#define this_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val) -#define this_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val) -#define this_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)  #define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)  #define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)  #define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval) -#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval) -#define __this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval) -#define __this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_add_return_1(pcp, val)		percpu_add_return_op(pcp, val) +#define raw_cpu_add_return_2(pcp, val)		percpu_add_return_op(pcp, val) +#define raw_cpu_add_return_4(pcp, val)		percpu_add_return_op(pcp, val) +#define raw_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_add_return_1(pcp, val)	percpu_add_return_op(pcp, val) -#define this_cpu_add_return_2(pcp, val)	percpu_add_return_op(pcp, val) -#define this_cpu_add_return_4(pcp, val)	percpu_add_return_op(pcp, val) +#define this_cpu_add_return_1(pcp, val)		percpu_add_return_op(pcp, val) +#define this_cpu_add_return_2(pcp, val)		percpu_add_return_op(pcp, val) +#define this_cpu_add_return_4(pcp, val)		percpu_add_return_op(pcp, val)  #define this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)  #define this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)  #define this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval) @@ -432,7 +427,7 @@ do {									\  	__ret;								\  }) -#define __this_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double +#define raw_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double  #define this_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double  #endif /* CONFIG_X86_CMPXCHG64 */ @@ -441,24 +436,22 @@ do {									\   * 32 bit must fall back to generic operations.   
*/  #ifdef CONFIG_X86_64 -#define __this_cpu_read_8(pcp)		percpu_from_op("mov", (pcp), "m"(pcp)) -#define __this_cpu_write_8(pcp, val)	percpu_to_op("mov", (pcp), val) -#define __this_cpu_add_8(pcp, val)	percpu_add_op((pcp), val) -#define __this_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val) -#define __this_cpu_or_8(pcp, val)	percpu_to_op("or", (pcp), val) -#define __this_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val) -#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval) -#define __this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval) - -#define this_cpu_read_8(pcp)		percpu_from_op("mov", (pcp), "m"(pcp)) -#define this_cpu_write_8(pcp, val)	percpu_to_op("mov", (pcp), val) -#define this_cpu_add_8(pcp, val)	percpu_add_op((pcp), val) -#define this_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val) -#define this_cpu_or_8(pcp, val)		percpu_to_op("or", (pcp), val) -#define this_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val) -#define this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val) -#define this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval) +#define raw_cpu_read_8(pcp)			percpu_from_op("mov", (pcp), "m"(pcp)) +#define raw_cpu_write_8(pcp, val)		percpu_to_op("mov", (pcp), val) +#define raw_cpu_add_8(pcp, val)			percpu_add_op((pcp), val) +#define raw_cpu_and_8(pcp, val)			percpu_to_op("and", (pcp), val) +#define raw_cpu_or_8(pcp, val)			percpu_to_op("or", (pcp), val) +#define raw_cpu_add_return_8(pcp, val)		percpu_add_return_op(pcp, val) +#define raw_cpu_xchg_8(pcp, nval)		percpu_xchg_op(pcp, nval) +#define raw_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval) + +#define this_cpu_read_8(pcp)			percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_write_8(pcp, val)		percpu_to_op("mov", (pcp), val) +#define this_cpu_add_8(pcp, val)		percpu_add_op((pcp), val) +#define this_cpu_and_8(pcp, val)		percpu_to_op("and", (pcp), val) +#define this_cpu_or_8(pcp, val)			percpu_to_op("or", (pcp), val) +#define this_cpu_add_return_8(pcp, val)		percpu_add_return_op(pcp, val) +#define this_cpu_xchg_8(pcp, nval)		percpu_xchg_op(pcp, nval)  #define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)  /* @@ -481,7 +474,7 @@ do {									\  	__ret;								\  }) -#define __this_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double +#define raw_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double  #define this_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double  #endif @@ -502,9 +495,9 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,  	unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;  #ifdef CONFIG_X86_64 -	return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0; +	return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;  #else -	return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0; +	return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;  #endif  } diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index b4389a468fb..c4412e972bb 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -80,12 +80,21 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,  #if PAGETABLE_LEVELS > 2  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)  { -	return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); +	struct page *page; +	page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); +	if 
(!page) +		return NULL; +	if (!pgtable_pmd_page_ctor(page)) { +		__free_pages(page, 0); +		return NULL; +	} +	return (pmd_t *)page_address(page);  }  static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)  {  	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); +	pgtable_pmd_page_dtor(virt_to_page(pmd));  	free_page((unsigned long)pmd);  } diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 3bf2dd0cf61..206a87fdd22 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -55,49 +55,12 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)  #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)  #endif -#ifdef CONFIG_MEM_SOFT_DIRTY - -/* - * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and - * _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset - * into this range. - */ -#define PTE_FILE_MAX_BITS	28 -#define PTE_FILE_SHIFT1		(_PAGE_BIT_PRESENT + 1) -#define PTE_FILE_SHIFT2		(_PAGE_BIT_FILE + 1) -#define PTE_FILE_SHIFT3		(_PAGE_BIT_PROTNONE + 1) -#define PTE_FILE_SHIFT4		(_PAGE_BIT_SOFT_DIRTY + 1) -#define PTE_FILE_BITS1		(PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) -#define PTE_FILE_BITS2		(PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) -#define PTE_FILE_BITS3		(PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1) - -#define pte_to_pgoff(pte)						\ -	((((pte).pte_low >> (PTE_FILE_SHIFT1))				\ -	  & ((1U << PTE_FILE_BITS1) - 1)))				\ -	+ ((((pte).pte_low >> (PTE_FILE_SHIFT2))			\ -	    & ((1U << PTE_FILE_BITS2) - 1))				\ -	   << (PTE_FILE_BITS1))						\ -	+ ((((pte).pte_low >> (PTE_FILE_SHIFT3))			\ -	    & ((1U << PTE_FILE_BITS3) - 1))				\ -	   << (PTE_FILE_BITS1 + PTE_FILE_BITS2))			\ -	+ ((((pte).pte_low >> (PTE_FILE_SHIFT4)))			\ -	    << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)) - -#define pgoff_to_pte(off)						\ -	((pte_t) { .pte_low =						\ -	 ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1)	\ -	 + ((((off) >> PTE_FILE_BITS1)					\ -	     & ((1U << PTE_FILE_BITS2) - 1))				\ -	    << PTE_FILE_SHIFT2)						\ -	 + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2))		\ -	     & ((1U << PTE_FILE_BITS3) - 1))				\ -	    << PTE_FILE_SHIFT3)						\ -	 + ((((off) >>							\ -	      (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)))	\ -	    << PTE_FILE_SHIFT4)						\ -	 + _PAGE_FILE }) - -#else /* CONFIG_MEM_SOFT_DIRTY */ +/* Bit manipulation helper on pte/pgoff entry */ +static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift, +				      unsigned long mask, unsigned int leftshift) +{ +	return ((value >> rightshift) & mask) << leftshift; +}  /*   * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, @@ -105,43 +68,39 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)   */  #define PTE_FILE_MAX_BITS	29  #define PTE_FILE_SHIFT1		(_PAGE_BIT_PRESENT + 1) -#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE  #define PTE_FILE_SHIFT2		(_PAGE_BIT_FILE + 1)  #define PTE_FILE_SHIFT3		(_PAGE_BIT_PROTNONE + 1) -#else -#define PTE_FILE_SHIFT2		(_PAGE_BIT_PROTNONE + 1) -#define PTE_FILE_SHIFT3		(_PAGE_BIT_FILE + 1) -#endif  #define PTE_FILE_BITS1		(PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)  #define PTE_FILE_BITS2		(PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) -#define pte_to_pgoff(pte)						\ -	((((pte).pte_low >> PTE_FILE_SHIFT1)				\ -	  & ((1U << PTE_FILE_BITS1) - 1))				\ -	 + ((((pte).pte_low >> PTE_FILE_SHIFT2)				\ -	     & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1)		\ -	 + (((pte).pte_low >> PTE_FILE_SHIFT3)				\ -	    << 
(PTE_FILE_BITS1 + PTE_FILE_BITS2))) - -#define pgoff_to_pte(off)						\ -	((pte_t) { .pte_low =						\ -	 (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1)	\ -	 + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1))	\ -	    << PTE_FILE_SHIFT2)						\ -	 + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2))		\ -	    << PTE_FILE_SHIFT3)						\ -	 + _PAGE_FILE }) - -#endif /* CONFIG_MEM_SOFT_DIRTY */ +#define PTE_FILE_MASK1		((1U << PTE_FILE_BITS1) - 1) +#define PTE_FILE_MASK2		((1U << PTE_FILE_BITS2) - 1) + +#define PTE_FILE_LSHIFT2	(PTE_FILE_BITS1) +#define PTE_FILE_LSHIFT3	(PTE_FILE_BITS1 + PTE_FILE_BITS2) + +static __always_inline pgoff_t pte_to_pgoff(pte_t pte) +{ +	return (pgoff_t) +		(pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1,  0)		    + +		 pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2,  PTE_FILE_LSHIFT2) + +		 pte_bitop(pte.pte_low, PTE_FILE_SHIFT3,           -1UL,  PTE_FILE_LSHIFT3)); +} + +static __always_inline pte_t pgoff_to_pte(pgoff_t off) +{ +	return (pte_t){ +		.pte_low = +			pte_bitop(off,                0, PTE_FILE_MASK1,  PTE_FILE_SHIFT1) + +			pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2,  PTE_FILE_SHIFT2) + +			pte_bitop(off, PTE_FILE_LSHIFT3,           -1UL,  PTE_FILE_SHIFT3) + +			_PAGE_FILE, +	}; +}  /* Encode and de-code a swap entry */ -#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE  #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)  #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) -#else -#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) -#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) -#endif  #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3d199945870..0ec05601261 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,9 +15,10 @@  	 : (prot))  #ifndef __ASSEMBLY__ -  #include <asm/x86_init.h> +void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); +  /*   * ZERO_PAGE is a global shared page that is always zero: used   * for zero-mapped memory areas etc.. @@ -130,7 +131,8 @@ static inline int pte_exec(pte_t pte)  static inline int pte_special(pte_t pte)  { -	return pte_flags(pte) & _PAGE_SPECIAL; +	return (pte_flags(pte) & (_PAGE_PRESENT|_PAGE_SPECIAL)) == +				 (_PAGE_PRESENT|_PAGE_SPECIAL);  }  static inline unsigned long pte_pfn(pte_t pte) @@ -295,6 +297,7 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)  	return pmd_clear_flags(pmd, _PAGE_PRESENT);  } +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY  static inline int pte_soft_dirty(pte_t pte)  {  	return pte_flags(pte) & _PAGE_SOFT_DIRTY; @@ -330,6 +333,8 @@ static inline int pte_file_soft_dirty(pte_t pte)  	return pte_flags(pte) & _PAGE_SOFT_DIRTY;  } +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ +  /*   * Mask out unsupported bits in a present pgprot.  Non-present pgprots   * can use those bits for other purposes, so leave them be. 
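The pte_bitop() helper introduced in the pgtable-2level.h hunk above just extracts a bit-field and re-places it at another position. A stand-alone user-space sketch of the round trip it enables is below; the bit positions are invented for the example (they are not the real _PAGE_BIT_* values), the point is only how a linear file offset is split around reserved PTE bits and recombined losslessly.

#include <assert.h>
#include <stdio.h>

static unsigned long pte_bitop(unsigned long value, unsigned int rightshift,
			       unsigned long mask, unsigned int leftshift)
{
	return ((value >> rightshift) & mask) << leftshift;
}

/* Hypothetical layout: PTE bits 0 and 8 are reserved, so the offset is
 * stored in bits 1..7 and from bit 9 upwards. */
#define SHIFT1	1
#define SHIFT2	9
#define BITS1	7
#define MASK1	((1UL << BITS1) - 1)

static unsigned long toy_pgoff_to_pte(unsigned long off)
{
	return pte_bitop(off, 0, MASK1, SHIFT1) +
	       pte_bitop(off, BITS1, -1UL, SHIFT2);
}

static unsigned long toy_pte_to_pgoff(unsigned long pte)
{
	return pte_bitop(pte, SHIFT1, MASK1, 0) +
	       pte_bitop(pte, SHIFT2, -1UL, BITS1);
}

int main(void)
{
	unsigned long off = 0x12345;
	unsigned long pte = toy_pgoff_to_pte(off);

	assert(toy_pte_to_pgoff(pte) == off);	/* the round trip is lossless */
	printf("pgoff 0x%lx <-> pte 0x%lx\n", off, pte);
	return 0;
}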
@@ -451,10 +456,23 @@ static inline int pte_present(pte_t a)  			       _PAGE_NUMA);  } +#define pte_present_nonuma pte_present_nonuma +static inline int pte_present_nonuma(pte_t a) +{ +	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); +} +  #define pte_accessible pte_accessible -static inline int pte_accessible(pte_t a) +static inline bool pte_accessible(struct mm_struct *mm, pte_t a)  { -	return pte_flags(a) & _PAGE_PRESENT; +	if (pte_flags(a) & _PAGE_PRESENT) +		return true; + +	if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) && +			mm_tlb_flush_pending(mm)) +		return true; + +	return false;  }  static inline int pte_hidden(pte_t pte) @@ -850,23 +868,25 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,  {  } +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY  static inline pte_t pte_swp_mksoft_dirty(pte_t pte)  { -	VM_BUG_ON(pte_present(pte)); +	VM_BUG_ON(pte_present_nonuma(pte));  	return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);  }  static inline int pte_swp_soft_dirty(pte_t pte)  { -	VM_BUG_ON(pte_present(pte)); +	VM_BUG_ON(pte_present_nonuma(pte));  	return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;  }  static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)  { -	VM_BUG_ON(pte_present(pte)); +	VM_BUG_ON(pte_present_nonuma(pte));  	return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);  } +#endif  #include <asm-generic/pgtable.h>  #endif	/* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index e22c1dbf7fe..5be9063545d 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -143,12 +143,12 @@ static inline int pgd_large(pgd_t pgd) { return 0; }  #define pte_unmap(pte) ((void)(pte))/* NOP */  /* Encode and de-code a swap entry */ -#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE  #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) -#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) +#ifdef CONFIG_NUMA_BALANCING +/* Automatic NUMA balancing needs to be distinguishable from swap entries */ +#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2)  #else -#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) -#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) +#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)  #endif  #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 2d883440cb9..7166e25ecb5 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -58,9 +58,11 @@ typedef struct { pteval_t pte; } pte_t;  #define VMALLOC_START    _AC(0xffffc90000000000, UL)  #define VMALLOC_END      _AC(0xffffe8ffffffffff, UL)  #define VMEMMAP_START	 _AC(0xffffea0000000000, UL) -#define MODULES_VADDR    _AC(0xffffffffa0000000, UL) +#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)  #define MODULES_END      _AC(0xffffffffff000000, UL)  #define MODULES_LEN   (MODULES_END - MODULES_VADDR) +#define ESPFIX_PGD_ENTRY _AC(-2, UL) +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)  #define EARLY_DYNAMIC_PAGE_TABLES	64 diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 0ecac257fb2..f216963760e 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -16,15 +16,26 @@  #define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page */  #define _PAGE_BIT_PAT		7	/* on 4KB pages */  #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */ -#define _PAGE_BIT_UNUSED1	9	
/* available for programmer */ -#define _PAGE_BIT_IOMAP		10	/* flag used to indicate IO mapping */ -#define _PAGE_BIT_HIDDEN	11	/* hidden by kmemcheck */ +#define _PAGE_BIT_SOFTW1	9	/* available for programmer */ +#define _PAGE_BIT_SOFTW2	10	/* " */ +#define _PAGE_BIT_SOFTW3	11	/* " */  #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */ -#define _PAGE_BIT_SPECIAL	_PAGE_BIT_UNUSED1 -#define _PAGE_BIT_CPA_TEST	_PAGE_BIT_UNUSED1 -#define _PAGE_BIT_SPLITTING	_PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */ +#define _PAGE_BIT_SPECIAL	_PAGE_BIT_SOFTW1 +#define _PAGE_BIT_CPA_TEST	_PAGE_BIT_SOFTW1 +#define _PAGE_BIT_SPLITTING	_PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */ +#define _PAGE_BIT_IOMAP		_PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */ +#define _PAGE_BIT_HIDDEN	_PAGE_BIT_SOFTW3 /* hidden by kmemcheck */ +#define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_SOFTW3 /* software dirty tracking */  #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */ +/* + * Swap offsets on configurations that allow automatic NUMA balancing use the + * bits after _PAGE_BIT_GLOBAL. To uniquely distinguish NUMA hinting PTEs from + * swap entries, we use the first bit after _PAGE_BIT_GLOBAL and shrink the + * maximum possible swap space from 16TB to 8TB. + */ +#define _PAGE_BIT_NUMA		(_PAGE_BIT_GLOBAL+1) +  /* If _PAGE_BIT_PRESENT is clear, we use these: */  /* - if the user mapped it with PROT_NONE; pte_present gives true */  #define _PAGE_BIT_PROTNONE	_PAGE_BIT_GLOBAL @@ -40,7 +51,7 @@  #define _PAGE_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)  #define _PAGE_PSE	(_AT(pteval_t, 1) << _PAGE_BIT_PSE)  #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) -#define _PAGE_UNUSED1	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) +#define _PAGE_SOFTW1	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)  #define _PAGE_IOMAP	(_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)  #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)  #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) @@ -61,8 +72,6 @@   * they do not conflict with each other.   */ -#define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_HIDDEN -  #ifdef CONFIG_MEM_SOFT_DIRTY  #define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)  #else @@ -70,6 +79,21 @@  #endif  /* + * _PAGE_NUMA distinguishes between a numa hinting minor fault and a page + * that is not present. The hinting fault gathers numa placement statistics + * (see pte_numa()). The bit is always zero when the PTE is not present. + * + * The bit picked must be always zero when the pmd is present and not + * present, so that we don't lose information when we set it while + * atomically clearing the present bit. + */ +#ifdef CONFIG_NUMA_BALANCING +#define _PAGE_NUMA	(_AT(pteval_t, 1) << _PAGE_BIT_NUMA) +#else +#define _PAGE_NUMA	(_AT(pteval_t, 0)) +#endif + +/*   * Tracking soft dirty bit when a page goes to a swap is tricky.   * We need a bit which can be stored in pte _and_ not conflict   * with swap entry format. On x86 bits 6 and 7 are *not* involved @@ -94,26 +118,6 @@  #define _PAGE_FILE	(_AT(pteval_t, 1) << _PAGE_BIT_FILE)  #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -/* - * _PAGE_NUMA indicates that this page will trigger a numa hinting - * minor page fault to gather numa placement statistics (see - * pte_numa()). The bit picked (8) is within the range between - * _PAGE_FILE (6) and _PAGE_PROTNONE (8) bits. Therefore, it doesn't - * require changes to the swp entry format because that bit is always - * zero when the pte is not present. 
- * - * The bit picked must be always zero when the pmd is present and not - * present, so that we don't lose information when we set it while - * atomically clearing the present bit. - * - * Because we shared the same bit (8) with _PAGE_PROTNONE this can be - * interpreted as _PAGE_NUMA only in places that _PAGE_PROTNONE - * couldn't reach, like handle_mm_fault() (see access_error in - * arch/x86/mm/fault.c, the vma protection must not be PROT_NONE for - * handle_mm_fault() to be invoked). - */ -#define _PAGE_NUMA	_PAGE_PROTNONE -  #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\  			 _PAGE_ACCESSED | _PAGE_DIRTY)  #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |	\ @@ -121,8 +125,9 @@  /* Set of bits not changed in pte_modify */  #define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |		\ -			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) +			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |	\ +			 _PAGE_SOFT_DIRTY | _PAGE_NUMA) +#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA)  #define _PAGE_CACHE_MASK	(_PAGE_PCD | _PAGE_PWT)  #define _PAGE_CACHE_WB		(0) @@ -213,13 +218,8 @@  #ifdef CONFIG_X86_64  #define __PAGE_KERNEL_IDENT_LARGE_EXEC	__PAGE_KERNEL_LARGE_EXEC  #else -/* - * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection - * bits are combined, this will alow user to access the high address mapped - * VDSO in the presence of CONFIG_COMPAT_VDSO - */  #define PTE_IDENT_ATTR	 0x003		/* PRESENT+RW */ -#define PDE_IDENT_ATTR	 0x067		/* PRESENT+RW+USER+DIRTY+ACCESSED */ +#define PDE_IDENT_ATTR	 0x063		/* PRESENT+RW+DIRTY+ACCESSED */  #define PGD_IDENT_ATTR	 0x001		/* PRESENT (no other attributes) */  #endif @@ -381,8 +381,13 @@ static inline void update_page_count(int level, unsigned long pages) { }   * as a pte too.   */  extern pte_t *lookup_address(unsigned long address, unsigned int *level); +extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, +				    unsigned int *level);  extern phys_addr_t slow_virt_to_phys(void *__address); - +extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, +				   unsigned numpages, unsigned long page_flags); +void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, +			       unsigned numpages);  #endif	/* !__ASSEMBLY__ */  #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h new file mode 100644 index 00000000000..7024c12f7bf --- /dev/null +++ b/arch/x86/include/asm/preempt.h @@ -0,0 +1,111 @@ +#ifndef __ASM_PREEMPT_H +#define __ASM_PREEMPT_H + +#include <asm/rmwcc.h> +#include <asm/percpu.h> +#include <linux/thread_info.h> + +DECLARE_PER_CPU(int, __preempt_count); + +/* + * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such + * that a decrement hitting 0 means we can and should reschedule. + */ +#define PREEMPT_ENABLED	(0 + PREEMPT_NEED_RESCHED) + +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. 
+ */ +static __always_inline int preempt_count(void) +{ +	return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; +} + +static __always_inline void preempt_count_set(int pc) +{ +	raw_cpu_write_4(__preempt_count, pc); +} + +/* + * must be macros to avoid header recursion hell + */ +#define task_preempt_count(p) \ +	(task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED) + +#define init_task_preempt_count(p) do { \ +	task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \ +} while (0) + +#define init_idle_preempt_count(p, cpu) do { \ +	task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \ +	per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ +} while (0) + +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * single instruction. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + +static __always_inline void set_preempt_need_resched(void) +{ +	raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); +} + +static __always_inline void clear_preempt_need_resched(void) +{ +	raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); +} + +static __always_inline bool test_preempt_need_resched(void) +{ +	return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); +} + +/* + * The various preempt_count add/sub methods + */ + +static __always_inline void __preempt_count_add(int val) +{ +	raw_cpu_add_4(__preempt_count, val); +} + +static __always_inline void __preempt_count_sub(int val) +{ +	raw_cpu_add_4(__preempt_count, -val); +} + +/* + * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule + * a decrement which hits zero means we have no preempt_count and should + * reschedule. + */ +static __always_inline bool __preempt_count_dec_and_test(void) +{ +	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); +} + +/* + * Returns true when we need to resched and can (barring IRQ state). 
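The inverted NEED_RESCHED folding described in the comments above can be modelled with a plain variable. A minimal user-space sketch follows; PREEMPT_NEED_RESCHED is assumed here to be the top bit of the count (as in linux/preempt.h), and an ordinary global stands in for the per-cpu __preempt_count. The decrement reaches zero only when the preempt count is zero and the (inverted) need-resched bit has been cleared, which is exactly what lets a single "decl; je" do the work.

#include <stdbool.h>
#include <stdio.h>

#define TOY_NEED_RESCHED	0x80000000u	/* assumed top bit */

static unsigned int pc = 0 + TOY_NEED_RESCHED;	/* "PREEMPT_ENABLED" */

static void set_need_resched(void)   { pc &= ~TOY_NEED_RESCHED; }	/* bit is inverted */
static void clear_need_resched(void) { pc |=  TOY_NEED_RESCHED; }

/* models the "decl; je" that __preempt_count_dec_and_test() emits */
static bool dec_and_test(void)        { return --pc == 0; }

int main(void)
{
	pc += 1;			/* preempt_disable() */
	set_need_resched();
	printf("%d\n", dec_and_test());	/* 1: count hit 0 and a resched is wanted */

	pc = TOY_NEED_RESCHED + 1;	/* disabled again, nothing pending */
	clear_need_resched();
	printf("%d\n", dec_and_test());	/* 0: no resched needed */
	return 0;
}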
+ */ +static __always_inline bool should_resched(void) +{ +	return unlikely(!raw_cpu_read_4(__preempt_count)); +} + +#ifdef CONFIG_PREEMPT +  extern asmlinkage void ___preempt_schedule(void); +# define __preempt_schedule() asm ("call ___preempt_schedule") +  extern asmlinkage void preempt_schedule(void); +# ifdef CONFIG_CONTEXT_TRACKING +    extern asmlinkage void ___preempt_schedule_context(void); +#   define __preempt_schedule_context() asm ("call ___preempt_schedule_context") +# endif +#endif + +#endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 987c75ecc33..a4ea02351f4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -27,7 +27,6 @@ struct mm_struct;  #include <linux/cache.h>  #include <linux/threads.h>  #include <linux/math64.h> -#include <linux/init.h>  #include <linux/err.h>  #include <linux/irqflags.h> @@ -72,6 +71,7 @@ extern u16 __read_mostly tlb_lli_4m[NR_INFO];  extern u16 __read_mostly tlb_lld_4k[NR_INFO];  extern u16 __read_mostly tlb_lld_2m[NR_INFO];  extern u16 __read_mostly tlb_lld_4m[NR_INFO]; +extern u16 __read_mostly tlb_lld_1g[NR_INFO];  extern s8  __read_mostly tlb_flushall_shift;  /* @@ -370,6 +370,20 @@ struct ymmh_struct {  	u32 ymmh_space[64];  }; +/* We don't support LWP yet: */ +struct lwp_struct { +	u8 reserved[128]; +}; + +struct bndregs_struct { +	u64 bndregs[8]; +} __packed; + +struct bndcsr_struct { +	u64 cfg_reg_u; +	u64 status_reg; +} __packed; +  struct xsave_hdr_struct {  	u64 xstate_bv;  	u64 reserved1[2]; @@ -380,6 +394,9 @@ struct xsave_struct {  	struct i387_fxsave_struct i387;  	struct xsave_hdr_struct xsave_hdr;  	struct ymmh_struct ymmh; +	struct lwp_struct lwp; +	struct bndregs_struct bndregs; +	struct bndcsr_struct bndcsr;  	/* new processor state extensions will go here */  } __attribute__ ((packed, aligned (64))); @@ -432,6 +449,15 @@ struct stack_canary {  };  DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);  #endif +/* + * per-CPU IRQ handling stacks + */ +struct irq_stack { +	u32                     stack[THREAD_SIZE/sizeof(u32)]; +} __aligned(THREAD_SIZE); + +DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); +DECLARE_PER_CPU(struct irq_stack *, softirq_stack);  #endif	/* X86_64 */  extern unsigned int xstate_size; @@ -488,6 +514,15 @@ struct thread_struct {  	unsigned long		iopl;  	/* Max allowed port in the bitmap, in bytes: */  	unsigned		io_bitmap_max; +	/* +	 * fpu_counter contains the number of consecutive context switches +	 * that the FPU is used. If this is over a threshold, the lazy fpu +	 * saving becomes unlazy to save the trap. 
This is an unsigned char +	 * so that after 256 times the counter wraps and the behavior turns +	 * lazy again; this to deal with bursty apps that only use FPU for +	 * a short time +	 */ +	unsigned char fpu_counter;  };  /* @@ -691,29 +726,6 @@ static inline void sync_core(void)  #endif  } -static inline void __monitor(const void *eax, unsigned long ecx, -			     unsigned long edx) -{ -	/* "monitor %eax, %ecx, %edx;" */ -	asm volatile(".byte 0x0f, 0x01, 0xc8;" -		     :: "a" (eax), "c" (ecx), "d"(edx)); -} - -static inline void __mwait(unsigned long eax, unsigned long ecx) -{ -	/* "mwait %eax, %ecx;" */ -	asm volatile(".byte 0x0f, 0x01, 0xc9;" -		     :: "a" (eax), "c" (ecx)); -} - -static inline void __sti_mwait(unsigned long eax, unsigned long ecx) -{ -	trace_hardirqs_on(); -	/* "mwait %eax, %ecx;" */ -	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" -		     :: "a" (eax), "c" (ecx)); -} -  extern void select_idle_routine(const struct cpuinfo_x86 *c);  extern void init_amd_e400_c1e_mask(void); diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index bade6ac3b14..fbeb06ed0ea 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -39,10 +39,5 @@ static inline void x86_dtb_init(void) { }  extern char cmd_line[COMMAND_LINE_SIZE]; -#define pci_address_to_pio pci_address_to_pio -unsigned long pci_address_to_pio(phys_addr_t addr); - -#define HAVE_ARCH_DEVTREE_FIXUPS -  #endif /* __ASSEMBLY__ */  #endif diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 6fd3fd76979..a90f8972dad 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -12,8 +12,6 @@ void ia32_syscall(void);  void ia32_cstar_target(void);  void ia32_sysenter_target(void); -void syscall32_cpu_init(void); -  void x86_configure_nx(void);  void x86_report_nx(void); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 942a08623a1..6205f0c434d 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -60,7 +60,6 @@ struct pt_regs {  #endif /* !__i386__ */ -#include <linux/init.h>  #ifdef CONFIG_PARAVIRT  #include <asm/paravirt_types.h>  #endif @@ -232,6 +231,22 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,  #define ARCH_HAS_USER_SINGLE_STEP_INFO +/* + * When hitting ptrace_stop(), we cannot return using SYSRET because + * that does not restore the full CPU state, only a minimal set.  The + * ptracer can change arbitrary register values, which is usually okay + * because the usual ptrace stops run off the signal delivery path which + * forces IRET; however, ptrace_event() stops happen in arbitrary places + * in the kernel and don't force IRET path. + * + * So force IRET path after a ptrace stop. + */ +#define arch_ptrace_stop_needed(code, info)				\ +({									\ +	set_thread_flag(TIF_NOTIFY_RESUME);				\ +	false;								\ +}) +  struct user_desc;  extern int do_get_thread_area(struct task_struct *p, int idx,  			      struct user_desc __user *info); diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index be8269b00e2..d6b078e9fa2 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -14,6 +14,8 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall,  			    struct timespec *ts);  void pvclock_resume(void); +void pvclock_touch_watchdogs(void); +  /*   * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,   * yielding a 64-bit result. 
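The scaled-multiply operation described at the end of the pvclock.h hunk above (a 64-bit cycle delta times a 32-bit fixed-point fraction, yielding 64 bits) is easy to sketch in user space. The version below assumes a compiler that provides unsigned __int128 (gcc or clang targeting 64-bit); the in-kernel helper it loosely mirrors, pvclock_scale_delta(), does the equivalent with architecture-specific multiply code.

#include <stdint.h>
#include <stdio.h>

/* (delta << shift) * mul_frac / 2^32: multiply by a 32.32 fixed-point fraction */
static uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
{
	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;
	return (uint64_t)(((unsigned __int128)delta * mul_frac) >> 32);
}

int main(void)
{
	/* a hypothetical 3 GHz TSC: 1/3 ns per cycle -> mul_frac ~= 2^32 / 3 */
	uint32_t mul_frac = (uint32_t)((1ULL << 32) / 3);
	uint64_t cycles = 3000000000ULL;	/* roughly one second's worth */

	printf("%llu ns\n", (unsigned long long)scale_delta(cycles, mul_frac, 0));
	return 0;
}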
diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h new file mode 100644 index 00000000000..70f46f07f94 --- /dev/null +++ b/arch/x86/include/asm/qrwlock.h @@ -0,0 +1,17 @@ +#ifndef _ASM_X86_QRWLOCK_H +#define _ASM_X86_QRWLOCK_H + +#include <asm-generic/qrwlock_types.h> + +#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) +#define queue_write_unlock queue_write_unlock +static inline void queue_write_unlock(struct qrwlock *lock) +{ +        barrier(); +        ACCESS_ONCE(*(u8 *)&lock->cnts) = 0; +} +#endif + +#include <asm-generic/qrwlock.h> + +#endif /* _ASM_X86_QRWLOCK_H */ diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h new file mode 100644 index 00000000000..8f7866a5b9a --- /dev/null +++ b/arch/x86/include/asm/rmwcc.h @@ -0,0 +1,41 @@ +#ifndef _ASM_X86_RMWcc +#define _ASM_X86_RMWcc + +#ifdef CC_HAVE_ASM_GOTO + +#define __GEN_RMWcc(fullop, var, cc, ...)				\ +do {									\ +	asm_volatile_goto (fullop "; j" cc " %l[cc_label]"		\ +			: : "m" (var), ## __VA_ARGS__ 			\ +			: "memory" : cc_label);				\ +	return 0;							\ +cc_label:								\ +	return 1;							\ +} while (0) + +#define GEN_UNARY_RMWcc(op, var, arg0, cc) 				\ +	__GEN_RMWcc(op " " arg0, var, cc) + +#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\ +	__GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) + +#else /* !CC_HAVE_ASM_GOTO */ + +#define __GEN_RMWcc(fullop, var, cc, ...)				\ +do {									\ +	char c;								\ +	asm volatile (fullop "; set" cc " %1"				\ +			: "+m" (var), "=qm" (c)				\ +			: __VA_ARGS__ : "memory");			\ +	return c != 0;							\ +} while (0) + +#define GEN_UNARY_RMWcc(op, var, arg0, cc)				\ +	__GEN_RMWcc(op " " arg0, var, cc) + +#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\ +	__GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) + +#endif /* CC_HAVE_ASM_GOTO */ + +#endif /* _ASM_X86_RMWcc */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index c48a95035a7..6f1c3a8a33a 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -214,6 +214,9 @@  #ifdef __KERNEL__  #ifndef __ASSEMBLY__  extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; +#ifdef CONFIG_TRACING +#define trace_early_idt_handlers early_idt_handlers +#endif  /*   * Load a segment. 
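Stepping back to the new rmwcc.h above: without asm goto, GEN_UNARY_RMWcc("decl", var, "%0", "e") boils down to a single read-modify-write instruction followed by a setcc on its flags, with no separate load or compare. A stand-alone sketch of that fallback form, on an ordinary variable instead of a per-cpu one (x86 with gcc/clang inline asm assumed):

#include <stdbool.h>
#include <stdio.h>

static inline bool decl_and_test(int *v)
{
	bool c;

	/* one "decl" on memory, then capture ZF into c */
	asm volatile("decl %0; sete %1"
		     : "+m" (*v), "=qm" (c)
		     : : "memory");
	return c;
}

int main(void)
{
	int v = 2;

	printf("%d\n", decl_and_test(&v));	/* 0: v is now 1 */
	printf("%d\n", decl_and_test(&v));	/* 1: v just hit zero */
	return 0;
}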
Fall back on loading the zero diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 347555492da..ff4e7b236e2 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -3,7 +3,6 @@  #include <uapi/asm/setup.h> -  #define COMMAND_LINE_SIZE 2048  #include <linux/linkage.h> @@ -29,6 +28,8 @@  #include <asm/bootparam.h>  #include <asm/x86_init.h> +extern u64 relocated_ramdisk; +  /* Interrupt control for vSMPowered x86_64 systems */  #ifdef CONFIG_X86_64  void vsmp_init(void); @@ -38,12 +39,6 @@ static inline void vsmp_init(void) { }  void setup_bios_corruption_check(void); -#ifdef CONFIG_X86_VISWS -extern void visws_early_detect(void); -#else -static inline void visws_early_detect(void) { } -#endif -  extern unsigned long saved_video_mode;  extern void reserve_standard_io_resources(void); @@ -51,9 +46,9 @@ extern void i386_reserve_resources(void);  extern void setup_default_timer_irq(void);  #ifdef CONFIG_X86_INTEL_MID -extern void x86_mrst_early_setup(void); +extern void x86_intel_mid_early_setup(void);  #else -static inline void x86_mrst_early_setup(void) { } +static inline void x86_intel_mid_early_setup(void) { }  #endif  #ifdef CONFIG_X86_INTEL_CE @@ -64,6 +59,8 @@ static inline void x86_ce4100_early_setup(void) { }  #ifndef _SETUP +#include <asm/espfix.h> +  /*   * This is set up by the setup-routine at boot-time   */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 35e67a45718..31eab867e6d 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -92,12 +92,6 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)  	 ? __const_sigismember((set), (sig))	\  	 : __gen_sigismember((set), (sig))) -static inline int sigfindinword(unsigned long word) -{ -	asm("bsfl %1,%0" : "=r"(word) : "rm"(word) : "cc"); -	return word; -} -  struct pt_regs;  #else /* __i386__ */ diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h new file mode 100644 index 00000000000..ee80b92f009 --- /dev/null +++ b/arch/x86/include/asm/simd.h @@ -0,0 +1,11 @@ + +#include <asm/i387.h> + +/* + * may_use_simd - whether it is allowable at this time to issue SIMD + *                instructions or access the SIMD register file + */ +static __must_check inline bool may_use_simd(void) +{ +	return irq_fpu_usable(); +} diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 4137890e88e..8cd27e08e23 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -2,7 +2,6 @@  #define _ASM_X86_SMP_H  #ifndef __ASSEMBLY__  #include <linux/cpumask.h> -#include <linux/init.h>  #include <asm/percpu.h>  /* diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 645cad2c95f..e820c080a4e 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -191,6 +191,14 @@ static inline void clflush(volatile void *__p)  	asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));  } +static inline void clflushopt(volatile void *__p) +{ +	alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P0", +		       ".byte 0x66; clflush %P0", +		       X86_FEATURE_CLFLUSHOPT, +		       "+m" (*(volatile char __force *)__p)); +} +  #define nop() asm volatile ("nop") diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index bf156ded74b..54f1c8068c0 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -26,10 +26,9 @@  # define LOCK_PTR_REG "D"  #endif -#if 
defined(CONFIG_X86_32) && \ -	(defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)) +#if defined(CONFIG_X86_32) && (defined(CONFIG_X86_PPRO_FENCE))  /* - * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock + * On PPro SMP, we use a locked operation to unlock   * (PPro errata 66, 92)   */  # define UNLOCK_LOCK_PREFIX LOCK_PREFIX @@ -188,6 +187,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)  		cpu_relax();  } +#ifndef CONFIG_QUEUE_RWLOCK  /*   * Read-write spinlocks, allowing multiple readers   * but only one writer. @@ -270,6 +270,9 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)  	asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0"  		     : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory");  } +#else +#include <asm/qrwlock.h> +#endif /* CONFIG_QUEUE_RWLOCK */  #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)  #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 4f1bea19945..73c4c007200 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -34,6 +34,10 @@ typedef struct arch_spinlock {  #define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } } +#ifdef CONFIG_QUEUE_RWLOCK +#include <asm-generic/qrwlock_types.h> +#else  #include <asm/rwlock.h> +#endif  #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 977f1761a25..ab05d73e2bb 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -29,4 +29,11 @@ static inline void pci_swiotlb_late_init(void)  static inline void dma_mark_clean(void *addr, size_t size) {} +extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, +					dma_addr_t *dma_handle, gfp_t flags, +					struct dma_attrs *attrs); +extern void x86_swiotlb_free_coherent(struct device *dev, size_t size, +					void *vaddr, dma_addr_t dma_addr, +					struct dma_attrs *attrs); +  #endif /* _ASM_X86_SWIOTLB_H */ diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index 05af3b31d52..f28a24b51dc 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h @@ -41,7 +41,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr)   *   * sync_clear_bit() is atomic and may not be reordered.  However, it does   * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()   * in order to ensure changes are visible on other processors.   
*/  static inline void sync_clear_bit(long nr, volatile unsigned long *addr) diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index aea284b4131..d6a756ae04c 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -13,7 +13,7 @@  #ifndef _ASM_X86_SYSCALL_H  #define _ASM_X86_SYSCALL_H -#include <linux/audit.h> +#include <uapi/linux/audit.h>  #include <linux/sched.h>  #include <linux/err.h>  #include <asm/asm-offsets.h>	/* For NR_syscalls */ @@ -91,8 +91,7 @@ static inline void syscall_set_arguments(struct task_struct *task,  	memcpy(&regs->bx + i, args, n * sizeof(args[0]));  } -static inline int syscall_get_arch(struct task_struct *task, -				   struct pt_regs *regs) +static inline int syscall_get_arch(void)  {  	return AUDIT_ARCH_I386;  } @@ -221,8 +220,7 @@ static inline void syscall_set_arguments(struct task_struct *task,  		}  } -static inline int syscall_get_arch(struct task_struct *task, -				   struct pt_regs *regs) +static inline int syscall_get_arch(void)  {  #ifdef CONFIG_IA32_EMULATION  	/* @@ -234,7 +232,7 @@ static inline int syscall_get_arch(struct task_struct *task,  	 *  	 * x32 tasks should be considered AUDIT_ARCH_X86_64.  	 */ -	if (task_thread_info(task)->status & TS_COMPAT) +	if (task_thread_info(current)->status & TS_COMPAT)  		return AUDIT_ARCH_I386;  #endif  	/* Both x32 and x86_64 are considered "64-bit". */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 27811190cbd..854053889d4 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -9,6 +9,7 @@  #include <linux/compiler.h>  #include <asm/page.h> +#include <asm/percpu.h>  #include <asm/types.h>  /* @@ -28,17 +29,10 @@ struct thread_info {  	__u32			flags;		/* low level flags */  	__u32			status;		/* thread synchronous flags */  	__u32			cpu;		/* current CPU */ -	int			preempt_count;	/* 0 => preemptable, -						   <0 => BUG */ +	int			saved_preempt_count;  	mm_segment_t		addr_limit;  	struct restart_block    restart_block;  	void __user		*sysenter_return; -#ifdef CONFIG_X86_32 -	unsigned long           previous_esp;   /* ESP of the previous stack in -						   case of nested (IRQ) stacks -						*/ -	__u8			supervisor_stack[0]; -#endif  	unsigned int		sig_on_uaccess_error:1;  	unsigned int		uaccess_err:1;	/* uaccess failed */  }; @@ -49,7 +43,7 @@ struct thread_info {  	.exec_domain	= &default_exec_domain,	\  	.flags		= 0,			\  	.cpu		= 0,			\ -	.preempt_count	= INIT_PREEMPT_COUNT,	\ +	.saved_preempt_count = INIT_PREEMPT_COUNT,	\  	.addr_limit	= KERNEL_DS,		\  	.restart_block = {			\  		.fn = do_no_restart_syscall,	\ @@ -89,6 +83,7 @@ struct thread_info {  #define TIF_FORK		18	/* ret_from_fork */  #define TIF_NOHZ		19	/* in adaptive nohz mode */  #define TIF_MEMDIE		20	/* is terminating due to OOM killer */ +#define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */  #define TIF_IO_BITMAP		22	/* uses I/O bitmap */  #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */  #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */ @@ -112,6 +107,7 @@ struct thread_info {  #define _TIF_IA32		(1 << TIF_IA32)  #define _TIF_FORK		(1 << TIF_FORK)  #define _TIF_NOHZ		(1 << TIF_NOHZ) +#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)  #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)  #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)  #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP) @@ -154,11 +150,9 @@ struct thread_info {  #define _TIF_WORK_CTXSW_PREV 
(_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)  #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) -#define PREEMPT_ACTIVE		0x10000000 +#define STACK_WARN		(THREAD_SIZE/8) +#define KERNEL_STACK_OFFSET	(5*(BITS_PER_LONG/8)) -#ifdef CONFIG_X86_32 - -#define STACK_WARN	(THREAD_SIZE/8)  /*   * macros/functions for gaining access to the thread information structure   * @@ -166,40 +160,6 @@ struct thread_info {   */  #ifndef __ASSEMBLY__ - -/* how to get the current stack pointer from C */ -register unsigned long current_stack_pointer asm("esp") __used; - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ -	return (struct thread_info *) -		(current_stack_pointer & ~(THREAD_SIZE - 1)); -} - -#else /* !__ASSEMBLY__ */ - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg)	 \ -	movl $-THREAD_SIZE, reg; \ -	andl %esp, reg - -/* use this one if reg already contains %esp */ -#define GET_THREAD_INFO_WITH_ESP(reg) \ -	andl $-THREAD_SIZE, reg - -#endif - -#else /* X86_32 */ - -#include <asm/percpu.h> -#define KERNEL_STACK_OFFSET (5*8) - -/* - * macros/functions for gaining access to the thread information structure - * preempt_count needs to be 1 initially, until the scheduler is functional. - */ -#ifndef __ASSEMBLY__  DECLARE_PER_CPU(unsigned long, kernel_stack);  static inline struct thread_info *current_thread_info(void) @@ -214,8 +174,8 @@ static inline struct thread_info *current_thread_info(void)  /* how to get the thread information struct from ASM */  #define GET_THREAD_INFO(reg) \ -	movq PER_CPU_VAR(kernel_stack),reg ; \ -	subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg +	_ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ +	_ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;  /*   * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in @@ -225,8 +185,6 @@ static inline struct thread_info *current_thread_info(void)  #endif -#endif /* !X86_32 */ -  /*   * Thread-synchronous status.   * @@ -235,8 +193,6 @@ static inline struct thread_info *current_thread_info(void)   * have to worry about atomic accesses.   */  #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/ -#define TS_POLLING		0x0004	/* idle task polling need_resched, -					   skip sending interrupt */  #define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal() */  #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 34baa0eb5d0..a04eabd43d0 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -1,9 +1,9 @@  #ifndef _ASM_X86_TIMER_H  #define _ASM_X86_TIMER_H -#include <linux/init.h>  #include <linux/pm.h>  #include <linux/percpu.h>  #include <linux/interrupt.h> +#include <linux/math64.h>  #define TICK_SIZE (tick_nsec / 1000) @@ -12,68 +12,26 @@ extern int recalibrate_cpu_khz(void);  extern int no_timer_check; -/* Accelerators for sched_clock() - * convert from cycles(64bits) => nanoseconds (64bits) - *  basic equation: - *		ns = cycles / (freq / ns_per_sec) - *		ns = cycles * (ns_per_sec / freq) - *		ns = cycles * (10^9 / (cpu_khz * 10^3)) - *		ns = cycles * (10^6 / cpu_khz) +/* + * We use the full linear equation: f(x) = a + b*x, in order to allow + * a continuous function in the face of dynamic freq changes.   
* - *	Then we use scaling math (suggested by george@mvista.com) to get: - *		ns = cycles * (10^6 * SC / cpu_khz) / SC - *		ns = cycles * cyc2ns_scale / SC + * Continuity means that when our frequency changes our slope (b); we want to + * ensure that: f(t) == f'(t), which gives: a + b*t == a' + b'*t.   * - *	And since SC is a constant power of two, we can convert the div - *  into a shift. + * Without an offset (a) the above would not be possible.   * - *  We can use khz divisor instead of mhz to keep a better precision, since - *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - *  (mathieu.desnoyers@polymtl.ca) - * - *			-johnstul@us.ibm.com "math is hard, lets go shopping!" - * - * In: - * - * ns = cycles * cyc2ns_scale / SC - * - * Although we may still have enough bits to store the value of ns, - * in some cases, we may not have enough bits to store cycles * cyc2ns_scale, - * leading to an incorrect result. - * - * To avoid this, we can decompose 'cycles' into quotient and remainder - * of division by SC.  Then, - * - * ns = (quot * SC + rem) * cyc2ns_scale / SC - *    = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC - * - *			- sqazi@google.com + * See the comment near cycles_2_ns() for details on how we compute (b).   */ - -DECLARE_PER_CPU(unsigned long, cyc2ns); -DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); - -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - -static inline unsigned long long __cycles_2_ns(unsigned long long cyc) -{ -	int cpu = smp_processor_id(); -	unsigned long long ns = per_cpu(cyc2ns_offset, cpu); -	ns += mult_frac(cyc, per_cpu(cyc2ns, cpu), -			(1UL << CYC2NS_SCALE_FACTOR)); -	return ns; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ -	unsigned long long ns; -	unsigned long flags; - -	local_irq_save(flags); -	ns = __cycles_2_ns(cyc); -	local_irq_restore(flags); - -	return ns; -} +struct cyc2ns_data { +	u32 cyc2ns_mul; +	u32 cyc2ns_shift; +	u64 cyc2ns_offset; +	u32 __count; +	/* u32 hole */ +}; /* 24 bytes -- do not grow */ + +extern struct cyc2ns_data *cyc2ns_read_begin(void); +extern void cyc2ns_read_end(struct cyc2ns_data *);  #endif /* _ASM_X86_TIMER_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index e6d90babc24..04905bfc508 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void)  static inline void __flush_tlb_one(unsigned long addr)  { -	count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); +	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);  	__flush_tlb_single(addr);  } @@ -93,13 +93,13 @@ static inline void __flush_tlb_one(unsigned long addr)   */  static inline void __flush_tlb_up(void)  { -	count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); +	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);  	__flush_tlb();  }  static inline void flush_tlb_all(void)  { -	count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); +	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);  	__flush_tlb_all();  } diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index d35f24e231c..0e8f04f2c26 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -119,9 +119,10 @@ static inline void setup_node_to_cpumask_map(void) { }  extern const struct cpumask *cpu_coregroup_mask(int cpu); -#ifdef ENABLE_TOPO_DEFINES  #define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)  #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id) + +#ifdef ENABLE_TOPO_DEFINES  #define 
topology_core_cpumask(cpu)		(per_cpu(cpu_core_map, cpu))  #define topology_thread_cpumask(cpu)		(per_cpu(cpu_sibling_map, cpu))  #endif @@ -131,25 +132,7 @@ static inline void arch_fix_phys_package_id(int num, u32 slot)  }  struct pci_bus; +int x86_pci_root_bus_node(int bus);  void x86_pci_root_bus_resources(int bus, struct list_head *resources); -#ifdef CONFIG_SMP -#define mc_capable()	((boot_cpu_data.x86_max_cores > 1) && \ -			(cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)) -#define smt_capable()			(smp_num_siblings > 1) -#endif - -#ifdef CONFIG_NUMA -extern int get_mp_bus_to_node(int busnum); -extern void set_mp_bus_to_node(int busnum, int node); -#else -static inline int get_mp_bus_to_node(int busnum) -{ -	return 0; -} -static inline void set_mp_bus_to_node(int busnum, int node) -{ -} -#endif -  #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h new file mode 100644 index 00000000000..2fbc66c7885 --- /dev/null +++ b/arch/x86/include/asm/trace/exceptions.h @@ -0,0 +1,52 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM exceptions + +#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGE_FAULT_H + +#include <linux/tracepoint.h> + +extern void trace_irq_vector_regfunc(void); +extern void trace_irq_vector_unregfunc(void); + +DECLARE_EVENT_CLASS(x86_exceptions, + +	TP_PROTO(unsigned long address, struct pt_regs *regs, +		 unsigned long error_code), + +	TP_ARGS(address, regs, error_code), + +	TP_STRUCT__entry( +		__field(		unsigned long, address	) +		__field(		unsigned long, ip	) +		__field(		unsigned long, error_code ) +	), + +	TP_fast_assign( +		__entry->address = address; +		__entry->ip = regs->ip; +		__entry->error_code = error_code; +	), + +	TP_printk("address=%pf ip=%pf error_code=0x%lx", +		  (void *)__entry->address, (void *)__entry->ip, +		  __entry->error_code) ); + +#define DEFINE_PAGE_FAULT_EVENT(name)				\ +DEFINE_EVENT_FN(x86_exceptions, name,				\ +	TP_PROTO(unsigned long address,	struct pt_regs *regs,	\ +		 unsigned long error_code),			\ +	TP_ARGS(address, regs, error_code),			\ +	trace_irq_vector_regfunc,				\ +	trace_irq_vector_unregfunc); + +DEFINE_PAGE_FAULT_EVENT(page_fault_user); +DEFINE_PAGE_FAULT_EVENT(page_fault_kernel); + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE exceptions +#endif /*  _TRACE_PAGE_FAULT_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 2874df24e7a..4cab890007a 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -72,6 +72,17 @@ DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);  DEFINE_IRQ_VECTOR_EVENT(irq_work);  /* + * We must dis-allow sampling irq_work_exit() because perf event sampling + * itself can cause irq_work, which would lead to an infinite loop; + * + *  1) irq_work_exit happens + *  2) generates perf sample + *  3) generates irq_work + *  4) goto 1 + */ +TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? 
-EPERM : 0); + +/*   * call_function - called when entering/exiting a call function interrupt   * vector handler   */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 7036cb60cd8..bc8352e7010 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -37,6 +37,23 @@ asmlinkage void machine_check(void);  #endif /* CONFIG_X86_MCE */  asmlinkage void simd_coprocessor_error(void); +#ifdef CONFIG_TRACING +asmlinkage void trace_page_fault(void); +#define trace_divide_error divide_error +#define trace_bounds bounds +#define trace_invalid_op invalid_op +#define trace_device_not_available device_not_available +#define trace_coprocessor_segment_overrun coprocessor_segment_overrun +#define trace_invalid_TSS invalid_TSS +#define trace_segment_not_present segment_not_present +#define trace_general_protection general_protection +#define trace_spurious_interrupt_bug spurious_interrupt_bug +#define trace_coprocessor_error coprocessor_error +#define trace_alignment_check alignment_check +#define trace_simd_coprocessor_error simd_coprocessor_error +#define trace_async_page_fault async_page_fault +#endif +  dotraplinkage void do_divide_error(struct pt_regs *, long);  dotraplinkage void do_debug(struct pt_regs *, long);  dotraplinkage void do_nmi(struct pt_regs *, long); @@ -51,10 +68,18 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);  dotraplinkage void do_stack_segment(struct pt_regs *, long);  #ifdef CONFIG_X86_64  dotraplinkage void do_double_fault(struct pt_regs *, long); -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *); +asmlinkage struct pt_regs *sync_regs(struct pt_regs *);  #endif  dotraplinkage void do_general_protection(struct pt_regs *, long);  dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); +#ifdef CONFIG_TRACING +dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long); +#else +static inline void trace_do_page_fault(struct pt_regs *regs, unsigned long error) +{ +	do_page_fault(regs, error); +} +#endif  dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);  dotraplinkage void do_coprocessor_error(struct pt_regs *, long);  dotraplinkage void do_alignment_check(struct pt_regs *, long); @@ -78,7 +103,6 @@ static inline int get_si_code(unsigned long condition)  extern int panic_on_unrecovered_nmi; -void math_error(struct pt_regs *, int, int);  void math_emulate(struct math_emu_info *);  #ifndef CONFIG_X86_32  asmlinkage void smp_thermal_interrupt(void); diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 235be70d5bb..94605c0e9ce 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -65,4 +65,7 @@ extern int notsc_setup(char *);  extern void tsc_save_sched_clock_state(void);  extern void tsc_restore_sched_clock_state(void); +/* MSR based TSC calibration for Intel Atom SoC platforms */ +unsigned long try_msr_calibrate_tsc(void); +  #endif /* _ASM_X86_TSC_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 5838fa911aa..0d592e0a5b8 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -40,22 +40,30 @@  /*   * Test whether a block of memory is a valid user space address.   * Returns 0 if the range is valid, nonzero otherwise. - * - * This is equivalent to the following test: - * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) - * - * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...   
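In the uaccess.h hunk just below, the open-coded add/sbb/cmp carry trick is replaced by __chk_range_not_ok(), which handles the potential overflow of addr + size by checking for wrap-around explicitly. A small user-space sketch of the same check, with an arbitrary limit standing in for user_addr_max():

#include <assert.h>
#include <stdbool.h>

static bool range_not_ok(unsigned long addr, unsigned long size, unsigned long limit)
{
	/* add first, then detect wrap instead of relying on a 33/65-bit carry */
	addr += size;
	if (addr < size)
		return true;		/* addr + size wrapped around zero */
	return addr > limit;
}

int main(void)
{
	unsigned long limit = 0xC0000000UL;	/* hypothetical 3 GB user limit */

	assert(!range_not_ok(0x1000, 0x100, limit));	/* well inside the limit */
	assert(range_not_ok(limit, 1, limit));		/* ends past the limit */
	assert(range_not_ok(-1UL, 16, limit));		/* wraps the address space */
	return 0;
}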
*/ +static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit) +{ +	/* +	 * If we have used "sizeof()" for the size, +	 * we know it won't overflow the limit (but +	 * it might overflow the 'addr', so it's +	 * important to subtract the size from the +	 * limit, not add it to the address). +	 */ +	if (__builtin_constant_p(size)) +		return addr > limit - size; + +	/* Arbitrary sizes? Be careful about overflow */ +	addr += size; +	if (addr < size) +		return true; +	return addr > limit; +}  #define __range_not_ok(addr, size, limit)				\  ({									\ -	unsigned long flag, roksum;					\  	__chk_user_ptr(addr);						\ -	asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0"		\ -	    : "=&r" (flag), "=r" (roksum)				\ -	    : "1" (addr), "g" ((long)(size)),				\ -	      "rm" (limit));						\ -	flag;								\ +	__chk_range_not_ok((unsigned long __force)(addr), size, limit); \  })  /** @@ -78,7 +86,7 @@   * this function, memory access functions may still return -EFAULT.   */  #define access_ok(type, addr, size) \ -	(likely(__range_not_ok(addr, size, user_addr_max()) == 0)) +	likely(!__range_not_ok(addr, size, user_addr_max()))  /*   * The exception table consists of pairs of addresses relative to the @@ -525,6 +533,98 @@ extern __must_check long strnlen_user(const char __user *str, long n);  unsigned long __must_check clear_user(void __user *mem, unsigned long len);  unsigned long __must_check __clear_user(void __user *mem, unsigned long len); +extern void __cmpxchg_wrong_size(void) +	__compiletime_error("Bad argument size for cmpxchg"); + +#define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size)	\ +({									\ +	int __ret = 0;							\ +	__typeof__(ptr) __uval = (uval);				\ +	__typeof__(*(ptr)) __old = (old);				\ +	__typeof__(*(ptr)) __new = (new);				\ +	switch (size) {							\ +	case 1:								\ +	{								\ +		asm volatile("\t" ASM_STAC "\n"				\ +			"1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n"		\ +			"2:\t" ASM_CLAC "\n"				\ +			"\t.section .fixup, \"ax\"\n"			\ +			"3:\tmov     %3, %0\n"				\ +			"\tjmp     2b\n"				\ +			"\t.previous\n"					\ +			_ASM_EXTABLE(1b, 3b)				\ +			: "+r" (__ret), "=a" (__old), "+m" (*(ptr))	\ +			: "i" (-EFAULT), "q" (__new), "1" (__old)	\ +			: "memory"					\ +		);							\ +		break;							\ +	}								\ +	case 2:								\ +	{								\ +		asm volatile("\t" ASM_STAC "\n"				\ +			"1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n"		\ +			"2:\t" ASM_CLAC "\n"				\ +			"\t.section .fixup, \"ax\"\n"			\ +			"3:\tmov     %3, %0\n"				\ +			"\tjmp     2b\n"				\ +			"\t.previous\n"					\ +			_ASM_EXTABLE(1b, 3b)				\ +			: "+r" (__ret), "=a" (__old), "+m" (*(ptr))	\ +			: "i" (-EFAULT), "r" (__new), "1" (__old)	\ +			: "memory"					\ +		);							\ +		break;							\ +	}								\ +	case 4:								\ +	{								\ +		asm volatile("\t" ASM_STAC "\n"				\ +			"1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"		\ +			"2:\t" ASM_CLAC "\n"				\ +			"\t.section .fixup, \"ax\"\n"			\ +			"3:\tmov     %3, %0\n"				\ +			"\tjmp     2b\n"				\ +			"\t.previous\n"					\ +			_ASM_EXTABLE(1b, 3b)				\ +			: "+r" (__ret), "=a" (__old), "+m" (*(ptr))	\ +			: "i" (-EFAULT), "r" (__new), "1" (__old)	\ +			: "memory"					\ +		);							\ +		break;							\ +	}								\ +	case 8:								\ +	{								\ +		if (!IS_ENABLED(CONFIG_X86_64))				\ +			__cmpxchg_wrong_size();				\ +									\ +		asm volatile("\t" ASM_STAC "\n"				\ +			"1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n"		\ +			"2:\t" ASM_CLAC "\n"				\ +			"\t.section .fixup, \"ax\"\n"			\ +			"3:\tmov     %3, %0\n"				\ +			
"\tjmp     2b\n"				\ +			"\t.previous\n"					\ +			_ASM_EXTABLE(1b, 3b)				\ +			: "+r" (__ret), "=a" (__old), "+m" (*(ptr))	\ +			: "i" (-EFAULT), "r" (__new), "1" (__old)	\ +			: "memory"					\ +		);							\ +		break;							\ +	}								\ +	default:							\ +		__cmpxchg_wrong_size();					\ +	}								\ +	*__uval = __old;						\ +	__ret;								\ +}) + +#define user_atomic_cmpxchg_inatomic(uval, ptr, old, new)		\ +({									\ +	access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ?		\ +		__user_atomic_cmpxchg_inatomic((uval), (ptr),		\ +				(old), (new), sizeof(*(ptr))) :		\ +		-EFAULT;						\ +}) +  /*   * movsl can be slow when source and dest are not both 8-byte aligned   */ @@ -542,5 +642,103 @@ extern struct movsl_mask {  # include <asm/uaccess_64.h>  #endif +unsigned long __must_check _copy_from_user(void *to, const void __user *from, +					   unsigned n); +unsigned long __must_check _copy_to_user(void __user *to, const void *from, +					 unsigned n); + +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS +# define copy_user_diag __compiletime_error +#else +# define copy_user_diag __compiletime_warning +#endif + +extern void copy_user_diag("copy_from_user() buffer size is too small") +copy_from_user_overflow(void); +extern void copy_user_diag("copy_to_user() buffer size is too small") +copy_to_user_overflow(void) __asm__("copy_from_user_overflow"); + +#undef copy_user_diag + +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS + +extern void +__compiletime_warning("copy_from_user() buffer size is not provably correct") +__copy_from_user_overflow(void) __asm__("copy_from_user_overflow"); +#define __copy_from_user_overflow(size, count) __copy_from_user_overflow() + +extern void +__compiletime_warning("copy_to_user() buffer size is not provably correct") +__copy_to_user_overflow(void) __asm__("copy_from_user_overflow"); +#define __copy_to_user_overflow(size, count) __copy_to_user_overflow() + +#else + +static inline void +__copy_from_user_overflow(int size, unsigned long count) +{ +	WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); +} + +#define __copy_to_user_overflow __copy_from_user_overflow + +#endif + +static inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) +{ +	int sz = __compiletime_object_size(to); + +	might_fault(); + +	/* +	 * While we would like to have the compiler do the checking for us +	 * even in the non-constant size case, any false positives there are +	 * a problem (especially when DEBUG_STRICT_USER_COPY_CHECKS, but even +	 * without - the [hopefully] dangerous looking nature of the warning +	 * would make people go look at the respecitive call sites over and +	 * over again just to find that there's no problem). +	 * +	 * And there are cases where it's just not realistic for the compiler +	 * to prove the count to be in range. For example when multiple call +	 * sites of a helper function - perhaps in different source files - +	 * all doing proper range checking, yet the helper function not doing +	 * so again. +	 * +	 * Therefore limit the compile time checking to the constant size +	 * case, and do only runtime checking for non-constant sizes. 
+	 */ + +	if (likely(sz < 0 || sz >= n)) +		n = _copy_from_user(to, from, n); +	else if(__builtin_constant_p(n)) +		copy_from_user_overflow(); +	else +		__copy_from_user_overflow(sz, n); + +	return n; +} + +static inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ +	int sz = __compiletime_object_size(from); + +	might_fault(); + +	/* See the comment in copy_from_user() above. */ +	if (likely(sz < 0 || sz >= n)) +		n = _copy_to_user(to, from, n); +	else if(__builtin_constant_p(n)) +		copy_to_user_overflow(); +	else +		__copy_to_user_overflow(sz, n); + +	return n; +} + +#undef __copy_from_user_overflow +#undef __copy_to_user_overflow +  #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 7f760a9f1f6..3c03a5de64d 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -184,33 +184,4 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from,         return __copy_from_user_ll_nocache_nozero(to, from, n);  } -unsigned long __must_check copy_to_user(void __user *to, -					const void *from, unsigned long n); -unsigned long __must_check _copy_from_user(void *to, -					  const void __user *from, -					  unsigned long n); - - -extern void copy_from_user_overflow(void) -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS -	__compiletime_error("copy_from_user() buffer size is not provably correct") -#else -	__compiletime_warning("copy_from_user() buffer size is not provably correct") -#endif -; - -static inline unsigned long __must_check copy_from_user(void *to, -					  const void __user *from, -					  unsigned long n) -{ -	int sz = __compiletime_object_size(to); - -	if (likely(sz == -1 || sz >= n)) -		n = _copy_from_user(to, from, n); -	else -		copy_from_user_overflow(); - -	return n; -} -  #endif /* _ASM_X86_UACCESS_32_H */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 4f7923dd000..12a26b979bf 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -46,42 +46,13 @@ copy_user_generic(void *to, const void *from, unsigned len)  }  __must_check unsigned long -_copy_to_user(void __user *to, const void *from, unsigned len); -__must_check unsigned long -_copy_from_user(void *to, const void __user *from, unsigned len); -__must_check unsigned long  copy_in_user(void __user *to, const void __user *from, unsigned len); -static inline unsigned long __must_check copy_from_user(void *to, -					  const void __user *from, -					  unsigned long n) -{ -	int sz = __compiletime_object_size(to); - -	might_fault(); -	if (likely(sz == -1 || sz >= n)) -		n = _copy_from_user(to, from, n); -#ifdef CONFIG_DEBUG_VM -	else -		WARN(1, "Buffer overflow detected!\n"); -#endif -	return n; -} -  static __always_inline __must_check -int copy_to_user(void __user *dst, const void *src, unsigned size) -{ -	might_fault(); - -	return _copy_to_user(dst, src, size); -} - -static __always_inline __must_check -int __copy_from_user(void *dst, const void __user *src, unsigned size) +int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)  {  	int ret = 0; -	might_fault();  	if (!__builtin_constant_p(size))  		return copy_user_generic(dst, (__force void *)src, size);  	switch (size) { @@ -121,11 +92,17 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size)  }  static __always_inline __must_check -int __copy_to_user(void __user *dst, const void *src, unsigned size) +int 
__copy_from_user(void *dst, const void __user *src, unsigned size) +{ +	might_fault(); +	return __copy_from_user_nocheck(dst, src, size); +} + +static __always_inline __must_check +int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)  {  	int ret = 0; -	might_fault();  	if (!__builtin_constant_p(size))  		return copy_user_generic((__force void *)dst, src, size);  	switch (size) { @@ -165,6 +142,13 @@ int __copy_to_user(void __user *dst, const void *src, unsigned size)  }  static __always_inline __must_check +int __copy_to_user(void __user *dst, const void *src, unsigned size) +{ +	might_fault(); +	return __copy_to_user_nocheck(dst, src, size); +} + +static __always_inline __must_check  int __copy_in_user(void __user *dst, const void __user *src, unsigned size)  {  	int ret = 0; @@ -220,13 +204,13 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)  static __must_check __always_inline int  __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size)  { -	return copy_user_generic(dst, (__force const void *)src, size); +	return __copy_from_user_nocheck(dst, src, size);  }  static __must_check __always_inline int  __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)  { -	return copy_user_generic((__force void *)dst, src, size); +	return __copy_to_user_nocheck(dst, src, size);  }  extern long __copy_user_nocache(void *dst, const void __user *src, diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index c2a48139c34..2b19caa4081 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -23,6 +23,9 @@  #  include <asm/unistd_64.h>  #  include <asm/unistd_64_x32.h>  #  define __ARCH_WANT_COMPAT_SYS_TIME +#  define __ARCH_WANT_COMPAT_SYS_GETDENTS64 +#  define __ARCH_WANT_COMPAT_SYS_PREADV64 +#  define __ARCH_WANT_COMPAT_SYS_PWRITEV64  # endif @@ -38,7 +41,6 @@  # define __ARCH_WANT_SYS_OLD_GETRLIMIT  # define __ARCH_WANT_SYS_OLD_UNAME  # define __ARCH_WANT_SYS_PAUSE -# define __ARCH_WANT_SYS_SGETMASK  # define __ARCH_WANT_SYS_SIGNAL  # define __ARCH_WANT_SYS_SIGPENDING  # define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 6e5197910fd..74f4c2ff642 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -33,12 +33,27 @@ typedef u8 uprobe_opcode_t;  #define UPROBE_SWBP_INSN		0xcc  #define UPROBE_SWBP_INSN_SIZE		   1 +struct uprobe_xol_ops; +  struct arch_uprobe { -	u16				fixups; -	u8				insn[MAX_UINSN_BYTES]; -#ifdef CONFIG_X86_64 -	unsigned long			rip_rela_target_address; -#endif +	union { +		u8			insn[MAX_UINSN_BYTES]; +		u8			ixol[MAX_UINSN_BYTES]; +	}; + +	const struct uprobe_xol_ops	*ops; + +	union { +		struct { +			s32	offs; +			u8	ilen; +			u8	opc1; +		}			branch; +		struct { +			u8	fixups; +			u8	ilen; +		} 			defparam; +	};  };  struct arch_uprobe_task { @@ -49,11 +64,4 @@ struct arch_uprobe_task {  	unsigned int			saved_tf;  }; -extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); -extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); -extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); -extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern unsigned long 
arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);  #endif	/* _ASM_UPROBES_H */ diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 2c32df95bb7..c63e925fd6b 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -5,7 +5,7 @@   *   * SGI UV architectural definitions   * - * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.   */  #ifndef _ASM_X86_UV_UV_HUB_H @@ -204,16 +204,6 @@ static inline int is_uvx_hub(void)  	return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE;  } -static inline int is_uv2_1_hub(void) -{ -	return uv_hub_info->hub_revision == UV2_HUB_REVISION_BASE; -} - -static inline int is_uv2_2_hub(void) -{ -	return uv_hub_info->hub_revision == UV2_HUB_REVISION_BASE + 1; -} -  union uvh_apicid {      unsigned long       v;      struct uvh_apicid_s { @@ -502,8 +492,8 @@ struct uv_blade_info {  	unsigned short	nr_online_cpus;  	unsigned short	pnode;  	short		memory_nid; -	spinlock_t	nmi_lock; -	unsigned long	nmi_count; +	spinlock_t	nmi_lock;	/* obsolete, see uv_hub_nmi */ +	unsigned long	nmi_count;	/* obsolete, see uv_hub_nmi */  };  extern struct uv_blade_info *uv_blade_info;  extern short *uv_node_to_blade; @@ -576,6 +566,59 @@ static inline int uv_num_possible_blades(void)  	return uv_possible_blades;  } +/* Per Hub NMI support */ +extern void uv_nmi_setup(void); + +/* BMC sets a bit this MMR non-zero before sending an NMI */ +#define UVH_NMI_MMR		UVH_SCRATCH5 +#define UVH_NMI_MMR_CLEAR	UVH_SCRATCH5_ALIAS +#define UVH_NMI_MMR_SHIFT	63 +#define	UVH_NMI_MMR_TYPE	"SCRATCH5" + +/* Newer SMM NMI handler, not present in all systems */ +#define UVH_NMI_MMRX		UVH_EVENT_OCCURRED0 +#define UVH_NMI_MMRX_CLEAR	UVH_EVENT_OCCURRED0_ALIAS +#define UVH_NMI_MMRX_SHIFT	(is_uv1_hub() ? 
\ +					UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :\ +					UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT) +#define	UVH_NMI_MMRX_TYPE	"EXTIO_INT0" + +/* Non-zero indicates newer SMM NMI handler present */ +#define UVH_NMI_MMRX_SUPPORTED	UVH_EXTIO_INT0_BROADCAST + +/* Indicates to BIOS that we want to use the newer SMM NMI handler */ +#define UVH_NMI_MMRX_REQ	UVH_SCRATCH5_ALIAS_2 +#define UVH_NMI_MMRX_REQ_SHIFT	62 + +struct uv_hub_nmi_s { +	raw_spinlock_t	nmi_lock; +	atomic_t	in_nmi;		/* flag this node in UV NMI IRQ */ +	atomic_t	cpu_owner;	/* last locker of this struct */ +	atomic_t	read_mmr_count;	/* count of MMR reads */ +	atomic_t	nmi_count;	/* count of true UV NMIs */ +	unsigned long	nmi_value;	/* last value read from NMI MMR */ +}; + +struct uv_cpu_nmi_s { +	struct uv_hub_nmi_s	*hub; +	atomic_t		state; +	atomic_t		pinging; +	int			queries; +	int			pings; +}; + +DECLARE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi); +#define uv_cpu_nmi			(__get_cpu_var(__uv_cpu_nmi)) +#define uv_hub_nmi			(uv_cpu_nmi.hub) +#define uv_cpu_nmi_per(cpu)		(per_cpu(__uv_cpu_nmi, cpu)) +#define uv_hub_nmi_per(cpu)		(uv_cpu_nmi_per(cpu).hub) + +/* uv_cpu_nmi_states */ +#define	UV_NMI_STATE_OUT		0 +#define	UV_NMI_STATE_IN			1 +#define	UV_NMI_STATE_DUMP		2 +#define	UV_NMI_STATE_DUMP_DONE		3 +  /* Update SCIR state */  static inline void uv_set_scir_bits(unsigned char value)  { diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index bd5f80e58a2..ddd8db6b6e7 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -5,7 +5,7 @@   *   * SGI UV MMR definitions   * - * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.   */  #ifndef _ASM_X86_UV_UV_MMRS_H @@ -461,6 +461,23 @@ union uvh_event_occurred0_u {  /* ========================================================================= */ +/*                         UVH_EXTIO_INT0_BROADCAST                          */ +/* ========================================================================= */ +#define UVH_EXTIO_INT0_BROADCAST 0x61448UL +#define UVH_EXTIO_INT0_BROADCAST_32 0x3f0 + +#define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT		0 +#define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK		0x0000000000000001UL + +union uvh_extio_int0_broadcast_u { +	unsigned long	v; +	struct uvh_extio_int0_broadcast_s { +		unsigned long	enable:1;			/* RW */ +		unsigned long	rsvd_1_63:63; +	} s; +}; + +/* ========================================================================= */  /*                         UVH_GR0_TLB_INT0_CONFIG                           */  /* ========================================================================= */  #define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL @@ -2606,6 +2623,20 @@ union uvh_scratch5_u {  };  /* ========================================================================= */ +/*                            UVH_SCRATCH5_ALIAS                             */ +/* ========================================================================= */ +#define UVH_SCRATCH5_ALIAS 0x2d0208UL +#define UVH_SCRATCH5_ALIAS_32 0x780 + + +/* ========================================================================= */ +/*                           UVH_SCRATCH5_ALIAS_2                            */ +/* ========================================================================= */ +#define UVH_SCRATCH5_ALIAS_2 0x2d0210UL +#define UVH_SCRATCH5_ALIAS_2_32 0x788 + + +/* ========================================================================= */  /*                
          UVXH_EVENT_OCCURRED2                             */  /* ========================================================================= */  #define UVXH_EVENT_OCCURRED2 0x70100UL @@ -2772,6 +2803,46 @@ union uv1h_lb_target_physical_apic_id_mask_u {  };  /* ========================================================================= */ +/*                          UV3H_GR0_GAM_GR_CONFIG                           */ +/* ========================================================================= */ +#define UV3H_GR0_GAM_GR_CONFIG				0xc00028UL + +#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT		0 +#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT		10 +#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK		0x000000000000003fUL +#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK		0x0000000000000400UL + +union uv3h_gr0_gam_gr_config_u { +	unsigned long	v; +	struct uv3h_gr0_gam_gr_config_s { +		unsigned long	m_skt:6;			/* RW */ +		unsigned long	undef_6_9:4;			/* Undefined */ +		unsigned long	subspace:1;			/* RW */ +		unsigned long	reserved:53; +	} s3; +}; + +/* ========================================================================= */ +/*                          UV3H_GR1_GAM_GR_CONFIG                           */ +/* ========================================================================= */ +#define UV3H_GR1_GAM_GR_CONFIG				0x1000028UL + +#define UV3H_GR1_GAM_GR_CONFIG_M_SKT_SHFT		0 +#define UV3H_GR1_GAM_GR_CONFIG_SUBSPACE_SHFT		10 +#define UV3H_GR1_GAM_GR_CONFIG_M_SKT_MASK		0x000000000000003fUL +#define UV3H_GR1_GAM_GR_CONFIG_SUBSPACE_MASK		0x0000000000000400UL + +union uv3h_gr1_gam_gr_config_u { +	unsigned long	v; +	struct uv3h_gr1_gam_gr_config_s { +		unsigned long	m_skt:6;			/* RW */ +		unsigned long	undef_6_9:4;			/* Undefined */ +		unsigned long	subspace:1;			/* RW */ +		unsigned long	reserved:53; +	} s3; +}; + +/* ========================================================================= */  /*                   UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR                   */  /* ========================================================================= */  #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR		0x1603000UL diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index fddb53d6391..30be253dd28 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -1,34 +1,54 @@  #ifndef _ASM_X86_VDSO_H  #define _ASM_X86_VDSO_H +#include <asm/page_types.h> +#include <linux/linkage.h> +#include <linux/init.h> + +#ifndef __ASSEMBLER__ + +#include <linux/mm_types.h> + +struct vdso_image { +	void *data; +	unsigned long size;   /* Always a multiple of PAGE_SIZE */ + +	/* text_mapping.pages is big enough for data/size page pointers */ +	struct vm_special_mapping text_mapping; + +	unsigned long alt, alt_len; + +	unsigned long sym_end_mapping;  /* Total size of the mapping */ + +	unsigned long sym_vvar_page; +	unsigned long sym_hpet_page; +	unsigned long sym_VDSO32_NOTE_MASK; +	unsigned long sym___kernel_sigreturn; +	unsigned long sym___kernel_rt_sigreturn; +	unsigned long sym___kernel_vsyscall; +	unsigned long sym_VDSO32_SYSENTER_RETURN; +}; + +#ifdef CONFIG_X86_64 +extern const struct vdso_image vdso_image_64; +#endif + +#ifdef CONFIG_X86_X32 +extern const struct vdso_image vdso_image_x32; +#endif +  #if defined CONFIG_X86_32 || defined CONFIG_COMPAT -extern const char VDSO32_PRELINK[]; - -/* - * Given a pointer to the vDSO image, find the pointer to VDSO32_name - * as that symbol is defined in the vDSO sources or linker script. 
- */ -#define VDSO32_SYMBOL(base, name)					\ -({									\ -	extern const char VDSO32_##name[];				\ -	(void __user *)(VDSO32_##name - VDSO32_PRELINK +		\ -			(unsigned long)(base));				\ -}) +extern const struct vdso_image vdso_image_32_int80; +#ifdef CONFIG_COMPAT +extern const struct vdso_image vdso_image_32_syscall;  #endif +extern const struct vdso_image vdso_image_32_sysenter; + +extern const struct vdso_image *selected_vdso32; +#endif + +extern void __init init_vdso_image(const struct vdso_image *image); -/* - * These symbols are defined with the addresses in the vsyscall page. - * See vsyscall-sigreturn.S. - */ -extern void __user __kernel_sigreturn; -extern void __user __kernel_rt_sigreturn; - -/* - * These symbols are defined by vdso32.S to mark the bounds - * of the ELF DSO images included therein. - */ -extern const char vdso32_int80_start, vdso32_int80_end; -extern const char vdso32_syscall_start, vdso32_syscall_end; -extern const char vdso32_sysenter_start, vdso32_sysenter_end; +#endif /* __ASSEMBLER__ */  #endif /* _ASM_X86_VDSO_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 46e24d36b7d..3c3366c2e37 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -1,30 +1,73 @@  #ifndef _ASM_X86_VGTOD_H  #define _ASM_X86_VGTOD_H -#include <asm/vsyscall.h> +#include <linux/compiler.h>  #include <linux/clocksource.h> +#ifdef BUILD_VDSO32_64 +typedef u64 gtod_long_t; +#else +typedef unsigned long gtod_long_t; +#endif +/* + * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time + * so be carefull by modifying this structure. + */  struct vsyscall_gtod_data { -	seqcount_t	seq; +	unsigned seq; -	struct { /* extract of a clocksource struct */ -		int vclock_mode; -		cycle_t	cycle_last; -		cycle_t	mask; -		u32	mult; -		u32	shift; -	} clock; +	int vclock_mode; +	cycle_t	cycle_last; +	cycle_t	mask; +	u32	mult; +	u32	shift;  	/* open coded 'struct timespec' */ -	time_t		wall_time_sec;  	u64		wall_time_snsec; +	gtod_long_t	wall_time_sec; +	gtod_long_t	monotonic_time_sec;  	u64		monotonic_time_snsec; -	time_t		monotonic_time_sec; +	gtod_long_t	wall_time_coarse_sec; +	gtod_long_t	wall_time_coarse_nsec; +	gtod_long_t	monotonic_time_coarse_sec; +	gtod_long_t	monotonic_time_coarse_nsec; -	struct timezone sys_tz; -	struct timespec wall_time_coarse; -	struct timespec monotonic_time_coarse; +	int		tz_minuteswest; +	int		tz_dsttime;  };  extern struct vsyscall_gtod_data vsyscall_gtod_data; +static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) +{ +	unsigned ret; + +repeat: +	ret = ACCESS_ONCE(s->seq); +	if (unlikely(ret & 1)) { +		cpu_relax(); +		goto repeat; +	} +	smp_rmb(); +	return ret; +} + +static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, +					unsigned start) +{ +	smp_rmb(); +	return unlikely(s->seq != start); +} + +static inline void gtod_write_begin(struct vsyscall_gtod_data *s) +{ +	++s->seq; +	smp_wmb(); +} + +static inline void gtod_write_end(struct vsyscall_gtod_data *s) +{ +	smp_wmb(); +	++s->seq; +} +  #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/visws/cobalt.h b/arch/x86/include/asm/visws/cobalt.h deleted file mode 100644 index 2edb37637ea..00000000000 --- a/arch/x86/include/asm/visws/cobalt.h +++ /dev/null @@ -1,127 +0,0 @@ -#ifndef _ASM_X86_VISWS_COBALT_H -#define _ASM_X86_VISWS_COBALT_H - -#include <asm/fixmap.h> - -/* - * Cobalt SGI Visual Workstation system ASIC - */  - -#define CO_CPU_NUM_PHYS 0x1e00 -#define CO_CPU_TAB_PHYS (CO_CPU_NUM_PHYS 
+ 2) - -#define CO_CPU_MAX 4 - -#define	CO_CPU_PHYS		0xc2000000 -#define	CO_APIC_PHYS		0xc4000000 - -/* see set_fixmap() and asm/fixmap.h */ -#define	CO_CPU_VADDR		(fix_to_virt(FIX_CO_CPU)) -#define	CO_APIC_VADDR		(fix_to_virt(FIX_CO_APIC)) - -/* Cobalt CPU registers -- relative to CO_CPU_VADDR, use co_cpu_*() */ -#define	CO_CPU_REV		0x08 -#define	CO_CPU_CTRL		0x10 -#define	CO_CPU_STAT		0x20 -#define	CO_CPU_TIMEVAL		0x30 - -/* CO_CPU_CTRL bits */ -#define	CO_CTRL_TIMERUN		0x04		/* 0 == disabled */ -#define	CO_CTRL_TIMEMASK	0x08		/* 0 == unmasked */ - -/* CO_CPU_STATUS bits */ -#define	CO_STAT_TIMEINTR	0x02	/* (r) 1 == int pend, (w) 0 == clear */ - -/* CO_CPU_TIMEVAL value */ -#define	CO_TIME_HZ		100000000	/* Cobalt core rate */ - -/* Cobalt APIC registers -- relative to CO_APIC_VADDR, use co_apic_*() */ -#define	CO_APIC_HI(n)		(((n) * 0x10) + 4) -#define	CO_APIC_LO(n)		((n) * 0x10) -#define	CO_APIC_ID		0x0ffc - -/* CO_APIC_ID bits */ -#define	CO_APIC_ENABLE		0x00000100 - -/* CO_APIC_LO bits */ -#define	CO_APIC_MASK		0x00010000	/* 0 = enabled */ -#define	CO_APIC_LEVEL		0x00008000	/* 0 = edge */ - -/* - * Where things are physically wired to Cobalt - * #defines with no board _<type>_<rev>_ are common to all (thus far) - */ -#define	CO_APIC_IDE0		4 -#define CO_APIC_IDE1		2		/* Only on 320 */ - -#define	CO_APIC_8259		12		/* serial, floppy, par-l-l */ - -/* Lithium PCI Bridge A -- "the one with 82557 Ethernet" */ -#define	CO_APIC_PCIA_BASE0	0 /* and 1 */	/* slot 0, line 0 */ -#define	CO_APIC_PCIA_BASE123	5 /* and 6 */	/* slot 0, line 1 */ - -#define	CO_APIC_PIIX4_USB	7		/* this one is weird */ - -/* Lithium PCI Bridge B -- "the one with PIIX4" */ -#define	CO_APIC_PCIB_BASE0	8 /* and 9-12 *//* slot 0, line 0 */ -#define	CO_APIC_PCIB_BASE123	13 /* 14.15 */	/* slot 0, line 1 */ - -#define	CO_APIC_VIDOUT0		16 -#define	CO_APIC_VIDOUT1		17 -#define	CO_APIC_VIDIN0		18 -#define	CO_APIC_VIDIN1		19 - -#define	CO_APIC_LI_AUDIO	22 - -#define	CO_APIC_AS		24 -#define	CO_APIC_RE		25 - -#define CO_APIC_CPU		28		/* Timer and Cache interrupt */ -#define	CO_APIC_NMI		29 -#define	CO_APIC_LAST		CO_APIC_NMI - -/* - * This is how irqs are assigned on the Visual Workstation. - * Legacy devices get irq's 1-15 (system clock is 0 and is CO_APIC_CPU). - * All other devices (including PCI) go to Cobalt and are irq's 16 on up. - */ -#define	CO_IRQ_APIC0	16			/* irq of apic entry 0 */ -#define	IS_CO_APIC(irq)	((irq) >= CO_IRQ_APIC0) -#define	CO_IRQ(apic)	(CO_IRQ_APIC0 + (apic))	/* apic ent to irq */ -#define	CO_APIC(irq)	((irq) - CO_IRQ_APIC0)	/* irq to apic ent */ -#define CO_IRQ_IDE0	14			/* knowledge of... */ -#define CO_IRQ_IDE1	15			/* ... ide driver defaults! 
*/ -#define	CO_IRQ_8259	CO_IRQ(CO_APIC_8259) - -#ifdef CONFIG_X86_VISWS_APIC -static inline void co_cpu_write(unsigned long reg, unsigned long v) -{ -	*((volatile unsigned long *)(CO_CPU_VADDR+reg))=v; -} - -static inline unsigned long co_cpu_read(unsigned long reg) -{ -	return *((volatile unsigned long *)(CO_CPU_VADDR+reg)); -}             -              -static inline void co_apic_write(unsigned long reg, unsigned long v) -{ -	*((volatile unsigned long *)(CO_APIC_VADDR+reg))=v; -}             -              -static inline unsigned long co_apic_read(unsigned long reg) -{ -	return *((volatile unsigned long *)(CO_APIC_VADDR+reg)); -} -#endif - -extern char visws_board_type; - -#define	VISWS_320	0 -#define	VISWS_540	1 - -extern char visws_board_rev; - -extern int pci_visws_init(void); - -#endif /* _ASM_X86_VISWS_COBALT_H */ diff --git a/arch/x86/include/asm/visws/lithium.h b/arch/x86/include/asm/visws/lithium.h deleted file mode 100644 index a10d89bc127..00000000000 --- a/arch/x86/include/asm/visws/lithium.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef _ASM_X86_VISWS_LITHIUM_H -#define _ASM_X86_VISWS_LITHIUM_H - -#include <asm/fixmap.h> - -/* - * Lithium is the SGI Visual Workstation I/O ASIC - */ - -#define	LI_PCI_A_PHYS		0xfc000000	/* Enet is dev 3 */ -#define	LI_PCI_B_PHYS		0xfd000000	/* PIIX4 is here */ - -/* see set_fixmap() and asm/fixmap.h */ -#define LI_PCIA_VADDR   (fix_to_virt(FIX_LI_PCIA)) -#define LI_PCIB_VADDR   (fix_to_virt(FIX_LI_PCIB)) - -/* Not a standard PCI? (not in linux/pci.h) */ -#define	LI_PCI_BUSNUM	0x44			/* lo8: primary, hi8: sub */ -#define LI_PCI_INTEN    0x46 - -/* LI_PCI_INTENT bits */ -#define	LI_INTA_0	0x0001 -#define	LI_INTA_1	0x0002 -#define	LI_INTA_2	0x0004 -#define	LI_INTA_3	0x0008 -#define	LI_INTA_4	0x0010 -#define	LI_INTB		0x0020 -#define	LI_INTC		0x0040 -#define	LI_INTD		0x0080 - -/* More special purpose macros... 
*/ -static inline void li_pcia_write16(unsigned long reg, unsigned short v) -{ -	*((volatile unsigned short *)(LI_PCIA_VADDR+reg))=v; -} - -static inline unsigned short li_pcia_read16(unsigned long reg) -{ -	 return *((volatile unsigned short *)(LI_PCIA_VADDR+reg)); -} - -static inline void li_pcib_write16(unsigned long reg, unsigned short v) -{ -	*((volatile unsigned short *)(LI_PCIB_VADDR+reg))=v; -} - -static inline unsigned short li_pcib_read16(unsigned long reg) -{ -	return *((volatile unsigned short *)(LI_PCIB_VADDR+reg)); -} - -#endif /* _ASM_X86_VISWS_LITHIUM_H */ - diff --git a/arch/x86/include/asm/visws/piix4.h b/arch/x86/include/asm/visws/piix4.h deleted file mode 100644 index d0af4d338e7..00000000000 --- a/arch/x86/include/asm/visws/piix4.h +++ /dev/null @@ -1,107 +0,0 @@ -#ifndef _ASM_X86_VISWS_PIIX4_H -#define _ASM_X86_VISWS_PIIX4_H - -/* - * PIIX4 as used on SGI Visual Workstations - */ - -#define	PIIX_PM_START		0x0F80 - -#define	SIO_GPIO_START		0x0FC0 - -#define	SIO_PM_START		0x0FC8 - -#define	PMBASE			PIIX_PM_START -#define	GPIREG0			(PMBASE+0x30) -#define	GPIREG(x)		(GPIREG0+((x)/8)) -#define	GPIBIT(x)		(1 << ((x)%8)) - -#define	PIIX_GPI_BD_ID1		18 -#define	PIIX_GPI_BD_ID2		19 -#define	PIIX_GPI_BD_ID3		20 -#define	PIIX_GPI_BD_ID4		21 -#define	PIIX_GPI_BD_REG		GPIREG(PIIX_GPI_BD_ID1) -#define	PIIX_GPI_BD_MASK	(GPIBIT(PIIX_GPI_BD_ID1) | \ -				GPIBIT(PIIX_GPI_BD_ID2) | \ -				GPIBIT(PIIX_GPI_BD_ID3) | \ -				GPIBIT(PIIX_GPI_BD_ID4) ) - -#define	PIIX_GPI_BD_SHIFT	(PIIX_GPI_BD_ID1 % 8) - -#define	SIO_INDEX		0x2e -#define	SIO_DATA		0x2f - -#define	SIO_DEV_SEL		0x7 -#define	SIO_DEV_ENB		0x30 -#define	SIO_DEV_MSB		0x60 -#define	SIO_DEV_LSB		0x61 - -#define	SIO_GP_DEV		0x7 - -#define	SIO_GP_BASE		SIO_GPIO_START -#define	SIO_GP_MSB		(SIO_GP_BASE>>8) -#define	SIO_GP_LSB		(SIO_GP_BASE&0xff) - -#define	SIO_GP_DATA1		(SIO_GP_BASE+0) - -#define	SIO_PM_DEV		0x8 - -#define	SIO_PM_BASE		SIO_PM_START -#define	SIO_PM_MSB		(SIO_PM_BASE>>8) -#define	SIO_PM_LSB		(SIO_PM_BASE&0xff) -#define	SIO_PM_INDEX		(SIO_PM_BASE+0) -#define	SIO_PM_DATA		(SIO_PM_BASE+1) - -#define	SIO_PM_FER2		0x1 - -#define	SIO_PM_GP_EN		0x80 - - - -/* - * This is the dev/reg where generating a config cycle will - * result in a PCI special cycle. - */ -#define SPECIAL_DEV		0xff -#define SPECIAL_REG		0x00 - -/* - * PIIX4 needs to see a special cycle with the following data - * to be convinced the processor has gone into the stop grant - * state.  PIIX4 insists on seeing this before it will power - * down a system. - */ -#define PIIX_SPECIAL_STOP		0x00120002 - -#define PIIX4_RESET_PORT	0xcf9 -#define PIIX4_RESET_VAL		0x6 - -#define PMSTS_PORT		0xf80	// 2 bytes	PM Status -#define PMEN_PORT		0xf82	// 2 bytes	PM Enable -#define	PMCNTRL_PORT		0xf84	// 2 bytes	PM Control - -#define PM_SUSPEND_ENABLE	0x2000	// start sequence to suspend state - -/* - * PMSTS and PMEN I/O bit definitions. - * (Bits are the same in both registers) - */ -#define PM_STS_RSM		(1<<15)	// Resume Status -#define PM_STS_PWRBTNOR		(1<<11)	// Power Button Override -#define PM_STS_RTC		(1<<10)	// RTC status -#define PM_STS_PWRBTN		(1<<8)	// Power Button Pressed? -#define PM_STS_GBL		(1<<5)	// Global Status -#define PM_STS_BM		(1<<4)	// Bus Master Status -#define PM_STS_TMROF		(1<<0)	// Timer Overflow Status. 
- -/* - * Stop clock GPI register - */ -#define PIIX_GPIREG0			(0xf80 + 0x30) - -/* - * Stop clock GPI bit in GPIREG0 - */ -#define	PIIX_GPI_STPCLK		0x4	// STPCLK signal routed back in - -#endif /* _ASM_X86_VISWS_PIIX4_H */ diff --git a/arch/x86/include/asm/visws/sgivw.h b/arch/x86/include/asm/visws/sgivw.h deleted file mode 100644 index 5fbf63e1003..00000000000 --- a/arch/x86/include/asm/visws/sgivw.h +++ /dev/null @@ -1,5 +0,0 @@ -/* - * Frame buffer position and size: - */ -extern unsigned long sgivwfb_mem_phys; -extern unsigned long sgivwfb_mem_size; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 966502d4682..7004d21e621 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -85,6 +85,7 @@  #define VM_EXIT_SAVE_IA32_EFER                  0x00100000  #define VM_EXIT_LOAD_IA32_EFER                  0x00200000  #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER       0x00400000 +#define VM_EXIT_CLEAR_BNDCFGS                   0x00800000  #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR	0x00036dff @@ -95,11 +96,13 @@  #define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL     0x00002000  #define VM_ENTRY_LOAD_IA32_PAT			0x00004000  #define VM_ENTRY_LOAD_IA32_EFER                 0x00008000 +#define VM_ENTRY_LOAD_BNDCFGS                   0x00010000  #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR	0x000011ff  #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f  #define VMX_MISC_SAVE_EFER_LMA			0x00000020 +#define VMX_MISC_ACTIVITY_HLT			0x00000040  /* VMCS Encodings */  enum vmcs_field { @@ -173,6 +176,8 @@ enum vmcs_field {  	GUEST_PDPTR2_HIGH               = 0x0000280f,  	GUEST_PDPTR3                    = 0x00002810,  	GUEST_PDPTR3_HIGH               = 0x00002811, +	GUEST_BNDCFGS                   = 0x00002812, +	GUEST_BNDCFGS_HIGH              = 0x00002813,  	HOST_IA32_PAT			= 0x00002c00,  	HOST_IA32_PAT_HIGH		= 0x00002c01,  	HOST_IA32_EFER			= 0x00002c02, diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index d76ac40da20..5d2b9ad2c6d 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -16,8 +16,8 @@   * you mess up, the linker will catch it.)   */ -/* Base address of vvars.  This is not ABI. 
*/ -#define VVAR_ADDRESS (-10*1024*1024 - 4096) +#ifndef _ASM_X86_VVAR_H +#define _ASM_X86_VVAR_H  #if defined(__VVAR_KERNEL_LDS) @@ -29,16 +29,17 @@  #else +extern char __vvar_page; +  #define DECLARE_VVAR(offset, type, name)				\ -	static type const * const vvaraddr_ ## name =			\ -		(void *)(VVAR_ADDRESS + (offset)); +	extern type vvar_ ## name __attribute__((visibility("hidden"))); + +#define VVAR(name) (vvar_ ## name)  #define DEFINE_VVAR(type, name)						\  	type name							\  	__attribute__((section(".vvar_" #name), aligned(16))) __visible -#define VVAR(name) (*vvaraddr_ ## name) -  #endif  /* DECLARE_VVAR(offset, type, name) */ @@ -48,3 +49,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)  DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)  #undef DECLARE_VVAR + +#endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 828a1565ba5..e45e4da96bf 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -172,6 +172,7 @@ struct x86_platform_ops {  struct pci_dev;  struct msi_msg; +struct msi_desc;  struct x86_msi_ops {  	int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); @@ -180,8 +181,10 @@ struct x86_msi_ops {  			       u8 hpet_id);  	void (*teardown_msi_irq)(unsigned int irq);  	void (*teardown_msi_irqs)(struct pci_dev *dev); -	void (*restore_msi_irqs)(struct pci_dev *dev, int irq); +	void (*restore_msi_irqs)(struct pci_dev *dev);  	int  (*setup_hpet_msi)(unsigned int irq, unsigned int id); +	u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag); +	u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag);  };  struct IO_APIC_route_entry; diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index e709884d0ef..ca08a27b90b 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -343,7 +343,7 @@ HYPERVISOR_memory_op(unsigned int cmd, void *arg)  }  static inline int -HYPERVISOR_multicall(void *call_list, int nr_calls) +HYPERVISOR_multicall(void *call_list, uint32_t nr_calls)  {  	return _hypercall2(int, multicall, call_list, nr_calls);  } diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index fd9cb7695b5..3400dbaec3c 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -54,6 +54,9 @@ typedef unsigned long xen_pfn_t;  #define PRI_xen_pfn "lx"  typedef unsigned long xen_ulong_t;  #define PRI_xen_ulong "lx" +typedef long xen_long_t; +#define PRI_xen_long "lx" +  /* Guest handles for primitive C types. 
*/  __DEFINE_GUEST_HANDLE(uchar, unsigned char);  __DEFINE_GUEST_HANDLE(uint,  unsigned int); diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h new file mode 100644 index 00000000000..7f02fe4e2c7 --- /dev/null +++ b/arch/x86/include/asm/xen/page-coherent.h @@ -0,0 +1,38 @@ +#ifndef _ASM_X86_XEN_PAGE_COHERENT_H +#define _ASM_X86_XEN_PAGE_COHERENT_H + +#include <asm/page.h> +#include <linux/dma-attrs.h> +#include <linux/dma-mapping.h> + +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, +		dma_addr_t *dma_handle, gfp_t flags, +		struct dma_attrs *attrs) +{ +	void *vstart = (void*)__get_free_pages(flags, get_order(size)); +	*dma_handle = virt_to_phys(vstart); +	return vstart; +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, +		void *cpu_addr, dma_addr_t dma_handle, +		struct dma_attrs *attrs) +{ +	free_pages((unsigned long) cpu_addr, get_order(size)); +} + +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, +	     unsigned long offset, size_t size, enum dma_data_direction dir, +	     struct dma_attrs *attrs) { } + +static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, +		size_t size, enum dma_data_direction dir, +		struct dma_attrs *attrs) { } + +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, +		dma_addr_t handle, size_t size, enum dma_data_direction dir) { } + +static inline void xen_dma_sync_single_for_device(struct device *hwdev, +		dma_addr_t handle, size_t size, enum dma_data_direction dir) { } + +#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 6aef9fbc09b..c949923a566 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -49,10 +49,17 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);  extern unsigned long set_phys_range_identity(unsigned long pfn_s,  					     unsigned long pfn_e); +extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, +				   struct gnttab_map_grant_ref *kmap_ops, +				   struct page **pages, unsigned int count);  extern int m2p_add_override(unsigned long mfn, struct page *page,  			    struct gnttab_map_grant_ref *kmap_op); +extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, +				     struct gnttab_map_grant_ref *kmap_ops, +				     struct page **pages, unsigned int count);  extern int m2p_remove_override(struct page *page, -				struct gnttab_map_grant_ref *kmap_op); +			       struct gnttab_map_grant_ref *kmap_op, +			       unsigned long mfn);  extern struct page *m2p_find_override(unsigned long mfn);  extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); @@ -79,30 +86,38 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)  	return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;  } -static inline unsigned long mfn_to_pfn(unsigned long mfn) +static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)  {  	unsigned long pfn; -	int ret = 0; +	int ret;  	if (xen_feature(XENFEAT_auto_translated_physmap))  		return mfn; -	if (unlikely(mfn >= machine_to_phys_nr)) { -		pfn = ~0; -		goto try_override; -	} -	pfn = 0; +	if (unlikely(mfn >= machine_to_phys_nr)) +		return ~0; +  	/*  	 * The array access can fail (e.g., device space beyond end of RAM).  	 
* In such cases it doesn't matter what we return (we return garbage),  	 * but we must handle the fault without crashing!  	 */  	ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); -try_override: -	/* ret might be < 0 if there are no entries in the m2p for mfn */  	if (ret < 0) -		pfn = ~0; -	else if (get_phys_to_machine(pfn) != mfn) +		return ~0; + +	return pfn; +} + +static inline unsigned long mfn_to_pfn(unsigned long mfn) +{ +	unsigned long pfn; + +	if (xen_feature(XENFEAT_auto_translated_physmap)) +		return mfn; + +	pfn = mfn_to_pfn_no_overrides(mfn); +	if (get_phys_to_machine(pfn) != mfn) {  		/*  		 * If this appears to be a foreign mfn (because the pfn  		 * doesn't map back to the mfn), then check the local override @@ -111,8 +126,9 @@ try_override:  		 * m2p_find_override_pfn returns ~0 if it doesn't find anything.  		 */  		pfn = m2p_find_override_pfn(mfn, ~0); +	} -	/*  +	/*  	 * pfn is ~0 if there are no entries in the m2p for mfn or if the  	 * entry doesn't map back to the mfn and m2p_override doesn't have a  	 * valid entry for it. @@ -158,7 +174,12 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)   */  static inline unsigned long mfn_to_local_pfn(unsigned long mfn)  { -	unsigned long pfn = mfn_to_pfn(mfn); +	unsigned long pfn; + +	if (xen_feature(XENFEAT_auto_translated_physmap)) +		return mfn; + +	pfn = mfn_to_pfn(mfn);  	if (get_phys_to_machine(pfn) != mfn)  		return -1; /* force !pfn_valid() */  	return pfn; @@ -213,5 +234,6 @@ void make_lowmem_page_readonly(void *vaddr);  void make_lowmem_page_readwrite(void *vaddr);  #define xen_remap(cookie, size) ioremap((cookie), (size)); +#define xen_unmap(cookie) iounmap((cookie))  #endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 0415cdabb5a..d949ef28c48 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -6,11 +6,18 @@  #define XSTATE_CPUID		0x0000000d -#define XSTATE_FP	0x1 -#define XSTATE_SSE	0x2 -#define XSTATE_YMM	0x4 +#define XSTATE_FP		0x1 +#define XSTATE_SSE		0x2 +#define XSTATE_YMM		0x4 +#define XSTATE_BNDREGS		0x8 +#define XSTATE_BNDCSR		0x10 +#define XSTATE_OPMASK		0x20 +#define XSTATE_ZMM_Hi256	0x40 +#define XSTATE_Hi16_ZMM		0x80  #define XSTATE_FPSSE	(XSTATE_FP | XSTATE_SSE) +/* Bit 63 of XCR0 is reserved for future expansion */ +#define XSTATE_EXTEND_MASK	(~(XSTATE_FPSSE | (1ULL << 63)))  #define FXSAVE_SIZE	512 @@ -20,10 +27,15 @@  #define XSAVE_YMM_SIZE	    256  #define XSAVE_YMM_OFFSET    (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) -/* - * These are the features that the OS can handle currently. - */ -#define XCNTXT_MASK	(XSTATE_FP | XSTATE_SSE | XSTATE_YMM) +/* Supported features which support lazy state saving */ +#define XSTATE_LAZY	(XSTATE_FP | XSTATE_SSE | XSTATE_YMM		      \ +			| XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) + +/* Supported features which require eager state saving */ +#define XSTATE_EAGER	(XSTATE_BNDREGS | XSTATE_BNDCSR) + +/* All currently supported features */ +#define XCNTXT_MASK	(XSTATE_LAZY | XSTATE_EAGER)  #ifdef CONFIG_X86_64  #define REX_PREFIX	"0x48, "  | 
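The uaccess.h hunk near the top of this listing replaces the old add/sbb inline-asm range check with the C helper __chk_range_not_ok(). The sketch below is a standalone illustration of the same logic, not kernel code: range_not_ok() and FAKE_USER_LIMIT are made-up stand-ins for the kernel helper and user_addr_max().

	/* Standalone sketch of the overflow-safe range check (GCC/Clang, for
	 * __builtin_constant_p); compile with e.g. gcc -O2 -std=gnu11. */
	#include <stdbool.h>
	#include <stdio.h>

	#define FAKE_USER_LIMIT	0x7ffffffff000UL	/* stand-in for user_addr_max() */

	static bool range_not_ok(unsigned long addr, unsigned long size,
				 unsigned long limit)
	{
		/*
		 * For a compile-time-constant size, "limit - size" cannot wrap,
		 * so a single compare suffices.  For arbitrary sizes, "addr + size"
		 * can wrap, which is caught by the "addr < size" test.
		 */
		if (__builtin_constant_p(size))
			return addr > limit - size;

		addr += size;
		if (addr < size)	/* wrapped past the top of the address space */
			return true;
		return addr > limit;
	}

	int main(void)
	{
		printf("%d\n", range_not_ok(0x1000, 64, FAKE_USER_LIMIT));	/* 0: in range */
		printf("%d\n", range_not_ok(~0UL - 8, 64, FAKE_USER_LIMIT));	/* 1: addr + size wraps */
		return 0;
	}

Subtracting the size from the limit rather than adding it to the address is the point of the constant-size fast path: the compiler can usually fold it into a single compare, which is presumably why the hand-written asm was dropped.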
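Similarly, the vgtod.h hunk above open-codes the sequence counter (gtod_read_begin()/gtod_read_retry()/gtod_write_begin()/gtod_write_end()) that used to be a seqcount_t, so the 32-bit and 64-bit vDSO builds can share vsyscall_gtod_data. Below is a minimal userspace sketch of that reader/writer protocol, under stated assumptions: the struct and function names are invented for illustration, and C11 fences stand in only loosely for the kernel's ACCESS_ONCE()/smp_rmb()/smp_wmb().

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct gtod_sketch {
		atomic_uint	seq;	/* odd while a writer is mid-update */
		unsigned long	sec;
		unsigned long	nsec;
	};

	static unsigned sketch_read_begin(struct gtod_sketch *g)
	{
		unsigned ret;

		/* Spin until the count is even; odd means a write is in flight. */
		while ((ret = atomic_load_explicit(&g->seq, memory_order_relaxed)) & 1)
			;
		atomic_thread_fence(memory_order_acquire);	/* roughly smp_rmb() */
		return ret;
	}

	static bool sketch_read_retry(struct gtod_sketch *g, unsigned start)
	{
		atomic_thread_fence(memory_order_acquire);	/* roughly smp_rmb() */
		return atomic_load_explicit(&g->seq, memory_order_relaxed) != start;
	}

	static void sketch_write(struct gtod_sketch *g, unsigned long sec, unsigned long nsec)
	{
		atomic_fetch_add_explicit(&g->seq, 1, memory_order_relaxed);	/* count goes odd */
		atomic_thread_fence(memory_order_release);			/* roughly smp_wmb() */
		g->sec = sec;
		g->nsec = nsec;
		atomic_thread_fence(memory_order_release);			/* roughly smp_wmb() */
		atomic_fetch_add_explicit(&g->seq, 1, memory_order_relaxed);	/* even again */
	}

	int main(void)
	{
		struct gtod_sketch g = { .seq = 0 };
		unsigned long sec, nsec;
		unsigned start;

		sketch_write(&g, 1400000000UL, 500UL);

		do {
			start = sketch_read_begin(&g);
			sec = g.sec;
			nsec = g.nsec;
		} while (sketch_read_retry(&g, start));

		printf("read %lu.%09lu\n", sec, nsec);
		return 0;
	}

Readers never block the writer; they simply retry if the count was odd or changed while they were copying, which is what lets the vDSO read the time data without taking a lock.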
