From c1f64a58003fd2efaa725a857e269a15f765791a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 27 May 2008 09:47:13 -0700 Subject: x86: MMIO and gcc re-ordering issue On Tue, 27 May 2008, Linus Torvalds wrote: > > Expecting people to fix up all drivers is simply not going to happen. And > serializing things shouldn't be *that* expensive. People who cannot take > the expense can continue to use the magic __raw_writel() etc stuff. Of course, for non-x86, you kind of have to expect drivers to be well-behaved, so non-x86 can probably avoid this simply because there are less relevant drivers involved. Here's a UNTESTED patch for x86 that may or may not compile and work, and which serializes (on a compiler level) the IO accesses against regular memory accesses. __read[bwlq]()/__write[bwlq]() are not serialized with a :"memory" barrier, although since they still use "asm volatile" I suspect that i practice they are probably serial too. Did not look very closely at any generated code (only did a trivial test to see that the code looks *roughly* correct). Signed-off-by: Ingo Molnar --- include/asm-x86/io.h | 56 ++++++++++++++++++++++++++++++++++++++ include/asm-x86/io_32.h | 49 ---------------------------------- include/asm-x86/io_64.h | 71 ------------------------------------------------- 3 files changed, 56 insertions(+), 120 deletions(-) diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h index d5b11f60dbd..8e9eca93f9b 100644 --- a/include/asm-x86/io.h +++ b/include/asm-x86/io.h @@ -3,6 +3,62 @@ #define ARCH_HAS_IOREMAP_WC +#include + +#define build_mmio_read(name, size, type, reg, barrier) \ +static inline type name(const volatile void __iomem *addr) \ +{ type ret; asm volatile("mov" size " %1,%0":"=" reg (ret) \ +:"m" (*(volatile type __force *)addr) barrier); return ret; } + +#define build_mmio_write(name, size, type, reg, barrier) \ +static inline void name(type val, volatile void __iomem *addr) \ +{ asm volatile("mov" size " %0,%1": :reg (val), \ +"m" (*(volatile type __force *)addr) barrier); } + +build_mmio_read(readb, "b", unsigned char, "q", :"memory") +build_mmio_read(readw, "w", unsigned short, "r", :"memory") +build_mmio_read(readl, "l", unsigned int, "r", :"memory") + +build_mmio_read(__readb, "b", unsigned char, "q", ) +build_mmio_read(__readw, "w", unsigned short, "r", ) +build_mmio_read(__readl, "l", unsigned int, "r", ) + +build_mmio_write(writeb, "b", unsigned char, "q", :"memory") +build_mmio_write(writew, "w", unsigned short, "r", :"memory") +build_mmio_write(writel, "l", unsigned int, "r", :"memory") + +build_mmio_write(__writeb, "b", unsigned char, "q", ) +build_mmio_write(__writew, "w", unsigned short, "r", ) +build_mmio_write(__writel, "l", unsigned int, "r", ) + +#define readb_relaxed(a) __readb(a) +#define readw_relaxed(a) __readw(a) +#define readl_relaxed(a) __readl(a) +#define __raw_readb __readb +#define __raw_readw __readw +#define __raw_readl __readl + +#define __raw_writeb __writeb +#define __raw_writew __writew +#define __raw_writel __writel + +#define mmiowb() barrier() + +#ifdef CONFIG_X86_64 +build_mmio_read(readq, "q", unsigned long, "r", :"memory") +build_mmio_read(__readq, "q", unsigned long, "r", ) +build_mmio_write(writeq, "q", unsigned long, "r", :"memory") +build_mmio_write(__writeq, "q", unsigned long, "r", ) + +#define readq_relaxed(a) __readq(a) +#define __raw_readq __readq +#define __raw_writeq writeq + +/* Let people know we have them */ +#define readq readq +#define writeq writeq +#endif + #ifdef CONFIG_X86_32 # include "io_32.h" 
#else diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index 049e81e797a..d71be8df979 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -149,55 +149,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. - */ - -static inline unsigned char readb(const volatile void __iomem *addr) -{ - return *(volatile unsigned char __force *)addr; -} - -static inline unsigned short readw(const volatile void __iomem *addr) -{ - return *(volatile unsigned short __force *)addr; -} - -static inline unsigned int readl(const volatile void __iomem *addr) -{ - return *(volatile unsigned int __force *) addr; -} - -#define readb_relaxed(addr) readb(addr) -#define readw_relaxed(addr) readw(addr) -#define readl_relaxed(addr) readl(addr) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl - -static inline void writeb(unsigned char b, volatile void __iomem *addr) -{ - *(volatile unsigned char __force *)addr = b; -} - -static inline void writew(unsigned short b, volatile void __iomem *addr) -{ - *(volatile unsigned short __force *)addr = b; -} - -static inline void writel(unsigned int b, volatile void __iomem *addr) -{ - *(volatile unsigned int __force *)addr = b; -} -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel - -#define mmiowb() - static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index 0930bedf9e4..ddd8058a502 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -204,77 +204,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. 
- */ - -static inline __u8 __readb(const volatile void __iomem *addr) -{ - return *(__force volatile __u8 *)addr; -} - -static inline __u16 __readw(const volatile void __iomem *addr) -{ - return *(__force volatile __u16 *)addr; -} - -static __always_inline __u32 __readl(const volatile void __iomem *addr) -{ - return *(__force volatile __u32 *)addr; -} - -static inline __u64 __readq(const volatile void __iomem *addr) -{ - return *(__force volatile __u64 *)addr; -} - -#define readb(x) __readb(x) -#define readw(x) __readw(x) -#define readl(x) __readl(x) -#define readq(x) __readq(x) -#define readb_relaxed(a) readb(a) -#define readw_relaxed(a) readw(a) -#define readl_relaxed(a) readl(a) -#define readq_relaxed(a) readq(a) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl -#define __raw_readq readq - -#define mmiowb() - -static inline void __writel(__u32 b, volatile void __iomem *addr) -{ - *(__force volatile __u32 *)addr = b; -} - -static inline void __writeq(__u64 b, volatile void __iomem *addr) -{ - *(__force volatile __u64 *)addr = b; -} - -static inline void __writeb(__u8 b, volatile void __iomem *addr) -{ - *(__force volatile __u8 *)addr = b; -} - -static inline void __writew(__u16 b, volatile void __iomem *addr) -{ - *(__force volatile __u16 *)addr = b; -} - -#define writeq(val, addr) __writeq((val), (addr)) -#define writel(val, addr) __writel((val), (addr)) -#define writew(val, addr) __writew((val), (addr)) -#define writeb(val, addr) __writeb((val), (addr)) -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel -#define __raw_writeq writeq - void __memcpy_fromio(void *, unsigned long, unsigned); void __memcpy_toio(unsigned long, const void *, unsigned); -- cgit v1.2.3-18-g5258 From e01b70ef3eb3080fecc35e15f68cd274c0a48163 Mon Sep 17 00:00:00 2001 From: Jiri Hladky Date: Mon, 2 Jun 2008 12:00:19 +0200 Subject: x86: fix bug in arch/i386/lib/delay.c file, delay_loop function when trying to understand how Bogomips are implemented I have found a bug in arch/i386/lib/delay.c file, delay_loop function. The function fails for loops > 2^31+1. It because SF is set when dec returns numbers > 2^31. The fix is to use jnz instruction instead of jns (and add one decl instruction to the end to have exactly the same number of loops as in original version). Martin Mares observed: > It is a long time since I have hacked that file, but you should definitely > make sure that the function is never called with a zero argument. In such > case, the original version made just a single pass, but your version > makes 2^32 of them. fixed that. Signed-off-by: Ingo Molnar --- arch/x86/lib/delay_32.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index d710f2d167b..ef691316f8b 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c @@ -3,6 +3,7 @@ * * Copyright (C) 1993 Linus Torvalds * Copyright (C) 1997 Martin Mares + * Copyright (C) 2008 Jiri Hladky * * The __delay function must _NOT_ be inlined as its execution time * depends wildly on alignment on many x86 processors. 
The additional @@ -28,16 +29,22 @@ /* simple loop based delay: */ static void delay_loop(unsigned long loops) { - int d0; - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); + " test %0,%0 \n" + " jz 3f \n" + " jmp 1f \n" + + ".align 16 \n" + "1: jmp 2f \n" + + ".align 16 \n" + "2: decl %0 \n" + " jnz 2b \n" + "3: decl %0 \n" + + : /* we don't need output */ + :"a" (loops) + ); } /* TSC based delay: */ -- cgit v1.2.3-18-g5258 From 3da757daf86e498872855f0b5e101f763ba79499 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 20 Jun 2008 15:06:33 -0700 Subject: x86: use cpu_khz for loops_per_jiffy calculation On the x86 platform we can use the value of tsc_khz computed during tsc calibration to calculate the loops_per_jiffy value. Its very important to keep the error in lpj values to minimum as any error in that may result in kernel panic in check_timer. In virtualization environment, On a highly overloaded host the guest delay calibration may sometimes result in errors beyond the ~50% that timer_irq_works can handle, resulting in the guest panicking. Does some formating changes to lpj_setup code to now have a single printk to print the bogomips value. We do this only for the boot processor because the AP's can have different base frequencies or the BIOS might boot a AP at a different frequency. Signed-off-by: Alok N Kataria Cc: Arjan van de Ven Cc: Daniel Hecht Cc: Tim Mann Cc: Zach Amsden Cc: Sahil Rihan Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_64.c | 2 ++ arch/x86/kernel/tsc_32.c | 5 +++++ include/linux/delay.h | 1 + init/calibrate.c | 36 +++++++++++++++++++----------------- 4 files changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index c737849e2ef..12b4a71bd07 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -123,6 +123,8 @@ void __init time_init(void) (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) cpu_khz = calculate_cpu_khz(); + lpj_tsc = ((unsigned long)tsc_khz * 1000)/HZ; + if (unsynchronized_tsc()) mark_tsc_unstable("TSCs unsynchronized"); diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 068759db63d..be729035b30 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -401,6 +401,7 @@ static inline void check_geode_tsc_reliable(void) { } void __init tsc_init(void) { int cpu; + u64 lpj; if (!cpu_has_tsc || tsc_disabled) { /* Disable the TSC in case of !cpu_has_tsc */ @@ -421,6 +422,10 @@ void __init tsc_init(void) return; } + lpj = ((u64)tsc_khz * 1000); + do_div(lpj, HZ); + lpj_tsc = lpj; + printk("Detected %lu.%03lu MHz processor.\n", (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); diff --git a/include/linux/delay.h b/include/linux/delay.h index 54552d21296..01aec60590a 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -41,6 +41,7 @@ static inline void ndelay(unsigned long x) #define ndelay(x) ndelay(x) #endif +extern unsigned long lpj_tsc; void calibrate_delay(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); diff --git a/init/calibrate.c b/init/calibrate.c index ecb3822d4f7..86286974dad 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -8,7 +8,9 @@ #include #include #include +#include +unsigned long lpj_tsc; unsigned long preset_lpj; static int __init lpj_setup(char *str) { @@ -108,6 +110,10 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;} * This is 
the number of bits of precision for the loops_per_jiffy. Each * bit takes on average 1.5/HZ seconds. This (like the original) is a little * better than 1% + * For the boot cpu we can skip the delay calibration and assign it a value + * calculated based on the tsc frequency. + * For the rest of the CPUs we cannot assume that the tsc frequency is same as + * the cpu frequency, hence do the calibration for those. */ #define LPS_PREC 8 @@ -118,20 +124,20 @@ void __cpuinit calibrate_delay(void) if (preset_lpj) { loops_per_jiffy = preset_lpj; - printk("Calibrating delay loop (skipped)... " - "%lu.%02lu BogoMIPS preset\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100); + printk(KERN_INFO + "Calibrating delay loop (skipped) preset value.. "); + } else if ((smp_processor_id() == 0) && lpj_tsc) { + loops_per_jiffy = lpj_tsc; + printk(KERN_INFO + "Calibrating delay loop (skipped), " + "using tsc calculated value.. "); } else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) { - printk("Calibrating delay using timer specific routine.. "); - printk("%lu.%02lu BogoMIPS (lpj=%lu)\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100, - loops_per_jiffy); + printk(KERN_INFO + "Calibrating delay using timer specific routine.. "); } else { loops_per_jiffy = (1<<12); - printk(KERN_DEBUG "Calibrating delay loop... "); + printk(KERN_INFO "Calibrating delay loop... "); while ((loops_per_jiffy <<= 1) != 0) { /* wait for "start of" clock tick */ ticks = jiffies; @@ -161,12 +167,8 @@ void __cpuinit calibrate_delay(void) if (jiffies != ticks) /* longer than 1 tick */ loops_per_jiffy &= ~loopbit; } - - /* Round the value and print it */ - printk("%lu.%02lu BogoMIPS (lpj=%lu)\n", - loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ)) % 100, - loops_per_jiffy); } - + printk(KERN_INFO "%lu.%02lu BogoMIPS (lpj=%lu)\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy); } -- cgit v1.2.3-18-g5258 From 6ff10de374cc68ff2024247793176dc8a1b317ea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 24 Jun 2008 01:19:49 +0200 Subject: x86: fix "x86: use cpu_khz for loops_per_jiffy calculation" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/tsc_32.c: In function ‘tsc_init': arch/x86/kernel/tsc_32.c:421: error: ‘lpj_tsc' undeclared (first use in this function) arch/x86/kernel/tsc_32.c:421: error: (Each undeclared identifier is reported only once arch/x86/kernel/tsc_32.c:421: error: for each function it appears in.) Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index be729035b30..0af49fb533e 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-18-g5258 From 93e1ade5382206d597e9d6de2d1383e69f54d064 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sun, 22 Jun 2008 09:40:18 +0200 Subject: x86/oprofile: disable preemption in nmi_shutdown fix: BUG: using smp_processor_id() in preemptible [00000000] code: oprofiled/27301 caller is nmi_shutdown+0x11/0x60 Pid: 27301, comm: oprofiled Not tainted 2.6.26-rc7 #25 [] debug_smp_processor_id+0xbd/0xc0 [] nmi_shutdown+0x11/0x60 [] oprofile_shutdown+0x2a/0x60 Note that we don't need this for the other functions, since they are all called with on_each_cpu() (which disables preemption for us anyway). 
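For readers unfamiliar with the per-CPU accessors involved: the fix below swaps __get_cpu_var() for the get_cpu_var()/put_cpu_var() pair, which brackets the access with preempt_disable()/preempt_enable() so smp_processor_id() is no longer used in preemptible context. A minimal sketch of that pattern, with a hypothetical per-CPU variable and helper, illustrative only and not the oprofile code itself:

#include <linux/percpu.h>

/* hypothetical per-CPU variable, for illustration only */
static DEFINE_PER_CPU(int, example_counter);

static void example_touch_counter(void)
{
	/*
	 * get_cpu_var() disables preemption before resolving the per-CPU
	 * address, so the task cannot migrate to another CPU mid-access.
	 */
	int *ctr = &get_cpu_var(example_counter);

	(*ctr)++;

	/* put_cpu_var() re-enables preemption when we are done */
	put_cpu_var(example_counter);
}
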
Signed-off-by: Vegard Nossum Cc: Philippe Elie Cc: oprofile-list@lists.sf.net Cc: Johannes Weiner Signed-off-by: Ingo Molnar --- arch/x86/oprofile/nmi_int.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cc48d3fde54..2b6ad5b9f9d 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -269,12 +269,13 @@ static void nmi_cpu_shutdown(void *dummy) static void nmi_shutdown(void) { - struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); + struct op_msrs *msrs = &get_cpu_var(cpu_msrs); nmi_enabled = 0; on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); unregister_die_notifier(&profile_exceptions_nb); model->shutdown(msrs); free_msrs(); + put_cpu_var(cpu_msrs); } static void nmi_cpu_start(void *dummy) -- cgit v1.2.3-18-g5258 From f3f3149f35b9195ef4b761b1353fc0766b5f53be Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 23 Jun 2008 18:21:56 -0700 Subject: x86: use cpu_khz for loops_per_jiffy calculation, cleanup As suggested by Ingo, remove all references to tsc from init/calibrate.c TSC is x86 specific, and using tsc in variable names in a generic file should be avoided. lpj_tsc is now called lpj_fine, since it is related to fine tuning of lpj value. Also tsc_rate_* is called timer_rate_* Signed-off-by: Alok N Kataria Cc: Arjan van de Ven Cc: Daniel Hecht Cc: Tim Mann Cc: Zach Amsden Cc: Sahil Rihan Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_64.c | 2 +- arch/x86/kernel/tsc_32.c | 2 +- include/linux/delay.h | 2 +- init/calibrate.c | 36 +++++++++++++++++++----------------- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 12b4a71bd07..39ae8511a13 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -123,7 +123,7 @@ void __init time_init(void) (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) cpu_khz = calculate_cpu_khz(); - lpj_tsc = ((unsigned long)tsc_khz * 1000)/HZ; + lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ; if (unsynchronized_tsc()) mark_tsc_unstable("TSCs unsynchronized"); diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 0af49fb533e..048baab7726 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -425,7 +425,7 @@ void __init tsc_init(void) lpj = ((u64)tsc_khz * 1000); do_div(lpj, HZ); - lpj_tsc = lpj; + lpj_fine = lpj; printk("Detected %lu.%03lu MHz processor.\n", (unsigned long)cpu_khz / 1000, diff --git a/include/linux/delay.h b/include/linux/delay.h index 01aec60590a..fd832c6d419 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -41,7 +41,7 @@ static inline void ndelay(unsigned long x) #define ndelay(x) ndelay(x) #endif -extern unsigned long lpj_tsc; +extern unsigned long lpj_fine; void calibrate_delay(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); diff --git a/init/calibrate.c b/init/calibrate.c index 86286974dad..7963e3fc51d 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -10,7 +10,7 @@ #include #include -unsigned long lpj_tsc; +unsigned long lpj_fine; unsigned long preset_lpj; static int __init lpj_setup(char *str) { @@ -35,9 +35,9 @@ static unsigned long __cpuinit calibrate_delay_direct(void) unsigned long pre_start, start, post_start; unsigned long pre_end, end, post_end; unsigned long start_jiffies; - unsigned long tsc_rate_min, tsc_rate_max; - unsigned long good_tsc_sum = 0; - unsigned long good_tsc_count = 0; + unsigned long timer_rate_min, timer_rate_max; + unsigned 
long good_timer_sum = 0; + unsigned long good_timer_count = 0; int i; if (read_current_timer(&pre_start) < 0 ) @@ -81,22 +81,24 @@ static unsigned long __cpuinit calibrate_delay_direct(void) } read_current_timer(&post_end); - tsc_rate_max = (post_end - pre_start) / DELAY_CALIBRATION_TICKS; - tsc_rate_min = (pre_end - post_start) / DELAY_CALIBRATION_TICKS; + timer_rate_max = (post_end - pre_start) / + DELAY_CALIBRATION_TICKS; + timer_rate_min = (pre_end - post_start) / + DELAY_CALIBRATION_TICKS; /* - * If the upper limit and lower limit of the tsc_rate is + * If the upper limit and lower limit of the timer_rate is * >= 12.5% apart, redo calibration. */ if (pre_start != 0 && pre_end != 0 && - (tsc_rate_max - tsc_rate_min) < (tsc_rate_max >> 3)) { - good_tsc_count++; - good_tsc_sum += tsc_rate_max; + (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) { + good_timer_count++; + good_timer_sum += timer_rate_max; } } - if (good_tsc_count) - return (good_tsc_sum/good_tsc_count); + if (good_timer_count) + return (good_timer_sum/good_timer_count); printk(KERN_WARNING "calibrate_delay_direct() failed to get a good " "estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n"); @@ -111,8 +113,8 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;} * bit takes on average 1.5/HZ seconds. This (like the original) is a little * better than 1% * For the boot cpu we can skip the delay calibration and assign it a value - * calculated based on the tsc frequency. - * For the rest of the CPUs we cannot assume that the tsc frequency is same as + * calculated based on the timer frequency. + * For the rest of the CPUs we cannot assume that the timer frequency is same as * the cpu frequency, hence do the calibration for those. */ #define LPS_PREC 8 @@ -126,11 +128,11 @@ void __cpuinit calibrate_delay(void) loops_per_jiffy = preset_lpj; printk(KERN_INFO "Calibrating delay loop (skipped) preset value.. "); - } else if ((smp_processor_id() == 0) && lpj_tsc) { - loops_per_jiffy = lpj_tsc; + } else if ((smp_processor_id() == 0) && lpj_fine) { + loops_per_jiffy = lpj_fine; printk(KERN_INFO "Calibrating delay loop (skipped), " - "using tsc calculated value.. "); + "value calculated using timer frequency.. "); } else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) { printk(KERN_INFO "Calibrating delay using timer specific routine.. "); -- cgit v1.2.3-18-g5258 From b664d6bbeeddc77b93f5fea16006b428054f1cd1 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 24 Jun 2008 12:31:05 +0200 Subject: x86: add X86_FEATURE_IBS cpu feature This adds IBS to the cpu feature flags allowing Perfmon and OProfile to use cpu_has(). 
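With the flag defined, callers can test for IBS support through the regular cpufeature helpers. A minimal sketch of such a check follows; the helper name is hypothetical and not part of this patch:

#include <asm/cpufeature.h>

/* hypothetical helper, for illustration only */
static inline int example_ibs_supported(void)
{
	/* boot_cpu_has() checks the boot CPU's capability bits;
	 * cpu_has(c, X86_FEATURE_IBS) does the same for a given cpuinfo */
	return boot_cpu_has(X86_FEATURE_IBS);
}
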
Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar --- include/asm-x86/cpufeature.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 0d609c837a4..e2469a78195 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -106,6 +106,7 @@ /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ #define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ #define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_IBS (6*32+ 10) /* Instruction Based Sampling */ /* * Auxiliary flags: Linux defined - For features scattered in various -- cgit v1.2.3-18-g5258 From 2b188723ee1707ca902ddb98ce1decdeafb5190a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:37 +0200 Subject: x86, AMD IOMMU: add Kconfig entry This patch adds the Kconfig entry for the AMD IOMMU driver. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e0edaaa6920..5a82f18ab5e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -549,6 +549,13 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT Calgary anyway, pass 'iommu=calgary' on the kernel command line. If unsure, say Y. +config AMD_IOMMU + bool "AMD IOMMU support" + select SWIOTL + depends on X86_64 && PCI + help + Select this to get support for AMD IOMMU hardware in your system. + # need this always selected by IOMMU for the VIA workaround config SWIOTLB bool -- cgit v1.2.3-18-g5258 From 8d283c35a293e6091fdf7ef86842c1174c48a941 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:38 +0200 Subject: x86, AMD IOMMU: add header file for driver data structures and defines This patch adds a header file local to the AMD IOMMU driver with constants and data structures needed in the code. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- include/asm-x86/amd_iommu_types.h | 242 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 include/asm-x86/amd_iommu_types.h diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h new file mode 100644 index 00000000000..0f395501ab8 --- /dev/null +++ b/include/asm-x86/amd_iommu_types.h @@ -0,0 +1,242 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * Leo Duran + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __AMD_IOMMU_TYPES_H__ +#define __AMD_IOMMU_TYPES_H__ + +#include +#include +#include + +/* + * some size calculation constants + */ +#define DEV_TABLE_ENTRY_SIZE 256 +#define ALIAS_TABLE_ENTRY_SIZE 2 +#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) + +/* helper macros */ +#define LOW_U32(x) ((x) & ((1ULL << 32)-1)) +#define HIGH_U32(x) (LOW_U32((x) >> 32)) + +/* Length of the MMIO region for the AMD IOMMU */ +#define MMIO_REGION_LENGTH 0x4000 + +/* Capability offsets used by the driver */ +#define MMIO_CAP_HDR_OFFSET 0x00 +#define MMIO_RANGE_OFFSET 0x0c + +/* Masks, shifts and macros to parse the device range capability */ +#define MMIO_RANGE_LD_MASK 0xff000000 +#define MMIO_RANGE_FD_MASK 0x00ff0000 +#define MMIO_RANGE_BUS_MASK 0x0000ff00 +#define MMIO_RANGE_LD_SHIFT 24 +#define MMIO_RANGE_FD_SHIFT 16 +#define MMIO_RANGE_BUS_SHIFT 8 +#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT) +#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT) +#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT) + +/* Flag masks for the AMD IOMMU exclusion range */ +#define MMIO_EXCL_ENABLE_MASK 0x01ULL +#define MMIO_EXCL_ALLOW_MASK 0x02ULL + +/* Used offsets into the MMIO space */ +#define MMIO_DEV_TABLE_OFFSET 0x0000 +#define MMIO_CMD_BUF_OFFSET 0x0008 +#define MMIO_EVT_BUF_OFFSET 0x0010 +#define MMIO_CONTROL_OFFSET 0x0018 +#define MMIO_EXCL_BASE_OFFSET 0x0020 +#define MMIO_EXCL_LIMIT_OFFSET 0x0028 +#define MMIO_CMD_HEAD_OFFSET 0x2000 +#define MMIO_CMD_TAIL_OFFSET 0x2008 +#define MMIO_EVT_HEAD_OFFSET 0x2010 +#define MMIO_EVT_TAIL_OFFSET 0x2018 +#define MMIO_STATUS_OFFSET 0x2020 + +/* feature control bits */ +#define CONTROL_IOMMU_EN 0x00ULL +#define CONTROL_HT_TUN_EN 0x01ULL +#define CONTROL_EVT_LOG_EN 0x02ULL +#define CONTROL_EVT_INT_EN 0x03ULL +#define CONTROL_COMWAIT_EN 0x04ULL +#define CONTROL_PASSPW_EN 0x08ULL +#define CONTROL_RESPASSPW_EN 0x09ULL +#define CONTROL_COHERENT_EN 0x0aULL +#define CONTROL_ISOC_EN 0x0bULL +#define CONTROL_CMDBUF_EN 0x0cULL +#define CONTROL_PPFLOG_EN 0x0dULL +#define CONTROL_PPFINT_EN 0x0eULL + +/* command specific defines */ +#define CMD_COMPL_WAIT 0x01 +#define CMD_INV_DEV_ENTRY 0x02 +#define CMD_INV_IOMMU_PAGES 0x03 + +#define CMD_COMPL_WAIT_STORE_MASK 0x01 +#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 +#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 + +/* macros and definitions for device table entries */ +#define DEV_ENTRY_VALID 0x00 +#define DEV_ENTRY_TRANSLATION 0x01 +#define DEV_ENTRY_IR 0x3d +#define DEV_ENTRY_IW 0x3e +#define DEV_ENTRY_EX 0x67 +#define DEV_ENTRY_SYSMGT1 0x68 +#define DEV_ENTRY_SYSMGT2 0x69 +#define DEV_ENTRY_INIT_PASS 0xb8 +#define DEV_ENTRY_EINT_PASS 0xb9 +#define DEV_ENTRY_NMI_PASS 0xba +#define DEV_ENTRY_LINT0_PASS 0xbe +#define DEV_ENTRY_LINT1_PASS 0xbf + +/* constants to configure the command buffer */ +#define CMD_BUFFER_SIZE 8192 +#define CMD_BUFFER_ENTRIES 512 +#define MMIO_CMD_SIZE_SHIFT 56 +#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) + +#define PAGE_MODE_1_LEVEL 0x01 +#define PAGE_MODE_2_LEVEL 0x02 +#define PAGE_MODE_3_LEVEL 0x03 + +#define IOMMU_PDE_NL_0 0x000ULL +#define IOMMU_PDE_NL_1 0x200ULL +#define IOMMU_PDE_NL_2 0x400ULL +#define IOMMU_PDE_NL_3 0x600ULL + +#define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL) +#define 
IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL) +#define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL) + +#define IOMMU_MAP_SIZE_L1 (1ULL << 21) +#define IOMMU_MAP_SIZE_L2 (1ULL << 30) +#define IOMMU_MAP_SIZE_L3 (1ULL << 39) + +#define IOMMU_PTE_P (1ULL << 0) +#define IOMMU_PTE_U (1ULL << 59) +#define IOMMU_PTE_FC (1ULL << 60) +#define IOMMU_PTE_IR (1ULL << 61) +#define IOMMU_PTE_IW (1ULL << 62) + +#define IOMMU_L1_PDE(address) \ + ((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) +#define IOMMU_L2_PDE(address) \ + ((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) + +#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) +#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) +#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) +#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) + +#define IOMMU_PROT_MASK 0x03 +#define IOMMU_PROT_IR 0x01 +#define IOMMU_PROT_IW 0x02 + +/* IOMMU capabilities */ +#define IOMMU_CAP_IOTLB 24 +#define IOMMU_CAP_NPCACHE 26 + +#define MAX_DOMAIN_ID 65536 + +struct protection_domain { + spinlock_t lock; + u16 id; + int mode; + u64 *pt_root; + void *priv; +}; + +struct dma_ops_domain { + struct list_head list; + struct protection_domain domain; + unsigned long aperture_size; + unsigned long next_bit; + unsigned long *bitmap; + u64 **pte_pages; +}; + +struct amd_iommu { + struct list_head list; + spinlock_t lock; + + u16 devid; + u16 cap_ptr; + + u64 mmio_phys; + u8 *mmio_base; + u32 cap; + u16 first_device; + u16 last_device; + u64 exclusion_start; + u64 exclusion_length; + + u8 *cmd_buf; + u32 cmd_buf_size; + + int need_sync; + + struct dma_ops_domain *default_dom; +}; + +extern struct list_head amd_iommu_list; + +struct dev_table_entry { + u32 data[8]; +}; + +struct unity_map_entry { + struct list_head list; + u16 devid_start; + u16 devid_end; + u64 address_start; + u64 address_end; + int prot; +}; + +extern struct list_head amd_iommu_unity_map; + +/* data structures for device handling */ +extern struct dev_table_entry *amd_iommu_dev_table; +extern u16 *amd_iommu_alias_table; +extern struct amd_iommu **amd_iommu_rlookup_table; + +extern unsigned amd_iommu_aperture_order; + +extern u16 amd_iommu_last_bdf; + +/* data structures for protection domain handling */ +extern struct protection_domain **amd_iommu_pd_table; +extern unsigned long *amd_iommu_pd_alloc_bitmap; + +extern int amd_iommu_isolate; + +static inline void print_devid(u16 devid, int nl) +{ + int bus = devid >> 8; + int dev = devid >> 3 & 0x1f; + int fn = devid & 0x07; + + printk("%02x:%02x.%x", bus, dev, fn); + if (nl) + printk("\n"); +} + +#endif -- cgit v1.2.3-18-g5258 From f6e2e6b6fc465bc3cc4eae8d53fcf573ca1cfa14 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:39 +0200 Subject: x86, AMD IOMMU: add defines and structures for ACPI scanning code This patch adds the required data structures and constants required to parse the ACPI table for the AMD IOMMU. 
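To make the packed device-id format concrete: the scanning code identifies devices by a 16-bit BDF value, bus in the high byte and devfn in the low byte, built and split by the DEVID() and PCI_BUS() macros added further down in this patch. A small worked example, with the macros re-spelled locally so the sketch stands alone and with illustrative values:

#include <linux/pci.h>
#include <linux/types.h>

/* local copies of the patch's DEVID()/PCI_BUS() helpers, illustration only */
#define EX_DEVID(bus, devfn)	(((bus) << 8) | (devfn))
#define EX_PCI_BUS(x)		(((x) >> 8) & 0xff)

static void example_devid_packing(void)
{
	/* 01:02.0 packs to 0x0110: bus 0x01 << 8, slot 2 << 3, function 0 */
	u16 devid = EX_DEVID(0x01, PCI_DEVFN(0x02, 0));
	u8 bus = EX_PCI_BUS(devid);	/* recovers bus 0x01 */

	(void)devid;
	(void)bus;
}
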
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 101 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 arch/x86/kernel/amd_iommu_init.c diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c new file mode 100644 index 00000000000..6fce5ab683d --- /dev/null +++ b/arch/x86/kernel/amd_iommu_init.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * Leo Duran + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * definitions for the ACPI scanning code + */ +#define UPDATE_LAST_BDF(x) do {\ + if ((x) > amd_iommu_last_bdf) \ + amd_iommu_last_bdf = (x); \ + } while (0); + +#define DEVID(bus, devfn) (((bus) << 8) | (devfn)) +#define PCI_BUS(x) (((x) >> 8) & 0xff) +#define IVRS_HEADER_LENGTH 48 +#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x)))) + +#define ACPI_IVHD_TYPE 0x10 +#define ACPI_IVMD_TYPE_ALL 0x20 +#define ACPI_IVMD_TYPE 0x21 +#define ACPI_IVMD_TYPE_RANGE 0x22 + +#define IVHD_DEV_ALL 0x01 +#define IVHD_DEV_SELECT 0x02 +#define IVHD_DEV_SELECT_RANGE_START 0x03 +#define IVHD_DEV_RANGE_END 0x04 +#define IVHD_DEV_ALIAS 0x42 +#define IVHD_DEV_ALIAS_RANGE 0x43 +#define IVHD_DEV_EXT_SELECT 0x46 +#define IVHD_DEV_EXT_SELECT_RANGE 0x47 + +#define IVHD_FLAG_HT_TUN_EN 0x00 +#define IVHD_FLAG_PASSPW_EN 0x01 +#define IVHD_FLAG_RESPASSPW_EN 0x02 +#define IVHD_FLAG_ISOC_EN 0x03 + +#define IVMD_FLAG_EXCL_RANGE 0x08 +#define IVMD_FLAG_UNITY_MAP 0x01 + +#define ACPI_DEVFLAG_INITPASS 0x01 +#define ACPI_DEVFLAG_EXTINT 0x02 +#define ACPI_DEVFLAG_NMI 0x04 +#define ACPI_DEVFLAG_SYSMGT1 0x10 +#define ACPI_DEVFLAG_SYSMGT2 0x20 +#define ACPI_DEVFLAG_LINT0 0x40 +#define ACPI_DEVFLAG_LINT1 0x80 +#define ACPI_DEVFLAG_ATSDIS 0x10000000 + +struct ivhd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 cap_ptr; + u64 mmio_phys; + u16 pci_seg; + u16 info; + u32 reserved; +} __attribute__((packed)); + +struct ivhd_entry { + u8 type; + u16 devid; + u8 flags; + u32 ext; +} __attribute__((packed)); + +struct ivmd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 aux; + u64 resv; + u64 range_start; + u64 range_length; +} __attribute__((packed)); + -- cgit v1.2.3-18-g5258 From 928abd2545fe367ea3ff3cb8a5076e1d6d2a9574 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:40 +0200 Subject: x86, AMD IOMMU: add data structures to manage the IOMMUs in the system This patch adds the data structures which will contain the information read from the ACPI table. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 6fce5ab683d..0ad8cf9e7ba 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -99,3 +99,20 @@ struct ivmd_header { u64 range_length; } __attribute__((packed)); +static int __initdata amd_iommu_disable; + +u16 amd_iommu_last_bdf; +struct list_head amd_iommu_unity_map; +unsigned amd_iommu_aperture_order = 26; +int amd_iommu_isolate; + +struct list_head amd_iommu_list; +struct dev_table_entry *amd_iommu_dev_table; +u16 *amd_iommu_alias_table; +struct amd_iommu **amd_iommu_rlookup_table; +struct protection_domain **amd_iommu_pd_table; +unsigned long *amd_iommu_pd_alloc_bitmap; + +static u32 dev_table_size; +static u32 alias_table_size; +static u32 rlookup_table_size; -- cgit v1.2.3-18-g5258 From 3e8064ba59128bcb1405079a0789b27b356832b9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:41 +0200 Subject: x86, AMD IOMMU: add functions to find last possible PCI device for IOMMU This patch adds functions to find the last PCI bus/device/function the IOMMU driver has to handle. This information is used later to allocate the memory for the data structures. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 79 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 0ad8cf9e7ba..ee0b2da027b 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -116,3 +116,82 @@ unsigned long *amd_iommu_pd_alloc_bitmap; static u32 dev_table_size; static u32 alias_table_size; static u32 rlookup_table_size; + +static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) +{ + u32 cap; + + cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); + UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); + + return 0; +} + +static int __init find_last_devid_from_ivhd(struct ivhd_header *h) +{ + u8 *p = (void *)h, *end = (void *)h; + struct ivhd_entry *dev; + + p += sizeof(*h); + end += h->length; + + find_last_devid_on_pci(PCI_BUS(h->devid), + PCI_SLOT(h->devid), + PCI_FUNC(h->devid), + h->cap_ptr); + + while (p < end) { + dev = (struct ivhd_entry *)p; + switch (dev->type) { + case IVHD_DEV_SELECT: + case IVHD_DEV_RANGE_END: + case IVHD_DEV_ALIAS: + case IVHD_DEV_EXT_SELECT: + UPDATE_LAST_BDF(dev->devid); + break; + default: + break; + } + p += 0x04 << (*p >> 6); + } + + WARN_ON(p != end); + + return 0; +} + +static int __init find_last_devid_acpi(struct acpi_table_header *table) +{ + int i; + u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + + /* + * Validate checksum here so we don't need to do it when + * we actually parse the table + */ + for (i = 0; i < table->length; ++i) + checksum += p[i]; + if (checksum != 0) + /* ACPI table corrupt */ + return -ENODEV; + + p += IVRS_HEADER_LENGTH; + + end += table->length; + while (p < end) { + h = (struct ivhd_header *)p; + switch (h->type) { + case ACPI_IVHD_TYPE: + 
find_last_devid_from_ivhd(h); + break; + default: + break; + } + p += h->length; + } + WARN_ON(p != end); + + return 0; +} + -- cgit v1.2.3-18-g5258 From ca7ed057ae25e5e60814f950995f22f051d2e449 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:42 +0200 Subject: x86, AMD IOMMU: add amd_iommu_init.c to Makefile This patch adds the source file amd_iommu_init.c to the kernel Makefile for the x86 architecture. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 77807d4769c..1e4e00aca18 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -100,6 +100,7 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o + obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o -- cgit v1.2.3-18-g5258 From 6c56747b46717b4c6a890b35e8518f4be961dc7e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:43 +0200 Subject: x86, AMD IOMMU: add functions for mapping/unmapping the MMIO space This patch contains two functions to map and unmap the MMIO region of an IOMMU. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index ee0b2da027b..3147e699100 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -117,6 +117,29 @@ static u32 dev_table_size; static u32 alias_table_size; static u32 rlookup_table_size; +static u8 * __init iommu_map_mmio_space(u64 address) +{ + u8 *ret; + + if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) + return NULL; + + ret = ioremap_nocache(address, MMIO_REGION_LENGTH); + if (ret != NULL) + return ret; + + release_mem_region(address, MMIO_REGION_LENGTH); + + return NULL; +} + +static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) +{ + if (iommu->mmio_base) + iounmap(iommu->mmio_base); + release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); +} + static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) { u32 cap; -- cgit v1.2.3-18-g5258 From b2026aa2dce4454950ccd9c410790f310d65696a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:44 +0200 Subject: x86, AMD IOMMU: add functions for programming IOMMU MMIO space This patch adds the functions required to programm the IOMMU with the MMIO space. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 60 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3147e699100..ffb8ac82e32 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -117,6 +117,66 @@ static u32 dev_table_size; static u32 alias_table_size; static u32 rlookup_table_size; +static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) +{ + u64 start = iommu->exclusion_start & PAGE_MASK; + u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; + u64 entry; + + if (!iommu->exclusion_start) + return; + + entry = start | MMIO_EXCL_ENABLE_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, + &entry, sizeof(entry)); + + entry = limit; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, + &entry, sizeof(entry)); +} + +static void __init iommu_set_device_table(struct amd_iommu *iommu) +{ + u32 entry; + + BUG_ON(iommu->mmio_base == NULL); + + entry = virt_to_phys(amd_iommu_dev_table); + entry |= (dev_table_size >> 12) - 1; + memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, + &entry, sizeof(entry)); +} + +static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +{ + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl |= (1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + +static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +{ + u32 ctrl; + + ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl &= ~(1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + +void __init iommu_enable(struct amd_iommu *iommu) +{ + u32 ctrl; + + printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); + print_devid(iommu->devid, 0); + printk(" cap 0x%hx\n", iommu->cap_ptr); + + iommu_feature_enable(iommu, CONTROL_IOMMU_EN); + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); +} + static u8 * __init iommu_map_mmio_space(u64 address) { u8 *ret; -- cgit v1.2.3-18-g5258 From b36ca91e1d2d7e846844820784d57d20ad73dbd8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:45 +0200 Subject: x86, AMD IOMMU: add command buffer (de-)allocation This patch adds the functions to allocate and deallocate the command buffer for one IOMMU in the system. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index ffb8ac82e32..c2be3adee87 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -278,3 +278,33 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) return 0; } +static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) +{ + u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL, + get_order(CMD_BUFFER_SIZE)); + u64 entry = 0; + + if (cmd_buf == NULL) + return NULL; + + iommu->cmd_buf_size = CMD_BUFFER_SIZE; + + memset(cmd_buf, 0, CMD_BUFFER_SIZE); + + entry = (u64)virt_to_phys(cmd_buf); + entry |= MMIO_CMD_SIZE_512; + memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, + &entry, sizeof(entry)); + + iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); + + return cmd_buf; +} + +static void __init free_command_buffer(struct amd_iommu *iommu) +{ + if (iommu->cmd_buf) + free_pages((unsigned long)iommu->cmd_buf, + get_order(CMD_BUFFER_SIZE)); +} + -- cgit v1.2.3-18-g5258 From 3566b7786afd7c14c62726f359df3c827054670b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:46 +0200 Subject: x86, AMD IOMMU: add device table initialization functions This patch adds functions necessary to initialize the device table from the ACPI definitions. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 45 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c2be3adee87..4c37abb3435 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -308,3 +308,48 @@ static void __init free_command_buffer(struct amd_iommu *iommu) get_order(CMD_BUFFER_SIZE)); } +static void set_dev_entry_bit(u16 devid, u8 bit) +{ + int i = (bit >> 5) & 0x07; + int _bit = bit & 0x1f; + + amd_iommu_dev_table[devid].data[i] |= (1 << _bit); +} + +static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) +{ + if (flags & ACPI_DEVFLAG_INITPASS) + set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); + if (flags & ACPI_DEVFLAG_EXTINT) + set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); + if (flags & ACPI_DEVFLAG_NMI) + set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); + if (flags & ACPI_DEVFLAG_SYSMGT1) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); + if (flags & ACPI_DEVFLAG_SYSMGT2) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); + if (flags & ACPI_DEVFLAG_LINT0) + set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); + if (flags & ACPI_DEVFLAG_LINT1) + set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); +} + +static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) +{ + amd_iommu_rlookup_table[devid] = iommu; +} + +static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) +{ + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + if (!(m->flags & IVMD_FLAG_EXCL_RANGE)) + return; + + if (iommu) { + set_dev_entry_bit(m->devid, DEV_ENTRY_EX); + iommu->exclusion_start = m->range_start; + iommu->exclusion_length = m->range_length; + 
} +} + -- cgit v1.2.3-18-g5258 From 5d0c8e49f88b908a8fcc913c4b9843108ae8897b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:47 +0200 Subject: x86, AMD IOMMU: add functions for IOMMU hardware initialization from ACPI This patch adds functions to initialize the IOMMU hardware with information from ACPI and PCI. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 125 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 4c37abb3435..8ec48f1f39d 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -353,3 +353,128 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) } } +static void __init init_iommu_from_pci(struct amd_iommu *iommu) +{ + int bus = PCI_BUS(iommu->devid); + int dev = PCI_SLOT(iommu->devid); + int fn = PCI_FUNC(iommu->devid); + int cap_ptr = iommu->cap_ptr; + u32 range; + + iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); + + range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); + iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range)); + iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range)); +} + +static void __init init_iommu_from_acpi(struct amd_iommu *iommu, + struct ivhd_header *h) +{ + u8 *p = (u8 *)h; + u8 *end = p, flags = 0; + u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; + u32 ext_flags = 0; + bool alias = 0; + struct ivhd_entry *e; + + /* + * First set the recommended feature enable bits from ACPI + * into the IOMMU control registers + */ + h->flags & IVHD_FLAG_HT_TUN_EN ? + iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : + iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); + + h->flags & IVHD_FLAG_PASSPW_EN ? + iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_PASSPW_EN); + + h->flags & IVHD_FLAG_RESPASSPW_EN ? + iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); + + h->flags & IVHD_FLAG_ISOC_EN ? + iommu_feature_enable(iommu, CONTROL_ISOC_EN) : + iommu_feature_disable(iommu, CONTROL_ISOC_EN); + + /* + * make IOMMU memory accesses cache coherent + */ + iommu_feature_enable(iommu, CONTROL_COHERENT_EN); + + /* + * Done. 
Now parse the device entries + */ + p += sizeof(struct ivhd_header); + end += h->length; + + while (p < end) { + e = (struct ivhd_entry *)p; + switch (e->type) { + case IVHD_DEV_ALL: + for (dev_i = iommu->first_device; + dev_i <= iommu->last_device; ++dev_i) + set_dev_entry_from_acpi(dev_i, e->flags, 0); + break; + case IVHD_DEV_SELECT: + devid = e->devid; + set_dev_entry_from_acpi(devid, e->flags, 0); + break; + case IVHD_DEV_SELECT_RANGE_START: + devid_start = e->devid; + flags = e->flags; + ext_flags = 0; + alias = 0; + break; + case IVHD_DEV_ALIAS: + devid = e->devid; + devid_to = e->ext >> 8; + set_dev_entry_from_acpi(devid, e->flags, 0); + amd_iommu_alias_table[devid] = devid_to; + break; + case IVHD_DEV_ALIAS_RANGE: + devid_start = e->devid; + flags = e->flags; + devid_to = e->ext >> 8; + ext_flags = 0; + alias = 1; + break; + case IVHD_DEV_EXT_SELECT: + devid = e->devid; + set_dev_entry_from_acpi(devid, e->flags, e->ext); + break; + case IVHD_DEV_EXT_SELECT_RANGE: + devid_start = e->devid; + flags = e->flags; + ext_flags = e->ext; + alias = 0; + break; + case IVHD_DEV_RANGE_END: + devid = e->devid; + for (dev_i = devid_start; dev_i <= devid; ++dev_i) { + if (alias) + amd_iommu_alias_table[dev_i] = devid_to; + set_dev_entry_from_acpi( + amd_iommu_alias_table[dev_i], + flags, ext_flags); + } + break; + default: + break; + } + + p += 0x04 << (e->type >> 6); + } +} + +static int __init init_iommu_devices(struct amd_iommu *iommu) +{ + u16 i; + + for (i = iommu->first_device; i <= iommu->last_device; ++i) + set_iommu_for_device(iommu, i); + + return 0; +} + -- cgit v1.2.3-18-g5258 From e47d402d2df89bb1aa77b7573597a9dd2241aafe Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:48 +0200 Subject: x86, AMD IOMMU: add detect code for AMD IOMMU hardware This patch adds the detection of AMD IOMMU hardware provided on information from ACPI provided by the BIOS. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 78 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8ec48f1f39d..3f4f7b89044 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -478,3 +478,81 @@ static int __init init_iommu_devices(struct amd_iommu *iommu) return 0; } +static void __init free_iommu_one(struct amd_iommu *iommu) +{ + free_command_buffer(iommu); + iommu_unmap_mmio_space(iommu); +} + +static void __init free_iommu_all(void) +{ + struct amd_iommu *iommu, *next; + + list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) { + list_del(&iommu->list); + free_iommu_one(iommu); + kfree(iommu); + } +} + +static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) +{ + spin_lock_init(&iommu->lock); + list_add_tail(&iommu->list, &amd_iommu_list); + + /* + * Copy data from ACPI table entry to the iommu struct + */ + iommu->devid = h->devid; + iommu->cap_ptr = h->cap_ptr; + iommu->mmio_phys = h->mmio_phys; + iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); + if (!iommu->mmio_base) + return -ENOMEM; + + iommu_set_device_table(iommu); + iommu->cmd_buf = alloc_command_buffer(iommu); + if (!iommu->cmd_buf) + return -ENOMEM; + + init_iommu_from_pci(iommu); + init_iommu_from_acpi(iommu, h); + init_iommu_devices(iommu); + + return 0; +} + +static int __init init_iommu_all(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + struct amd_iommu *iommu; + int ret; + + INIT_LIST_HEAD(&amd_iommu_list); + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + h = (struct ivhd_header *)p; + switch (*p) { + case ACPI_IVHD_TYPE: + iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); + if (iommu == NULL) + return -ENOMEM; + ret = init_iommu_one(iommu, h); + if (ret) + return ret; + break; + default: + break; + } + p += h->length; + + } + WARN_ON(p != end); + + return 0; +} + -- cgit v1.2.3-18-g5258 From be2a022c0dd0f630b06f83de284df53cb60a308f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:49 +0200 Subject: x86, AMD IOMMU: add functions to parse IOMMU memory mapping requirements for devices This patch adds the functions to parse the information about IOMMU exclusion ranges and required unity mappings for the devices handled by the IOMMU. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 87 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3f4f7b89044..555fcc9830c 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -556,3 +556,90 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } +static void __init free_unity_maps(void) +{ + struct unity_map_entry *entry, *next; + + list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { + list_del(&entry->list); + kfree(entry); + } +} + +static int __init init_exclusion_range(struct ivmd_header *m) +{ + int i; + + switch (m->type) { + case ACPI_IVMD_TYPE: + set_device_exclusion_range(m->devid, m); + break; + case ACPI_IVMD_TYPE_ALL: + for (i = 0; i < amd_iommu_last_bdf; ++i) + set_device_exclusion_range(i, m); + break; + case ACPI_IVMD_TYPE_RANGE: + for (i = m->devid; i <= m->aux; ++i) + set_device_exclusion_range(i, m); + break; + default: + break; + } + + return 0; +} + +static int __init init_unity_map_range(struct ivmd_header *m) +{ + struct unity_map_entry *e = 0; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (e == NULL) + return -ENOMEM; + + switch (m->type) { + default: + case ACPI_IVMD_TYPE: + e->devid_start = e->devid_end = m->devid; + break; + case ACPI_IVMD_TYPE_ALL: + e->devid_start = 0; + e->devid_end = amd_iommu_last_bdf; + break; + case ACPI_IVMD_TYPE_RANGE: + e->devid_start = m->devid; + e->devid_end = m->aux; + break; + } + e->address_start = PAGE_ALIGN(m->range_start); + e->address_end = e->address_start + PAGE_ALIGN(m->range_length); + e->prot = m->flags >> 1; + + list_add_tail(&e->list, &amd_iommu_unity_map); + + return 0; +} + +static int __init init_memory_definitions(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivmd_header *m; + + INIT_LIST_HEAD(&amd_iommu_unity_map); + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + m = (struct ivmd_header *)p; + if (m->flags & IVMD_FLAG_EXCL_RANGE) + init_exclusion_range(m); + else if (m->flags & IVMD_FLAG_UNITY_MAP) + init_unity_map_range(m); + + p += m->length; + } + + return 0; +} + -- cgit v1.2.3-18-g5258 From fe74c9cf3985e307e9734296d08a270d510e3fb7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Jun 2008 21:27:50 +0200 Subject: x86, AMD IOMMU: clue initialization code together This patch puts the AMD IOMMU ACPI table parsing and hardware initialization functions together to the main intialization routine. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: Sebastian.Biemueller@amd.com Cc: robert.richter@amd.com Cc: joro@8bytes.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 126 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 555fcc9830c..c792ddc4fec 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -643,3 +643,129 @@ static int __init init_memory_definitions(struct acpi_table_header *table) return 0; } +int __init amd_iommu_init(void) +{ + int i, ret = 0; + + + if (amd_iommu_disable) { + printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n"); + return 0; + } + + /* + * First parse ACPI tables to find the largest Bus/Dev/Func + * we need to handle. Upon this information the shared data + * structures for the IOMMUs in the system will be allocated + */ + if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) + return -ENODEV; + + dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE); + alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE); + rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE); + + ret = -ENOMEM; + + /* Device table - directly used by all IOMMUs */ + amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(dev_table_size)); + if (amd_iommu_dev_table == NULL) + goto out; + + /* + * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the + * IOMMU see for that device + */ + amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(alias_table_size)); + if (amd_iommu_alias_table == NULL) + goto free; + + /* IOMMU rlookup table - find the IOMMU for a specific device */ + amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(rlookup_table_size)); + if (amd_iommu_rlookup_table == NULL) + goto free; + + /* + * Protection Domain table - maps devices to protection domains + * This table has the same size as the rlookup_table + */ + amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(rlookup_table_size)); + if (amd_iommu_pd_table == NULL) + goto free; + + amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL, + get_order(MAX_DOMAIN_ID/8)); + if (amd_iommu_pd_alloc_bitmap == NULL) + goto free; + + /* + * memory is allocated now; initialize the device table with all zeroes + * and let all alias entries point to itself + */ + memset(amd_iommu_dev_table, 0, dev_table_size); + for (i = 0; i < amd_iommu_last_bdf; ++i) + amd_iommu_alias_table[i] = i; + + memset(amd_iommu_pd_table, 0, rlookup_table_size); + memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8); + + /* + * never allocate domain 0 because its used as the non-allocated and + * error value placeholder + */ + amd_iommu_pd_alloc_bitmap[0] = 1; + + /* + * now the data structures are allocated and basically initialized + * start the real acpi table scan + */ + ret = -ENODEV; + if (acpi_table_parse("IVRS", init_iommu_all) != 0) + goto free; + + if (acpi_table_parse("IVRS", init_memory_definitions) != 0) + goto free; + + printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n", + (1 << (amd_iommu_aperture_order-20))); + + printk(KERN_INFO "AMD IOMMU: device isolation "); + if (amd_iommu_isolate) + printk("enabled\n"); + else + printk("disabled\n"); + +out: + return ret; + +free: + if (amd_iommu_pd_alloc_bitmap) + free_pages((unsigned long)amd_iommu