diff options
Diffstat (limited to 'arch')
385 files changed, 8879 insertions, 6665 deletions
diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h index 544c69af816..547e90951ce 100644 --- a/arch/alpha/include/asm/smp.h +++ b/arch/alpha/include/asm/smp.h @@ -45,7 +45,6 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS]; #define raw_smp_processor_id() (current_thread_info()->cpu) extern int smp_num_cpus; -#define cpu_possible_map cpu_present_map extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi(cpumask_t mask); diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h index 149532e162c..b4f284c72ff 100644 --- a/arch/alpha/include/asm/topology.h +++ b/arch/alpha/include/asm/topology.h @@ -39,7 +39,24 @@ static inline cpumask_t node_to_cpumask(int node) return node_cpu_mask; } +extern struct cpumask node_to_cpumask_map[]; +/* FIXME: This is dumb, recalculating every time. But simple. */ +static const struct cpumask *cpumask_of_node(int node) +{ + int cpu; + + cpumask_clear(&node_to_cpumask_map[node]); + + for_each_online_cpu(cpu) { + if (cpu_to_node(cpu) == node) + cpumask_set_cpu(cpu, node_to_cpumask_map[node]); + } + + return &node_to_cpumask_map[node]; +} + #define pcibus_to_cpumask(bus) (cpu_online_map) +#define cpumask_of_pcibus(bus) (cpu_online_mask) #endif /* !CONFIG_NUMA */ # include <asm-generic/topology.h> diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index ac706c1d7ad..b4697759a12 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror -Wno-sign-compare obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ irq_alpha.o signal.o setup.o ptrace.o time.o \ - alpha_ksyms.o systbls.o err_common.o io.o + alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o obj-$(CONFIG_VGA_HOSE) += console.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c new file mode 100644 index 00000000000..4a0af906b00 --- /dev/null +++ b/arch/alpha/kernel/binfmt_loader.c @@ -0,0 +1,51 @@ +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/mm_types.h> +#include <linux/binfmts.h> +#include <linux/a.out.h> + +static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs) +{ + struct exec *eh = (struct exec *)bprm->buf; + unsigned long loader; + struct file *file; + int retval; + + if (eh->fh.f_magic != 0x183 || (eh->fh.f_flags & 0x3000) != 0x3000) + return -ENOEXEC; + + if (bprm->loader) + return -ENOEXEC; + + allow_write_access(bprm->file); + fput(bprm->file); + bprm->file = NULL; + + loader = bprm->vma->vm_end - sizeof(void *); + + file = open_exec("/sbin/loader"); + retval = PTR_ERR(file); + if (IS_ERR(file)) + return retval; + + /* Remember if the application is TASO. */ + bprm->taso = eh->ah.entry < 0x100000000UL; + + bprm->file = file; + bprm->loader = loader; + retval = prepare_binprm(bprm); + if (retval < 0) + return retval; + return search_binary_handler(bprm,regs); +} + +static struct linux_binfmt loader_format = { + .load_binary = load_binary, +}; + +static int __init init_loader_binfmt(void) +{ + return register_binfmt(&loader_format); +} +arch_initcall(init_loader_binfmt); diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c index 1f762189fa6..c2938e574a4 100644 --- a/arch/alpha/kernel/init_task.c +++ b/arch/alpha/kernel/init_task.c @@ -8,7 +8,6 @@ #include <asm/uaccess.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index c626a821cdc..703731accda 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -50,12 +50,13 @@ int irq_select_affinity(unsigned int irq) if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq]) return 1; - while (!cpu_possible(cpu) || !cpu_isset(cpu, irq_default_affinity)) + while (!cpu_possible(cpu) || + !cpumask_test_cpu(cpu, irq_default_affinity)) cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); last_cpu = cpu; irq_desc[irq].affinity = cpumask_of_cpu(cpu); - irq_desc[irq].chip->set_affinity(irq, cpumask_of_cpu(cpu)); + irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu)); return 0; } #endif /* CONFIG_SMP */ diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 351407e07e7..f238370c907 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -94,6 +94,7 @@ common_shutdown_1(void *generic_ptr) flags |= 0x00040000UL; /* "remain halted" */ *pflags = flags; cpu_clear(cpuid, cpu_present_map); + cpu_clear(cpuid, cpu_possible_map); halt(); } #endif @@ -120,6 +121,7 @@ common_shutdown_1(void *generic_ptr) #ifdef CONFIG_SMP /* Wait for the secondaries to halt. */ cpu_clear(boot_cpuid, cpu_present_map); + cpu_clear(boot_cpuid, cpu_possible_map); while (cpus_weight(cpu_present_map)) barrier(); #endif diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index a449e999027..02bee6983ce 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -79,6 +79,11 @@ int alpha_l3_cacheshape; unsigned long alpha_verbose_mcheck = CONFIG_VERBOSE_MCHECK_ON; #endif +#ifdef CONFIG_NUMA +struct cpumask node_to_cpumask_map[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_to_cpumask_map); +#endif + /* Which processor we booted from. */ int boot_cpuid; diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index cf7da10097b..d953e510f68 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -70,11 +70,6 @@ enum ipi_message_type { /* Set to a secondary's cpuid when it comes online. */ static int smp_secondary_alive __devinitdata = 0; -/* Which cpus ids came online. */ -cpumask_t cpu_online_map; - -EXPORT_SYMBOL(cpu_online_map); - int smp_num_probed; /* Internal processor count */ int smp_num_cpus = 1; /* Number that came online. */ EXPORT_SYMBOL(smp_num_cpus); @@ -440,6 +435,7 @@ setup_smp(void) ((char *)cpubase + i*hwrpb->processor_size); if ((cpu->flags & 0x1cc) == 0x1cc) { smp_num_probed++; + cpu_set(i, cpu_possible_map); cpu_set(i, cpu_present_map); cpu->pal_revision = boot_cpu_palrev; } @@ -473,6 +469,7 @@ smp_prepare_cpus(unsigned int max_cpus) /* Nothing to do on a UP box, or when told not to. */ if (smp_num_probed == 1 || max_cpus == 0) { + cpu_possible_map = cpumask_of_cpu(boot_cpuid); cpu_present_map = cpumask_of_cpu(boot_cpuid); printk(KERN_INFO "SMP mode deactivated.\n"); return; diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index c71b0fd7a61..ab44c164d9d 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -177,19 +177,19 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) } static void -dp264_set_affinity(unsigned int irq, cpumask_t affinity) +dp264_set_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&dp264_irq_lock); - cpu_set_irq_affinity(irq, affinity); + cpu_set_irq_affinity(irq, *affinity); tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); } static void -clipper_set_affinity(unsigned int irq, cpumask_t affinity) +clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&dp264_irq_lock); - cpu_set_irq_affinity(irq - 16, affinity); + cpu_set_irq_affinity(irq - 16, *affinity); tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); } diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 52c91ccc164..27f840a4ad3 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -158,10 +158,10 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) } static void -titan_set_irq_affinity(unsigned int irq, cpumask_t affinity) +titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&titan_irq_lock); - titan_cpu_set_irq_affinity(irq - 16, affinity); + titan_cpu_set_irq_affinity(irq - 16, *affinity); titan_update_irq_hw(titan_cached_irq_mask); spin_unlock(&titan_irq_lock); } diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index 7fc9860a97d..c6884ba1d5e 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -109,11 +109,11 @@ static void gic_unmask_irq(unsigned int irq) } #ifdef CONFIG_SMP -static void gic_set_cpu(unsigned int irq, cpumask_t mask_val) +static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) { void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3); unsigned int shift = (irq % 4) * 8; - unsigned int cpu = first_cpu(mask_val); + unsigned int cpu = cpumask_first(mask_val); u32 val; spin_lock(&irq_controller_lock); diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c index 0bbf8062539..e859af34946 100644 --- a/arch/arm/kernel/init_task.c +++ b/arch/arm/kernel/init_task.c @@ -12,7 +12,6 @@ #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 2f3eb795fa6..7141cee1fab 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -174,7 +174,7 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu) pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu); spin_lock_irq(&desc->lock); - desc->chip->set_affinity(irq, cpumask_of_cpu(cpu)); + desc->chip->set_affinity(irq, cpumask_of(cpu)); spin_unlock_irq(&desc->lock); } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 019237d2162..55fa7ff96a3 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -34,16 +34,6 @@ #include <asm/ptrace.h> /* - * bitmask of present and online CPUs. - * The present bitmask indicates that the CPU is physically present. - * The online bitmask indicates that the CPU is up and running. - */ -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); -cpumask_t cpu_online_map; -EXPORT_SYMBOL(cpu_online_map); - -/* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core * where to place its SVC stack diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index d140eae53de..1ff1bda0a89 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -178,7 +178,6 @@ static struct clock_event_device clkevt = { .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 150, - .cpumask = CPU_MASK_CPU0, .set_next_event = clkevt32k_next_event, .set_mode = clkevt32k_mode, }; @@ -206,7 +205,7 @@ void __init at91rm9200_timer_init(void) clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift); clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt); clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1; - clkevt.cpumask = cpumask_of_cpu(0); + clkevt.cpumask = cpumask_of(0); clockevents_register_device(&clkevt); /* register clocksource */ diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c index 122fd77ed58..b63e1d5f1ba 100644 --- a/arch/arm/mach-at91/at91sam926x_time.c +++ b/arch/arm/mach-at91/at91sam926x_time.c @@ -91,7 +91,6 @@ static struct clock_event_device pit_clkevt = { .features = CLOCK_EVT_FEAT_PERIODIC, .shift = 32, .rating = 100, - .cpumask = CPU_MASK_CPU0, .set_mode = pit_clkevt_mode, }; @@ -173,6 +172,7 @@ static void __init at91sam926x_pit_init(void) /* Set up and register clockevents */ pit_clkevt.mult = div_sc(pit_rate, NSEC_PER_SEC, pit_clkevt.shift); + pit_clkevt.cpumask = cpumask_of(0); clockevents_register_device(&pit_clkevt); } diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index 3b9a296b5c4..f8bcd29d17a 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -322,7 +322,7 @@ static void __init davinci_timer_init(void) clockevent_davinci.min_delta_ns = clockevent_delta2ns(1, &clockevent_davinci); - clockevent_davinci.cpumask = cpumask_of_cpu(0); + clockevent_davinci.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_davinci); } diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index a11765f5f23..aff0ebcfa84 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -184,7 +184,7 @@ static int __init imx_clockevent_init(unsigned long rate) clockevent_imx.min_delta_ns = clockevent_delta2ns(0xf, &clockevent_imx); - clockevent_imx.cpumask = cpumask_of_cpu(0); + clockevent_imx.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_imx); diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 7766f469456..f4656d2ac8a 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -487,7 +487,7 @@ static int __init ixp4xx_clockevent_init(void) clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx); clockevent_ixp4xx.min_delta_ns = clockevent_delta2ns(0xf, &clockevent_ixp4xx); - clockevent_ixp4xx.cpumask = cpumask_of_cpu(0); + clockevent_ixp4xx.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_ixp4xx); return 0; diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 345a14cb73c..444d9c0f5ca 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -182,7 +182,7 @@ static void __init msm_timer_init(void) clockevent_delta2ns(0xf0000000 >> clock->shift, ce); /* 4 gets rounded down to 3 */ ce->min_delta_ns = clockevent_delta2ns(4, ce); - ce->cpumask = cpumask_of_cpu(0); + ce->cpumask = cpumask_of(0); cs->mult = clocksource_hz2mult(clock->freq, cs->shift); res = clocksource_register(cs); diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c index a63424d083d..41df6972176 100644 --- a/arch/arm/mach-ns9xxx/time-ns9360.c +++ b/arch/arm/mach-ns9xxx/time-ns9360.c @@ -173,7 +173,7 @@ static void __init ns9360_timer_init(void) ns9360_clockevent_device.min_delta_ns = clockevent_delta2ns(1, &ns9360_clockevent_device); - ns9360_clockevent_device.cpumask = cpumask_of_cpu(0); + ns9360_clockevent_device.cpumask = cpumask_of(0); clockevents_register_device(&ns9360_clockevent_device); setup_irq(IRQ_NS9360_TIMER0 + TIMER_CLOCKEVENT, diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 2cf7e32bd29..495a32c287b 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -173,7 +173,7 @@ static __init void omap_init_mpu_timer(unsigned long rate) clockevent_mpu_timer1.min_delta_ns = clockevent_delta2ns(1, &clockevent_mpu_timer1); - clockevent_mpu_timer1.cpumask = cpumask_of_cpu(0); + clockevent_mpu_timer1.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_mpu_timer1); } diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c index 705367ece17..fd3f7396e16 100644 --- a/arch/arm/mach-omap1/timer32k.c +++ b/arch/arm/mach-omap1/timer32k.c @@ -187,7 +187,7 @@ static __init void omap_init_32k_timer(void) clockevent_32k_timer.min_delta_ns = clockevent_delta2ns(1, &clockevent_32k_timer); - clockevent_32k_timer.cpumask = cpumask_of_cpu(0); + clockevent_32k_timer.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_32k_timer); } diff --git a/arch/arm/mach-omap2/clock24xx.h b/arch/arm/mach-omap2/clock24xx.h index ff6cd14d254..ad6d98d177c 100644 --- a/arch/arm/mach-omap2/clock24xx.h +++ b/arch/arm/mach-omap2/clock24xx.h @@ -2321,7 +2321,7 @@ static struct clk i2c2_fck = { }; static struct clk i2chs2_fck = { - .name = "i2chs_fck", + .name = "i2c_fck", .id = 2, .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP243X, @@ -2354,7 +2354,7 @@ static struct clk i2c1_fck = { }; static struct clk i2chs1_fck = { - .name = "i2chs_fck", + .name = "i2c_fck", .id = 1, .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP243X, diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c index 589393bedad..ae6036300f6 100644 --- a/arch/arm/mach-omap2/timer-gp.c +++ b/arch/arm/mach-omap2/timer-gp.c @@ -120,7 +120,7 @@ static void __init omap2_gp_clockevent_init(void) clockevent_gpt.min_delta_ns = clockevent_delta2ns(1, &clockevent_gpt); - clockevent_gpt.cpumask = cpumask_of_cpu(0); + clockevent_gpt.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_gpt); } diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 00162415851..95656a72268 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -122,7 +122,6 @@ static struct clock_event_device ckevt_pxa_osmr0 = { .features = CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 200, - .cpumask = CPU_MASK_CPU0, .set_next_event = pxa_osmr0_set_next_event, .set_mode = pxa_osmr0_set_mode, }; @@ -163,6 +162,7 @@ static void __init pxa_timer_init(void) clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0); ckevt_pxa_osmr0.min_delta_ns = clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1; + ckevt_pxa_osmr0.cpumask = cpumask_of(0); cksrc_pxa_oscr0.mult = clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift); diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 5f1d55963ce..bd2aa4f1614 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -624,7 +624,7 @@ static struct clock_event_device timer0_clockevent = { .set_mode = timer_set_mode, .set_next_event = timer_set_next_event, .rating = 300, - .cpumask = CPU_MASK_ALL, + .cpumask = cpu_all_mask, }; static void __init realview_clockevents_init(unsigned int timer_irq) diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c index 9019ef2e561..67d6d9cc68b 100644 --- a/arch/arm/mach-realview/localtimer.c +++ b/arch/arm/mach-realview/localtimer.c @@ -154,7 +154,7 @@ void __cpuinit local_timer_setup(void) clk->set_mode = local_timer_set_mode; clk->set_next_event = local_timer_set_next_event; clk->irq = IRQ_LOCALTIMER; - clk->cpumask = cpumask_of_cpu(cpu); + clk->cpumask = cpumask_of(cpu); clk->shift = 20; clk->mult = div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift); clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk); @@ -193,7 +193,7 @@ void __cpuinit local_timer_setup(void) clk->rating = 200; clk->set_mode = dummy_timer_set_mode; clk->broadcast = smp_timer_broadcast; - clk->cpumask = cpumask_of_cpu(cpu); + clk->cpumask = cpumask_of(cpu); clockevents_register_device(clk); } diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c index 8c5e727f3b7..711c0295c66 100644 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@ -73,7 +73,6 @@ static struct clock_event_device ckevt_sa1100_osmr0 = { .features = CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 200, - .cpumask = CPU_MASK_CPU0, .set_next_event = sa1100_osmr0_set_next_event, .set_mode = sa1100_osmr0_set_mode, }; @@ -110,6 +109,7 @@ static void __init sa1100_timer_init(void) clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0); ckevt_sa1100_osmr0.min_delta_ns = clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1; + ckevt_sa1100_osmr0.cpumask = cpumask_of(0); cksrc_sa1100_oscr.mult = clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift); diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index df25aa13850..1c43494f5c4 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -1005,7 +1005,7 @@ static void __init versatile_timer_init(void) timer0_clockevent.min_delta_ns = clockevent_delta2ns(0xf, &timer0_clockevent); - timer0_clockevent.cpumask = cpumask_of_cpu(0); + timer0_clockevent.cpumask = cpumask_of(0); clockevents_register_device(&timer0_clockevent); } diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c index 4de366e8b4c..6d6bd589924 100644 --- a/arch/arm/oprofile/op_model_mpcore.c +++ b/arch/arm/oprofile/op_model_mpcore.c @@ -260,10 +260,10 @@ static void em_stop(void) static void em_route_irq(int irq, unsigned int cpu) { struct irq_desc *desc = irq_desc + irq; - cpumask_t mask = cpumask_of_cpu(cpu); + const struct cpumask *mask = cpumask_of(cpu); spin_lock_irq(&desc->lock); - desc->affinity = mask; + desc->affinity = *mask; desc->chip->set_affinity(irq, mask); spin_unlock_irq(&desc->lock); } diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c index fd28f5194f7..758a1293bcf 100644 --- a/arch/arm/plat-mxc/time.c +++ b/arch/arm/plat-mxc/time.c @@ -190,7 +190,7 @@ static int __init mxc_clockevent_init(void) clockevent_mxc.min_delta_ns = clockevent_delta2ns(0xff, &clockevent_mxc); - clockevent_mxc.cpumask = cpumask_of_cpu(0); + clockevent_mxc.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_mxc); diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 544d6b327f3..6fa2923e6dc 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -149,7 +149,6 @@ static struct clock_event_device orion_clkevt = { .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, .shift = 32, .rating = 300, - .cpumask = CPU_MASK_CPU0, .set_next_event = orion_clkevt_next_event, .set_mode = orion_clkevt_mode, }; @@ -199,5 +198,6 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk) orion_clkevt.mult = div_sc(tclk, NSEC_PER_SEC, orion_clkevt.shift); orion_clkevt.max_delta_ns = clockevent_delta2ns(0xfffffffe, &orion_clkevt); orion_clkevt.min_delta_ns = clockevent_delta2ns(1, &orion_clkevt); + orion_clkevt.cpumask = cpumask_of(0); clockevents_register_device(&orion_clkevt); } diff --git a/arch/avr32/include/asm/bitops.h b/arch/avr32/include/asm/bitops.h index 1a50b69b1a1..f7dd5f71edf 100644 --- a/arch/avr32/include/asm/bitops.h +++ b/arch/avr32/include/asm/bitops.h @@ -263,6 +263,11 @@ static inline int fls(unsigned long word) return 32 - result; } +static inline int __fls(unsigned long word) +{ + return fls(word) - 1; +} + unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size); unsigned long find_next_zero_bit(const unsigned long *addr, diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c index 44058469c6e..993d56ee3cf 100644 --- a/arch/avr32/kernel/init_task.c +++ b/arch/avr32/kernel/init_task.c @@ -13,7 +13,6 @@ #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index 283481d74a5..0ff46bf873b 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -106,7 +106,6 @@ static struct clock_event_device comparator = { .features = CLOCK_EVT_FEAT_ONESHOT, .shift = 16, .rating = 50, - .cpumask = CPU_MASK_CPU0, .set_next_event = comparator_next_event, .set_mode = comparator_mode, }; @@ -134,6 +133,7 @@ void __init time_init(void) comparator.mult = div_sc(counter_hz, NSEC_PER_SEC, comparator.shift); comparator.max_delta_ns = clockevent_delta2ns((u32)~0, &comparator); comparator.min_delta_ns = clockevent_delta2ns(50, &comparator) + 1; + comparator.cpumask = cpumask_of(0); sysreg_write(COMPARE, 0); timer_irqaction.dev_id = &comparator; diff --git a/arch/blackfin/include/asm/bitops.h b/arch/blackfin/include/asm/bitops.h index b39a175c79c..c428e4106f8 100644 --- a/arch/blackfin/include/asm/bitops.h +++ b/arch/blackfin/include/asm/bitops.h @@ -213,6 +213,7 @@ static __inline__ int __test_bit(int nr, const void *addr) #endif /* __KERNEL__ */ #include <asm-generic/bitops/fls.h> +#include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls64.h> #endif /* _BLACKFIN_BITOPS_H */ diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c index 6bdba7b2110..2c228c02097 100644 --- a/arch/blackfin/kernel/init_task.c +++ b/arch/blackfin/kernel/init_task.c @@ -33,7 +33,6 @@ #include <linux/mqueue.h> #include <linux/fs.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c index e887efc86c2..0ed2badfd74 100644 --- a/arch/blackfin/kernel/time-ts.c +++ b/arch/blackfin/kernel/time-ts.c @@ -162,7 +162,6 @@ static struct clock_event_device clockevent_bfin = { .name = "bfin_core_timer", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .shift = 32, - .cpumask = CPU_MASK_CPU0, .set_next_event = bfin_timer_set_next_event, .set_mode = bfin_timer_set_mode, }; @@ -193,6 +192,7 @@ static int __init bfin_clockevent_init(void) clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift); clockevent_bfin.max_delta_ns = clockevent_delta2ns(-1, &clockevent_bfin); clockevent_bfin.min_delta_ns = clockevent_delta2ns(100, &clockevent_bfin); + clockevent_bfin.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_bfin); return 0; diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c index 173c141ac9b..295131fee71 100644 --- a/arch/cris/arch-v32/kernel/irq.c +++ b/arch/cris/arch-v32/kernel/irq.c @@ -325,11 +325,11 @@ static void end_crisv32_irq(unsigned int irq) { } -void set_affinity_crisv32_irq(unsigned int irq, cpumask_t dest) +void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest) { unsigned long flags; spin_lock_irqsave(&irq_lock, flags); - irq_allocations[irq - FIRST_IRQ].mask = dest; + irq_allocations[irq - FIRST_IRQ].mask = *dest; spin_unlock_irqrestore(&irq_lock, flags); } diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 52e16c6436f..9dac1733464 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -29,11 +29,7 @@ spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; /* CPU masks */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); cpumask_t phys_cpu_present_map = CPU_MASK_NONE; -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); EXPORT_SYMBOL(phys_cpu_present_map); /* Variables used during SMP boot */ diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h index c0e62f811e0..9e69cfb7f13 100644 --- a/arch/cris/include/asm/bitops.h +++ b/arch/cris/include/asm/bitops.h @@ -148,6 +148,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) #define ffs kernel_ffs #include <asm-generic/bitops/fls.h> +#include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls64.h> #include <asm-generic/bitops/hweight.h> #include <asm-generic/bitops/find.h> diff --git a/arch/cris/include/asm/smp.h b/arch/cris/include/asm/smp.h index dba33aba3e9..c615a06dd75 100644 --- a/arch/cris/include/asm/smp.h +++ b/arch/cris/include/asm/smp.h @@ -4,7 +4,6 @@ #include <linux/cpumask.h> extern cpumask_t phys_cpu_present_map; -extern cpumask_t cpu_possible_map; #define raw_smp_processor_id() (current_thread_info()->cpu) diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 5933656db5a..60816e87645 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -37,7 +37,6 @@ * setup. */ -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c index e2198815b63..29429a8b7f6 100644 --- a/arch/frv/kernel/init_task.c +++ b/arch/frv/kernel/init_task.c @@ -10,7 +10,6 @@ #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h index cb18e3b0aa9..cb9ddf5fc54 100644 --- a/arch/h8300/include/asm/bitops.h +++ b/arch/h8300/include/asm/bitops.h @@ -207,6 +207,7 @@ static __inline__ unsigned long __ffs(unsigned long word) #endif /* __KERNEL__ */ #include <asm-generic/bitops/fls.h> +#include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls64.h> #endif /* _H8300_BITOPS_H */ diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c index 93a4899e46c..cb5dc552da9 100644 --- a/arch/h8300/kernel/init_task.c +++ b/arch/h8300/kernel/init_task.c @@ -12,7 +12,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 7fa8f615ba6..3d31636cbaf 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -687,3 +687,6 @@ config IRQ_PER_CPU config IOMMU_HELPER def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) + +config IOMMU_API + def_bool (DMAR) diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c index c2f58ff364e..cc0a3182db3 100644 --- a/arch/ia64/hp/sim/hpsim_irq.c +++ b/arch/ia64/hp/sim/hpsim_irq.c @@ -22,7 +22,7 @@ hpsim_irq_noop (unsigned int irq) } static void -hpsim_set_affinity_noop (unsigned int a, cpumask_t b) +hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b) { } diff --git a/arch/ia64/include/asm/irq.h b/arch/ia64/include/asm/irq.h index 3627116fb0e..36429a53263 100644 --- a/arch/ia64/include/asm/irq.h +++ b/arch/ia64/include/asm/irq.h @@ -27,7 +27,7 @@ irq_canonicalize (int irq) } extern void set_irq_affinity_info (unsigned int irq, int dest, int redir); -bool is_affinity_mask_valid(cpumask_t cpumask); +bool is_affinity_mask_valid(cpumask_var_t cpumask); #define is_affinity_mask_valid is_affinity_mask_valid diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index f38472ac226..68aa6da807c 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -166,8 +166,6 @@ struct saved_vpd { }; struct kvm_regs { - char *saved_guest; - char *saved_stack; struct saved_vpd vpd; /*Arch-regs*/ int mp_state; @@ -200,6 +198,10 @@ struct kvm_regs { unsigned long fp_psr; /*used for lazy float register */ unsigned long saved_gp; /*for phycial emulation */ + + union context saved_guest; + + unsigned long reserved[64]; /* for future use */ }; struct kvm_sregs { diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index c60d324da54..34866366165 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -23,17 +23,6 @@ #ifndef __ASM_KVM_HOST_H #define __ASM_KVM_HOST_H - -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/kvm.h> -#include <linux/kvm_para.h> -#include <linux/kvm_types.h> - -#include <asm/pal.h> -#include <asm/sal.h> - -#define KVM_MAX_VCPUS 4 #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 @@ -50,70 +39,132 @@ #define EXIT_REASON_EXTERNAL_INTERRUPT 6 #define EXIT_REASON_IPI 7 #define EXIT_REASON_PTC_G 8 +#define EXIT_REASON_DEBUG 20 /*Define vmm address space and vm data space.*/ -#define KVM_VMM_SIZE (16UL<<20) +#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20) #define KVM_VMM_SHIFT 24 -#define KVM_VMM_BASE 0xD000000000000000UL -#define VMM_SIZE (8UL<<20) +#define KVM_VMM_BASE 0xD000000000000000 +#define VMM_SIZE (__IA64_UL_CONST(8)<<20) /* * Define vm_buffer, used by PAL Services, base address. - * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M + * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M */ #define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) -#define KVM_VM_BUFFER_SIZE (8UL<<20) - -/*Define Virtual machine data layout.*/ -#define KVM_VM_DATA_SHIFT 24 -#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT) -#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE) - - -#define KVM_P2M_BASE KVM_VM_DATA_BASE -#define KVM_P2M_OFS 0 -#define KVM_P2M_SIZE (8UL << 20) - -#define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE) -#define KVM_VHPT_OFS KVM_P2M_SIZE -#define KVM_VHPT_BLOCK_SIZE (2UL << 20) -#define VHPT_SHIFT 18 -#define VHPT_SIZE (1UL << VHPT_SHIFT) -#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5)) - -#define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE) -#define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE) -#define KVM_VTLB_BLOCK_SIZE (1UL<<20) -#define VTLB_SHIFT 17 -#define VTLB_SIZE (1UL<<VTLB_SHIFT) -#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5)) - -#define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE) -#define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE) -#define KVM_VPD_BLOCK_SIZE (2UL<<20) -#define VPD_SHIFT 16 -#define VPD_SIZE (1UL<<VPD_SHIFT) - -#define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE) -#define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE) -#define KVM_VCPU_BLOCK_SIZE (2UL<<20) -#define VCPU_SHIFT 18 -#define VCPU_SIZE (1UL<<VCPU_SHIFT) -#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE - -#define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE) -#define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE) -#define KVM_VM_BLOCK_SIZE (1UL<<19) - -#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE) -#define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE) -#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19) - -/* Get vpd, vhpt, tlb, vcpu, base*/ -#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE) -#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE) -#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE) -#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE) +#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20) + +/* + * kvm guest's data area looks as follow: + * + * +----------------------+ ------- KVM_VM_DATA_SIZE + * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET + * | | | / | + * | .......... | | /vcpu's struct&stack | + * | .......... | | /---------------------|---- 0 + * | vcpu[5]'s data | | / vpd | + * | vcpu[4]'s data | |/-----------------------| + * | vcpu[3]'s data | / vtlb | + * | vcpu[2]'s data | /|------------------------| + * | vcpu[1]'s data |/ | vhpt | + * | vcpu[0]'s data |____________________________| + * +----------------------+ | + * | memory dirty log | | + * +----------------------+ | + * | vm's data struct | | + * +----------------------+ | + * | | | + * | | | + * | | | + * | | | + * | | | + * | | | + * | | | + * | vm's p2m table | | + * | | | + * | | | + * | | | | + * vm's data->| | | | + * +----------------------+ ------- 0 + * To support large memory, needs to increase the size of p2m. + * To support more vcpus, needs to ensure it has enough space to + * hold vcpus' data. + */ + +#define KVM_VM_DATA_SHIFT 26 +#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT) +#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE) + +#define KVM_P2M_BASE KVM_VM_DATA_BASE +#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20) + +#define VHPT_SHIFT 16 +#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT) +#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5)) + +#define VTLB_SHIFT 16 +#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT) +#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5)) + +#define VPD_SHIFT 16 +#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT) + +#define VCPU_STRUCT_SHIFT 16 +#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT) + +#define KVM_STK_OFFSET VCPU_STRUCT_SIZE + +#define KVM_VM_STRUCT_SHIFT 19 +#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT) + +#define KVM_MEM_DIRY_LOG_SHIFT 19 +#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT) + +#ifndef __ASSEMBLY__ + +/*Define the max vcpus and memory for Guests.*/ +#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\ + KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data) +#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT) + +#define VMM_LOG_LEN 256 + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/kvm.h> +#include <linux/kvm_para.h> +#include <linux/kvm_types.h> + +#include <asm/pal.h> +#include <asm/sal.h> +#include <asm/page.h> + +struct kvm_vcpu_data { + char vcpu_vhpt[VHPT_SIZE]; + char vcpu_vtlb[VTLB_SIZE]; + char vcpu_vpd[VPD_SIZE]; + char vcpu_struct[VCPU_STRUCT_SIZE]; +}; + +struct kvm_vm_data { + char kvm_p2m[KVM_P2M_SIZE]; + char kvm_vm_struct[KVM_VM_STRUCT_SIZE]; + char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE]; + struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS]; +}; + +#define VCPU_BASE(n) KVM_VM_DATA_BASE + \ + offsetof(struct kvm_vm_data, vcpu_data[n]) +#define VM_BASE KVM_VM_DATA_BASE + \ + offsetof(struct kvm_vm_data, kvm_vm_struct) +#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \ + offsetof(struct kvm_vm_data, kvm_mem_dirty_log) + +#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt)) +#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb)) +#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd)) +#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \ + offsetof(struct kvm_vcpu_data, vcpu_struct)) /*IO section definitions*/ #define IOREQ_READ 1 @@ -389,6 +440,7 @@ struct kvm_vcpu_arch { unsigned long opcode; unsigned long cause; + char log_buf[VMM_LOG_LEN]; union context host; union context guest; }; @@ -403,20 +455,19 @@ struct kvm_sal_data { }; struct kvm_arch { + spinlock_t dirty_log_lock; + unsigned long vm_base; unsigned long metaphysical_rr0; unsigned long metaphysical_rr4; unsigned long vmm_init_rr; - unsigned long vhpt_base; - unsigned long vtlb_base; - unsigned long vpd_base; - spinlock_t dirty_log_lock; + struct kvm_ioapic *vioapic; struct kvm_vm_stat stat; struct kvm_sal_data rdv_sal_data; struct list_head assigned_dev_head; - struct dmar_domain *intel_iommu_domain; + struct iommu_domain *iommu_domain; struct hlist_head irq_ack_notifier_list; unsigned long irq_sources_bitmap; @@ -512,7 +563,7 @@ struct kvm_pt_regs { static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) { - return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; + return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1; } typedef int kvm_vmm_entry(void); @@ -531,5 +582,6 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); void kvm_sal_emul(struct kvm_vcpu *vcpu); static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {} +#endif /* __ASSEMBLY__*/ #endif diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h index 12d96e0cd51..21c402365d0 100644 --- a/arch/ia64/include/asm/smp.h +++ b/arch/ia64/include/asm/smp.h @@ -57,7 +57,6 @@ extern struct smp_boot_data { extern char no_int_routing __devinitdata; -extern cpumask_t cpu_online_map; extern cpumask_t cpu_core_map[NR_CPUS]; DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); extern int smp_num_siblings; diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 35bcb641c9e..76a33a91ca6 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -34,6 +34,7 @@ * Returns a bitmask of CPUs on Node 'node'. */ #define node_to_cpumask(node) (node_to_cpu_mask[node]) +#define cpumask_of_node(node) (&node_to_cpu_mask[node]) /* * Returns the number of the node containing Node 'nid'. @@ -45,7 +46,7 @@ /* * Returns the number of the first CPU on Node 'node'. */ -#define node_to_first_cpu(node) (first_cpu(node_to_cpumask(node))) +#define node_to_first_cpu(node) (cpumask_first(cpumask_of_node(node))) /* * Determines the node for a given pci bus @@ -55,7 +56,6 @@ void build_cpu_to_node_map(void); #define SD_CPU_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ @@ -80,7 +80,6 @@ void build_cpu_to_node_map(void); /* sched_domains SD_NODE_INIT for IA64 NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ @@ -111,6 +110,8 @@ void build_cpu_to_node_map(void); #define topology_core_id(cpu) (cpu_data(cpu)->core_id) #define topology_core_siblings(cpu) (cpu_core_map[cpu]) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #define smt_capable() (smp_num_siblings > 1) #endif @@ -121,6 +122,10 @@ extern void arch_fix_phys_package_id(int num, u32 slot); node_to_cpumask(pcibus_to_node(bus)) \ ) +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_from_node(pcibus_to_node(bus))) + #include <asm-generic/topology.h> #endif /* _ASM_IA64_TOPOLOGY_H */ diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index bd7acc71e8a..0553648b759 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -202,7 +202,6 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size) Boot-time Table Parsing -------------------------------------------------------------------------- */ -static int total_cpus __initdata; static int available_cpus __initdata; struct acpi_table_madt *acpi_madt __initdata; static u8 has_8259; @@ -1001,7 +1000,7 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret) node = pxm_to_node(pxm); if (node >= MAX_NUMNODES || !node_online(node) || - cpus_empty(node_to_cpumask(node))) + cpumask_empty(cpumask_of_node(node))) return AE_OK; /* We know a gsi to node mapping! */ diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index 9d7e1c66faf..5b0e830c6f3 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -17,7 +17,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 5c4674ae8ae..5cfd3d91001 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -330,25 +330,25 @@ unmask_irq (unsigned int irq) static void -iosapic_set_affinity (unsigned int irq, cpumask_t mask) +iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) { #ifdef CONFIG_SMP u32 high32, low32; - int dest, rte_index; + int cpu, dest, rte_index; int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; struct iosapic_rte_info *rte; struct iosapic *iosapic; irq &= (~IA64_IRQ_REDIRECTED); - cpus_and(mask, mask, cpu_online_map); - if (cpus_empty(mask)) + cpu = cpumask_first_and(cpu_online_mask, mask); + if (cpu >= nr_cpu_ids) return; - if (irq_prepare_move(irq, first_cpu(mask))) + if (irq_prepare_move(irq, cpu)) return; - dest = cpu_physical_id(first_cpu(mask)); + dest = cpu_physical_id(cpu); if (!iosapic_intr_info[irq].count) return; /* not an IOSAPIC interrupt */ @@ -695,32 +695,31 @@ get_target_cpu (unsigned int gsi, int irq) #ifdef CONFIG_NUMA { int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; - cpumask_t cpu_mask; + const struct cpumask *cpu_mask; iosapic_index = find_iosapic(gsi); if (iosapic_index < 0 || iosapic_lists[iosapic_index].node == MAX_NUMNODES) goto skip_numa_setup; - cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node); - cpus_and(cpu_mask, cpu_mask, domain); - for_each_cpu_mask(numa_cpu, cpu_mask) { - if (!cpu_online(numa_cpu)) - cpu_clear(numa_cpu, cpu_mask); + cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node); + num_cpus = 0; + for_each_cpu_and(numa_cpu, cpu_mask, &domain) { + if (cpu_online(numa_cpu)) + num_cpus++; } - num_cpus = cpus_weight(cpu_mask); - if (!num_cpus) goto skip_numa_setup; /* Use irq assignment to distribute across cpus in node */ cpu_index = irq % num_cpus; - for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++) - numa_cpu = next_cpu(numa_cpu, cpu_mask); + for_each_cpu_and(numa_cpu, cpu_mask, &domain) + if (cpu_online(numa_cpu) && i++ >= cpu_index) + break; - if (numa_cpu != NR_CPUS) + if (numa_cpu < nr_cpu_ids) return cpu_physical_id(numa_cpu); } skip_numa_setup: @@ -731,7 +730,7 @@ skip_numa_setup: * case of NUMA.) */ do { - if (++cpu >= NR_CPUS) + if (++cpu >= nr_cpu_ids) cpu = 0; } while (!cpu_online(cpu) || !cpu_isset(cpu, domain)); diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 7fd18f54c05..95ff16cb05d 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -112,11 +112,11 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir) } } -bool is_affinity_mask_valid(cpumask_t cpumask) +bool is_affinity_mask_valid(cpumask_var_t cpumask) { if (ia64_platform_is("sn2")) { /* Only allow one CPU to be specified in the smp_affinity mask */ - if (cpus_weight(cpumask) != 1) + if (cpumask_weight(cpumask) != 1) return false; } return true; @@ -133,7 +133,6 @@ unsigned int vectors_in_migration[NR_IRQS]; */ static void migrate_irqs(void) { - cpumask_t mask; irq_desc_t *desc; int irq, new_cpu; @@ -152,15 +151,14 @@ static void migrate_irqs(void) if (desc->status == IRQ_PER_CPU) continue; - cpus_and(mask, irq_desc[irq].affinity, cpu_online_map); - if (any_online_cpu(mask) == NR_CPUS) { + if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask) + >= nr_cpu_ids) { /* * Save it for phase 2 processing */ vectors_in_migration[irq] = irq; new_cpu = any_online_cpu(cpu_online_map); - mask = cpumask_of_cpu(new_cpu); /* * Al three are essential, currently WARN_ON.. maybe panic? @@ -168,7 +166,8 @@ static void migrate_irqs(void) if (desc->chip && desc->chip->disable && desc->chip->enable && desc->chip->set_affinity) { desc->chip->disable(irq); - desc->chip->set_affinity(irq, mask); + desc->chip->set_affinity(irq, + cpumask_of(new_cpu)); desc->chip->enable(irq); } else { WARN_ON((!(desc->chip) || !(desc->chip->disable) || diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 702a09c1323..89033933903 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -49,11 +49,12 @@ static struct irq_chip ia64_msi_chip; #ifdef CONFIG_SMP -static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) +static void ia64_set_msi_irq_affinity(unsigned int irq, + const cpumask_t *cpu_mask) { struct msi_msg msg; u32 addr, data; - int cpu = first_cpu(cpu_mask); + int cpu = first_cpu(*cpu_mask); if (!cpu_online(cpu)) return; @@ -166,12 +167,11 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_cfg *cfg = irq_cfg + irq; struct msi_msg msg; - int cpu = first_cpu(mask); - + int cpu = cpumask_first(mask); if (!cpu_online(cpu)) return; @@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); dmar_msi_write(irq, &msg); - irq_desc[irq].affinity = mask; + irq_desc[irq].affinity = *mask; } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 1dcbb85fc4e..11463994a7d 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -131,12 +131,6 @@ struct task_struct *task_for_booting_cpu; */ DEFINE_PER_CPU(int, cpu_state); -/* Bitmasks of currently online, and possible CPUs */ -cpumask_t cpu_online_map; -EXPORT_SYMBOL(cpu_online_map); -cpumask_t cpu_possible_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_possible_map); - cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_core_map); DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); @@ -688,7 +682,7 @@ int migrate_platform_irqs(unsigned int cpu) { int new_cpei_cpu; irq_desc_t *desc = NULL; - cpumask_t mask; + const struct cpumask *mask; int retval = 0; /* @@ -701,7 +695,7 @@ int migrate_platform_irqs(unsigned int cpu) * Now re-target the CPEI to a different processor */ new_cpei_cpu = any_online_cpu(cpu_online_map); - mask = cpumask_of_cpu(new_cpei_cpu); + mask = cpumask_of(new_cpei_cpu); set_cpei_target_cpu(new_cpei_cpu); desc = irq_desc + ia64_cpe_irq; /* diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 65c10a42c88..f0ebb342409 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -93,13 +93,14 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) now = ia64_get_itc(); delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); - account_system_time(prev, 0, delta_stime); - account_system_time_scaled(prev, delta_stime); + if (idle_task(smp_processor_id()) != prev) + account_system_time(prev, 0, delta_stime, delta_stime); + else + account_idle_time(delta_stime); if (pi->ac_utime) { delta_utime = cycle_to_cputime(pi->ac_utime); - account_user_time(prev, delta_utime); - account_user_time_scaled(prev, delta_utime); + account_user_time(prev, delta_utime, delta_utime); } pi->ac_stamp = ni->ac_stamp = now; @@ -122,8 +123,10 @@ void account_system_vtime(struct task_struct *tsk) now = ia64_get_itc(); delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); - account_system_time(tsk, 0, delta_stime); - account_system_time_scaled(tsk, delta_stime); + if (irq_count() || idle_task(smp_processor_id()) != tsk) + account_system_time(tsk, 0, delta_stime, delta_stime); + else + account_idle_time(delta_stime); ti->ac_stime = 0; ti->ac_stamp = now; @@ -143,8 +146,7 @@ void account_process_tick(struct task_struct *p, int user_tick) if (ti->ac_utime) { delta_utime = cycle_to_cputime(ti->ac_utime); - account_user_time(p, delta_utime); - account_user_time_scaled(p, delta_utime); + account_user_time(p, delta_utime, delta_utime); ti->ac_utime = 0; } } diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index c75b914f2d6..a8d61a3e9a9 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -219,7 +219,7 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf) cpumask_t shared_cpu_map; cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map); - len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map); + len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map); len += sprintf(buf+len, "\n"); return len; } diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 92cef66ca26..0bb99b73290 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -51,8 +51,8 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ coalesced_mmio.o irq_comm.o) -ifeq ($(CONFIG_DMAR),y) -common-objs += $(addprefix ../../../virt/kvm/, vtd.o) +ifeq ($(CONFIG_IOMMU_API),y) +common-objs += $(addprefix ../../../virt/kvm/, iommu.o) endif kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o @@ -60,7 +60,7 @@ obj-$(CONFIG_KVM) += kvm.o CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ - vtlb.o process.o + vtlb.o process.o kvm_lib.o #Add link memcpy and memset to avoid possible structure assignment error kvm-intel-objs += memcpy.o memset.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c index 4e3dc13a619..0c3564a7a03 100644 --- a/arch/ia64/kvm/asm-offsets.c +++ b/arch/ia64/kvm/asm-offsets.c @@ -24,19 +24,10 @@ #include <linux/autoconf.h> #include <linux/kvm_host.h> +#include <linux/kbuild.h> #include "vcpu.h" -#define task_struct kvm_vcpu - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " (%0) " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : :) - -#define OFFSET(_sym, _str, _mem) \ - DEFINE(_sym, offsetof(_str, _mem)); - void foo(void) { DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index af1464f7a6a..4e586f6110a 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -31,6 +31,7 @@ #include <linux/bitops.h> #include <linux/hrtimer.h> #include <linux/uaccess.h> +#include <linux/iommu.h> #include <linux/intel-iommu.h> #include <asm/pgtable.h> @@ -180,7 +181,6 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_IRQCHIP: - case KVM_CAP_USER_MEMORY: case KVM_CAP_MP_STATE: r = 1; @@ -189,7 +189,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; case KVM_CAP_IOMMU: - r = intel_iommu_found(); + r = iommu_found(); break; default: r = 0; @@ -439,7 +439,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) expires = div64_u64(itc_diff, cyc_per_usec); kt = ktime_set(0, 1000 * expires); - down_read(&vcpu->kvm->slots_lock); vcpu->arch.ht_active = 1; hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); @@ -452,7 +451,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - up_read(&vcpu->kvm->slots_lock); if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) return -EINTR; @@ -476,6 +474,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu, return 1; } +static int handle_vcpu_debug(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + printk("VMM: %s", vcpu->arch.log_buf); + return 1; +} + static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) = { [EXIT_REASON_VM_PANIC] = handle_vm_error, @@ -487,6 +492,7 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, [EXIT_REASON_IPI] = handle_ipi, [EXIT_REASON_PTC_G] = handle_global_purge, + [EXIT_REASON_DEBUG] = handle_vcpu_debug, }; @@ -698,27 +704,24 @@ out: return r; } -/* - * Allocate 16M memory for every vm to hold its specific data. - * Its memory map is defined in kvm_host.h. - */ static struct kvm *kvm_alloc_kvm(void) { struct kvm *kvm; uint64_t vm_base; + BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE); + vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); if (!vm_base) return ERR_PTR(-ENOMEM); - printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base); - /* Zero all pages before use! */ memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); - - kvm = (struct kvm *)(vm_base + KVM_VM_OFS); + kvm = (struct kvm *)(vm_base + + offsetof(struct kvm_vm_data, kvm_vm_struct)); kvm->arch.vm_base = vm_base; + printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base); return kvm; } @@ -760,21 +763,12 @@ static void kvm_build_io_pmt(struct kvm *kvm) static void kvm_init_vm(struct kvm *kvm) { - long vm_base; - BUG_ON(!kvm); kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; kvm->arch.vmm_init_rr = VMM_INIT_RR; - vm_base = kvm->arch.vm_base; - if (vm_base) { - kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS; - kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS; - kvm->arch.vpd_base = vm_base + KVM_VPD_OFS; - } - /* *Fill P2M entries for MMIO/IO ranges */ @@ -838,9 +832,8 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - int i; struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - int r; + int i; vcpu_load(vcpu); @@ -857,18 +850,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vpd->vpr = regs->vpd.vpr; - r = -EFAULT; - r = copy_from_user(&vcpu->arch.guest, regs->saved_guest, - sizeof(union context)); - if (r) - goto out; - r = copy_from_user(vcpu + 1, regs->saved_stack + - sizeof(struct kvm_vcpu), - IA64_STK_OFFSET - sizeof(struct kvm_vcpu)); - if (r) - goto out; - vcpu->arch.exit_data = - ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data; + memcpy(&vcpu->arch.guest, ®s->saved_guest, sizeof(union context)); RESTORE_REGS(mp_state); RESTORE_REGS(vmm_rr); @@ -902,9 +884,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) set_bit(KVM_REQ_RESUME, &vcpu->requests); vcpu_put(vcpu); - r = 0; -out: - return r; + + return 0; } long kvm_arch_vm_ioctl(struct file *filp, @@ -1166,10 +1147,11 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) /*Set entry address for first run.*/ regs->cr_iip = PALE_RESET_ENTRY; - /*Initilize itc offset for vcpus*/ + /*Initialize itc offset for vcpus*/ itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); - for (i = 0; i < MAX_VCPU_NUM; i++) { - v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + for (i = 0; i < KVM_MAX_VCPUS; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + + sizeof(struct kvm_vcpu_data) * i); v->arch.itc_offset = itc_offset; v->arch.last_itc = 0; } @@ -1183,7 +1165,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.apic->vcpu = vcpu; p_ctx->gr[1] = 0; - p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); + p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET); p_ctx->gr[13] = (unsigned long)vmm_vcpu; p_ctx->psr = 0x1008522000UL; p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ @@ -1218,12 +1200,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.hlt_timer.function = hlt_timer_fn; vcpu->arch.last_run_cpu = -1; - vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); + vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id); vcpu->arch.vsa_base = kvm_vsa_base; vcpu->arch.__gp = kvm_vmm_gp; vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); - vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); - vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); + vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id); + vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id); init_ptce_info(vcpu); r = 0; @@ -1273,12 +1255,22 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, int r; int cpu; + BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2); + + r = -EINVAL; + if (id >= KVM_MAX_VCPUS) { + printk(KERN_ERR"kvm: Can't configure vcpus > %ld", + KVM_MAX_VCPUS); + goto fail; + } + r = -ENOMEM; if (!vm_base) { printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); goto fail; } - vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); + vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data, + vcpu_data[id].vcpu_struct)); vcpu->kvm = kvm; cpu = get_cpu(); @@ -1374,9 +1366,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - int i; - int r; struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + int i; + vcpu_load(vcpu); for (i = 0; i < 16; i++) { @@ -1391,14 +1383,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->vpd.vpsr = vpd->vpsr; regs->vpd.vpr = vpd->vpr; - r = -EFAULT; - r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, - sizeof(union context)); - if (r) - goto out; - r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET); - if (r) - goto out; + memcpy(®s->saved_guest, &vcpu->arch.guest, sizeof(union context)); + SAVE_REGS(mp_state); SAVE_REGS(vmm_rr); memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); @@ -1426,10 +1412,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) SAVE_REGS(metaphysical_saved_rr4); SAVE_REGS(fp_psr); SAVE_REGS(saved_gp); + vcpu_put(vcpu); - r = 0; -out: - return r; + return 0; } void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) @@ -1457,6 +1442,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; unsigned long base_gfn = memslot->base_gfn; + if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) + return -ENOMEM; + for (i = 0; i < npages; i++) { pfn = gfn_to_pfn(kvm, base_gfn + i); if (!kvm_is_mmio_pfn(pfn)) { @@ -1631,8 +1619,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int r, i; long n, base; - unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS - + KVM_MEM_DIRTY_LOG_OFS); + unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + + offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); r = -EINVAL; if (log->slot >= KVM_MEMORY_SLOTS) diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c new file mode 100644 index 00000000000..a85cb611ecd --- /dev/null +++ b/arch/ia64/kvm/kvm_lib.c @@ -0,0 +1,15 @@ +/* + * kvm_lib.c: Compile some libraries for kvm-intel module. + * + * Just include kernel's library, and disable symbols export. + * Copyright (C) 2008, Intel Corporation. + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#undef CONFIG_MODULES +#include "../../../lib/vsprintf.c" +#include "../../../lib/ctype.c" diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h index 2cc41d17cf9..b2bcaa2787a 100644 --- a/arch/ia64/kvm/kvm_minstate.h +++ b/arch/ia64/kvm/kvm_minstate.h @@ -24,6 +24,8 @@ #include <asm/asmmacro.h> #include <asm/types.h> #include <asm/kregs.h> +#include <asm/kvm_host.h> + #include "asm-offsets.h" #define KVM_MINSTATE_START_SAVE_MIN \ @@ -33,7 +35,7 @@ addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ ;; \ lfetch.fault.excl.nt1 [r22]; \ - addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ + addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \ mov r23 = ar.bspstore; /* save ar.bspstore */ \ ;; \ mov ar.bspstore = r22; /* switch to kernel RBS */\ diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h index e585c460734..dd979e00b57 100644 --- a/arch/ia64/kvm/misc.h +++ b/arch/ia64/kvm/misc.h @@ -27,7 +27,8 @@ */ static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) { - return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS); + return (uint64_t *)(kvm->arch.vm_base + + offsetof(struct kvm_vm_data, kvm_p2m)); } static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c index 7f1a858bc69..21f63fffc37 100644 --- a/arch/ia64/kvm/mmio.c +++ b/arch/ia64/kvm/mmio.c @@ -66,31 +66,25 @@ void lsapic_write(struct kvm_vcpu *v, unsigned long addr, switch (addr) { case PIB_OFST_INTA: - /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/ - panic_vm(v); + panic_vm(v, "Undefined write on PIB INTA\n"); break; case PIB_OFST_XTP: if (length == 1) { vlsapic_write_xtp(v, val); } else { - /*panic_domain(NULL, - "Undefined write on PIB XTP\n");*/ - panic_vm(v); + panic_vm(v, "Undefined write on PIB XTP\n"); } break; default: if (PIB_LOW_HALF(addr)) { - /*lower half */ + /*Lower half */ if (length != 8) - /*panic_domain(NULL, - "Can't LHF write with size %ld!\n", - length);*/ - panic_vm(v); + panic_vm(v, "Can't LHF write with size %ld!\n", + length); else vlsapic_write_ipi(v, addr, val); - } else { /* upper half - printk("IPI-UHF write %lx\n",addr);*/ - panic_vm(v); + } else { /*Upper half */ + panic_vm(v, "IPI-UHF write %lx\n", addr); } break; } @@ -108,22 +102,18 @@ unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr, if (length == 1) /* 1 byte load */ ; /* There is no i8259, there is no INTA access*/ else - /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */ - panic_vm(v); + panic_vm(v, "Undefined read on PIB INTA\n"); break; case PIB_OFST_XTP: if (length == 1) { result = VLSAPIC_XTP(v); - /* printk("read xtp %lx\n", result); */ } else { - /*panic_domain(NULL, - "Undefined read on PIB XTP\n");*/ - panic_vm(v); + panic_vm(v, "Undefined read on PIB XTP\n"); } break; default: - panic_vm(v); + panic_vm(v, "Undefined addr access for lsapic!\n"); break; } return result; @@ -162,7 +152,7 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest, /* it's necessary to ensure zero extending */ *dest = p->u.ioreq.data & (~0UL >> (64-(s*8))); } else - panic_vm(vcpu); + panic_vm(vcpu, "Unhandled mmio access returned!\n"); out: local_irq_restore(psr); return ; @@ -324,7 +314,9 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) return; } else { inst_type = -1; - panic_vm(vcpu); + panic_vm(vcpu, "Unsupported MMIO access instruction! \ + Bunld[0]=0x%lx, Bundle[1]=0x%lx\n", + bundle.i64[0], bundle.i64[1]); } size = 1 << size; @@ -335,7 +327,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) if (inst_type == SL_INTEGER) vcpu_set_gr(vcpu, inst.M1.r1, data, 0); else - panic_vm(vcpu); + panic_vm(vcpu, "Unsupported instruction type!\n"); } vcpu_increment_iip(vcpu); diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c index 800817307b7..552d0772420 100644 --- a/arch/ia64/kvm/process.c +++ b/arch/ia64/kvm/process.c @@ -527,7 +527,8 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim, vector = vec2off[vec]; if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { - panic_vm(vcpu); + panic_vm(vcpu, "Interruption with vector :0x%lx occurs " + "with psr.ic = 0\n", vector); return; } @@ -586,7 +587,7 @@ static void set_pal_call_result(struct kvm_vcpu *vcpu) vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); } else - panic_vm(vcpu); + panic_vm(vcpu, "Mis-set for exit reason!\n"); } static void set_sal_call_data(struct kvm_vcpu *vcpu) @@ -614,7 +615,7 @@ static void set_sal_call_result(struct kvm_vcpu *vcpu) vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); } else - panic_vm(vcpu); + panic_vm(vcpu, "Mis-set for exit reason!\n"); } void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, @@ -680,7 +681,7 @@ static void generate_exirq(struct kvm_vcpu *vcpu) vpsr = VCPU(vcpu, vpsr); isr = vpsr & IA64_PSR_RI; if (!(vpsr & IA64_PSR_IC)) - panic_vm(vcpu); + panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n"); reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ } @@ -941,8 +942,20 @@ static void vcpu_do_resume(struct kvm_vcpu *vcpu) ia64_set_pta(vcpu->arch.vhpt.pta.val); } +static void vmm_sanity_check(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) { + panic_vm(vcpu, "Failed to do vmm sanity check," + "it maybe caused by crashed vmm!!\n\n"); + } +} + static void kvm_do_resume_op(struct kvm_vcpu *vcpu) { + vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/ + if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { vcpu_do_resume(vcpu); return; @@ -968,3 +981,11 @@ void vmm_transition(struct kvm_vcpu *vcpu) 1, 0, 0, 0, 0, 0); kvm_do_resume_op(vcpu); } + +void vmm_panic_handler(u64 vec) +{ + struct kvm_vcpu *vcpu = current_vcpu; + vmm_sanity = 0; + panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n", + vec2off[vec]); +} diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c index e44027ce566..ecd526b5532 100644 --- a/arch/ia64/kvm/vcpu.c +++ b/arch/ia64/kvm/vcpu.c @@ -816,8 +816,9 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) unsigned long vitv = VCPU(vcpu, itv); if (vcpu->vcpu_id == 0) { - for (i = 0; i < MAX_VCPU_NUM; i++) { - v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + for (i = 0; i < KVM_MAX_VCPUS; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + + sizeof(struct kvm_vcpu_data) * i); VMX(v, itc_offset) = itc_offset; VMX(v, last_itc) = 0; } @@ -1650,7 +1651,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) * Otherwise panic */ if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) - panic_vm(vcpu); + panic_vm(vcpu, "Only support guests with vpsr.pk =0 \ + & vpsr.is=0\n"); /* * For those IA64_PSR bits: id/da/dd/ss/ed/ia @@ -2103,7 +2105,7 @@ void kvm_init_all_rr(struct kvm_vcpu *vcpu) if (is_physical_mode(vcpu)) { if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) - panic_vm(vcpu); + panic_vm(vcpu, "Machine Status conflicts!\n"); ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); ia64_dv_serialize_data(); @@ -2152,10 +2154,70 @@ int vmm_entry(void) return 0; } -void panic_vm(struct kvm_vcpu *v) -{ +static void kvm_show_registers(struct kvm_pt_regs *regs) +{ + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; + + struct kvm_vcpu *vcpu = current_vcpu; + if (vcpu != NULL) + printk("vcpu 0x%p vcpu %d\n", + vcpu, vcpu->vcpu_id); + + printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n", + regs->cr_ipsr, regs->cr_ifs, ip); + + printk("unat: %016lx pfs : %016lx rsc : %016lx\n", + regs->ar_unat, regs->ar_pfs, regs->ar_rsc); + printk("rnat: %016lx bspstore: %016lx pr : %016lx\n", + regs->ar_rnat, regs->ar_bspstore, regs->pr); + printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", + regs->loadrs, regs->ar_ccv, regs->ar_fpsr); + printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd); + printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, + regs->b6, regs->b7); + printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", + regs->f6.u.bits[1], regs->f6.u.bits[0], + regs->f7.u.bits[1], regs->f7.u.bits[0]); + printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", + regs->f8.u.bits[1], regs->f8.u.bits[0], + regs->f9.u.bits[1], regs->f9.u.bits[0]); + printk("f10 : %05lx%016lx f11 : %05lx%016lx\n", + regs->f10.u.bits[1], regs->f10.u.bits[0], + regs->f11.u.bits[1], regs->f11.u.bits[0]); + + printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, + regs->r2, regs->r3); + printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, + regs->r9, regs->r10); + printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, + regs->r12, regs->r13); + printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, + regs->r15, regs->r16); + printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, + regs->r18, regs->r19); + printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, + regs->r21, regs->r22); + printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, + regs->r24, regs->r25); + printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, + regs->r27, regs->r28); + printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, + regs->r30, regs->r31); + +} + +void panic_vm(struct kvm_vcpu *v, const char *fmt, ...) +{ + va_list args; + char buf[256]; + + struct kvm_pt_regs *regs = vcpu_regs(v); struct exit_ctl_data *p = &v->arch.exit_data; - + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + printk(buf); + kvm_show_registers(regs); p->exit_reason = EXIT_REASON_VM_PANIC; vmm_transition(v); /*Never to return*/ diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h index e9b2a4e121c..b2f12a562bd 100644 --- a/arch/ia64/kvm/vcpu.h +++ b/arch/ia64/kvm/vcpu.h @@ -737,9 +737,12 @@ void kvm_init_vtlb(struct kvm_vcpu *v); void kvm_init_vhpt(struct kvm_vcpu *v); void thash_init(struct thash_cb *hcb, u64 sz); -void panic_vm(struct kvm_vcpu *v); +void panic_vm(struct kvm_vcpu *v, const char *fmt, ...); extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7); + +extern long vmm_sanity; + #endif #endif /* __VCPU_H__ */ diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c index 2275bf4e681..9eee5c04bac 100644 --- a/arch/ia64/kvm/vmm.c +++ b/arch/ia64/kvm/vmm.c @@ -20,6 +20,7 @@ */ +#include<linux/kernel.h> #include<linux/module.h> #include<asm/fpswa.h> @@ -31,6 +32,8 @@ MODULE_LICENSE("GPL"); extern char kvm_ia64_ivt; extern fpswa_interface_t *vmm_fpswa_interface; +long vmm_sanity = 1; + struct kvm_vmm_info vmm_info = { .module = THIS_MODULE, .vmm_entry = vmm_entry, @@ -62,5 +65,31 @@ void vmm_spin_unlock(spinlock_t *lock) { _vmm_raw_spin_unlock(lock); } + +static void vcpu_debug_exit(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + long psr; + + local_irq_save(psr); + p->exit_reason = EXIT_REASON_DEBUG; + vmm_transition(vcpu); + local_irq_restore(psr); +} + +asmlinkage int printk(const char *fmt, ...) +{ + struct kvm_vcpu *vcpu = current_vcpu; + va_list args; + int r; + + memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN); + va_start(args, fmt); + r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args); + va_end(args); + vcpu_debug_exit(vcpu); + return r; +} + module_init(kvm_vmm_init) module_exit(kvm_vmm_exit) diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S index c1d7251a148..3ef1a017a31 100644 --- a/arch/ia64/kvm/vmm_ivt.S +++ b/arch/ia64/kvm/vmm_ivt.S @@ -1,5 +1,5 @@ /* - * /ia64/kvm_ivt.S + * arch/ia64/kvm/vmm_ivt.S * * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co * Stephane Eranian <eranian@hpl.hp.com> @@ -70,32 +70,39 @@ # define PSR_DEFAULT_BITS 0 #endif - #define KVM_FAULT(n) \ - kvm_fault_##n:; \ - mov r19=n;; \ - br.sptk.many kvm_fault_##n; \ - ;; \ - + kvm_fault_##n:; \ + mov r19=n;; \ + br.sptk.many kvm_vmm_panic; \ + ;; \ #define KVM_REFLECT(n) \ - mov r31=pr; \ - mov r19=n; /* prepare to save predicates */ \ - mov r29=cr.ipsr; \ - ;; \ - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ -(p7)br.sptk.many kvm_dispatch_reflection; \ - br.sptk.many kvm_panic; \ - - -GLOBAL_ENTRY(kvm_panic) - br.sptk.many kvm_panic - ;; -END(kvm_panic) - - - - + mov r31=pr; \ + mov r19=n; /* prepare to save predicates */ \ + mov r29=cr.ipsr; \ + ;; \ + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ +(p7) br.sptk.many kvm_dispatch_reflection; \ + br.sptk.many kvm_vmm_panic; \ + +GLOBAL_ENTRY(kvm_vmm_panic) + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,1,0 + mov out0=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + br.call.sptk.many b6=vmm_panic_handler; +END(kvm_vmm_panic) .section .text.ivt,"ax" @@ -105,308 +112,307 @@ kvm_ia64_ivt: /////////////////////////////////////////////////////////////// // 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) ENTRY(kvm_vhpt_miss) - KVM_FAULT(0) + KVM_FAULT(0) END(kvm_vhpt_miss) - .org kvm_ia64_ivt+0x400 //////////////////////////////////////////////////////////////// // 0x0400 Entry 1 (size 64 bundles) ITLB (21) ENTRY(kvm_itlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; - (p6) br.sptk kvm_alt_itlb_miss - mov r19 = 1 - br.sptk kvm_itlb_miss_dispatch - KVM_FAULT(1); + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6) br.sptk kvm_alt_itlb_miss + mov r19 = 1 + br.sptk kvm_itlb_miss_dispatch + KVM_FAULT(1); END(kvm_itlb_miss) .org kvm_ia64_ivt+0x0800 ////////////////////////////////////////////////////////////////// // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) ENTRY(kvm_dtlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6)br.sptk kvm_alt_dtlb_miss - br.sptk kvm_dtlb_miss_dispatch + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6) br.sptk kvm_alt_dtlb_miss + br.sptk kvm_dtlb_miss_dispatch END(kvm_dtlb_miss) .org kvm_ia64_ivt+0x0c00 //////////////////////////////////////////////////////////////////// // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) ENTRY(kvm_alt_itlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - mov r24=cr.ipsr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r17,r19 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.i r19 // insert the TLB entry - mov pr=r31,-1 - rfi + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + mov r24=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r17,r19 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi END(kvm_alt_itlb_miss) .org kvm_ia64_ivt+0x1000 ///////////////////////////////////////////////////////////////////// // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) ENTRY(kvm_alt_dtlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r24=cr.ipsr - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r19,r17 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.d r19 // insert the TLB entry - mov pr=r31,-1 - rfi + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r24=cr.ipsr + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r19,r17 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi END(kvm_alt_dtlb_miss) .org kvm_ia64_ivt+0x1400 ////////////////////////////////////////////////////////////////////// // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) ENTRY(kvm_nested_dtlb_miss) - KVM_FAULT(5) + KVM_FAULT(5) END(kvm_nested_dtlb_miss) .org kvm_ia64_ivt+0x1800 ///////////////////////////////////////////////////////////////////// // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) ENTRY(kvm_ikey_miss) - KVM_REFLECT(6) + KVM_REFLECT(6) END(kvm_ikey_miss) .org kvm_ia64_ivt+0x1c00 ///////////////////////////////////////////////////////////////////// // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) ENTRY(kvm_dkey_miss) - KVM_REFLECT(7) + KVM_REFLECT(7) END(kvm_dkey_miss) .org kvm_ia64_ivt+0x2000 //////////////////////////////////////////////////////////////////// // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) ENTRY(kvm_dirty_bit) - KVM_REFLECT(8) + KVM_REFLECT(8) END(kvm_dirty_bit) .org kvm_ia64_ivt+0x2400 //////////////////////////////////////////////////////////////////// // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) ENTRY(kvm_iaccess_bit) - KVM_REFLECT(9) + KVM_REFLECT(9) END(kvm_iaccess_bit) .org kvm_ia64_ivt+0x2800 /////////////////////////////////////////////////////////////////// // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) ENTRY(kvm_daccess_bit) - KVM_REFLECT(10) + KVM_REFLECT(10) END(kvm_daccess_bit) .org kvm_ia64_ivt+0x2c00 ///////////////////////////////////////////////////////////////// // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) ENTRY(kvm_break_fault) - mov r31=pr - mov r19=11 - mov r29=cr.ipsr - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) - mov out0=cr.ifa - mov out2=cr.isr // FIXME: pity to make this slow access twice - mov out3=cr.iim // FIXME: pity to make this slow access twice - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15)ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out1=16,sp - br.call.sptk.many b6=kvm_ia64_handle_break - ;; + mov r31=pr + mov r19=11 + mov r29=cr.ipsr + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!) + mov out0=cr.ifa + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.iim // FIXME: pity to make this slow access twice + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15)ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out1=16,sp + br.call.sptk.many b6=kvm_ia64_handle_break + ;; END(kvm_break_fault) .org kvm_ia64_ivt+0x3000 ///////////////////////////////////////////////////////////////// // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) ENTRY(kvm_interrupt) - mov r31=pr // prepare to save predicates - mov r19=12 - mov r29=cr.ipsr - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT - tbit.z p0,p15=r29,IA64_PSR_I_BIT - ;; -(p7) br.sptk kvm_dispatch_interrupt - ;; - mov r27=ar.rsc /* M */ - mov r20=r1 /* A */ - mov r25=ar.unat /* M */ - mov r26=ar.pfs /* I */ - mov r28=cr.iip /* M */ - cover /* B (or nothing) */ - ;; - mov r1=sp - ;; - invala /* M */ - mov r30=cr.ifs - ;; - addl r1=-VMM_PT_REGS_SIZE,r1 - ;; - adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ - adds r16=PT(CR_IPSR),r1 - ;; - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES - st8 [r16]=r29 /* save cr.ipsr */ - ;; - lfetch.fault.excl.nt1 [r17] - mov r29=b0 - ;; - adds r16=PT(R8),r1 /* initialize first base pointer */ - adds r17=PT(R9),r1 /* initialize second base pointer */ - mov r18=r0 /* make sure r18 isn't NaT */ - ;; + mov r31=pr // prepare to save predicates + mov r19=12 + mov r29=cr.ipsr + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT + tbit.z p0,p15=r29,IA64_PSR_I_BIT + ;; +(p7) br.sptk kvm_dispatch_interrupt + ;; + mov r27=ar.rsc /* M */ + mov r20=r1 /* A */ + mov r25=ar.unat /* M */ + mov r26=ar.pfs /* I */ + mov r28=cr.iip /* M */ + cover /* B (or nothing) */ + ;; + mov r1=sp + ;; + invala /* M */ + mov r30=cr.ifs + ;; + addl r1=-VMM_PT_REGS_SIZE,r1 + ;; + adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ + adds r16=PT(CR_IPSR),r1 + ;; + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES + st8 [r16]=r29 /* save cr.ipsr */ + ;; + lfetch.fault.excl.nt1 [r17] + mov r29=b0 + ;; + adds r16=PT(R8),r1 /* initialize first base pointer */ + adds r17=PT(R9),r1 /* initialize second base pointer */ + mov r18=r0 /* make sure r18 isn't NaT */ + ;; .mem.offset 0,0; st8.spill [r16]=r8,16 .mem.offset 8,0; st8.spill [r17]=r9,16 ;; .mem.offset 0,0; st8.spill [r16]=r10,24 .mem.offset 8,0; st8.spill [r17]=r11,24 ;; - st8 [r16]=r28,16 /* save cr.iip */ - st8 [r17]=r30,16 /* save cr.ifs */ - mov r8=ar.fpsr /* M */ - mov r9=ar.csd - mov r10=ar.ssd - movl r11=FPSR_DEFAULT /* L-unit */ - ;; - st8 [r16]=r25,16 /* save ar.unat */ - st8 [r17]=r26,16 /* save ar.pfs */ - shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ - ;; - st8 [r16]=r27,16 /* save ar.rsc */ - adds r17=16,r17 /* skip over ar_rnat field */ - ;; - st8 [r17]=r31,16 /* save predicates */ - adds r16=16,r16 /* skip over ar_bspstore field */ - ;; - st8 [r16]=r29,16 /* save b0 */ - st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ - ;; + st8 [r16]=r28,16 /* save cr.iip */ + st8 [r17]=r30,16 /* save cr.ifs */ + mov r8=ar.fpsr /* M */ + mov r9=ar.csd + mov r10=ar.ssd + movl r11=FPSR_DEFAULT /* L-unit */ + ;; + st8 [r16]=r25,16 /* save ar.unat */ + st8 [r17]=r26,16 /* save ar.pfs */ + shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ + ;; + st8 [r16]=r27,16 /* save ar.rsc */ + adds r17=16,r17 /* skip over ar_rnat field */ + ;; + st8 [r17]=r31,16 /* save predicates */ + adds r16=16,r16 /* skip over ar_bspstore field */ + ;; + st8 [r16]=r29,16 /* save b0 */ + st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ + ;; .mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ .mem.offset 8,0; st8.spill [r17]=r12,16 - adds r12=-16,r1 - /* switch to kernel memory stack (with 16 bytes of scratch) */ - ;; + adds r12=-16,r1 + /* switch to kernel memory stack (with 16 bytes of scratch) */ + ;; .mem.offset 0,0; st8.spill [r16]=r13,16 .mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ - ;; + ;; .mem.offset 0,0; st8.spill [r16]=r15,16 .mem.offset 8,0; st8.spill [r17]=r14,16 - dep r14=-1,r0,60,4 - ;; + dep r14=-1,r0,60,4 + ;; .mem.offset 0,0; st8.spill [r16]=r2,16 .mem.offset 8,0; st8.spill [r17]=r3,16 - adds r2=VMM_PT_REGS_R16_OFFSET,r1 - adds r14 = VMM_VCPU_GP_OFFSET,r13 - ;; - mov r8=ar.ccv - ld8 r14 = [r14] - ;; - mov r1=r14 /* establish kernel global pointer */ - ;; \ - bsw.1 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - mov out0=r13 - ;; - ssm psr.ic - ;; - srlz.i - ;; - //(p15) ssm psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on - ;; + adds r2=VMM_PT_REGS_R16_OFFSET,r1 + adds r14 = VMM_VCPU_GP_OFFSET,r13 + ;; + mov r8=ar.ccv + ld8 r14 = [r14] + ;; + mov r1=r14 /* establish kernel global pointer */ + ;; \ + bsw.1 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + mov out0=r13 + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; .mem.offset 0,0; st8.spill [r2]=r16,16 .mem.offset 8,0; st8.spill [r3]=r17,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r18,16 .mem.offset 8,0; st8.spill [r3]=r19,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r20,16 .mem.offset 8,0; st8.spill [r3]=r21,16 - mov r18=b6 - ;; + mov r18=b6 + ;; .mem.offset 0,0; st8.spill [r2]=r22,16 .mem.offset 8,0; st8.spill [r3]=r23,16 - mov r19=b7 - ;; + mov r19=b7 + ;; .mem.offset 0,0; st8.spill [r2]=r24,16 .mem.offset 8,0; st8.spill [r3]=r25,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r26,16 .mem.offset 8,0; st8.spill [r3]=r27,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r28,16 .mem.offset 8,0; st8.spill [r3]=r29,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r30,16 .mem.offset 8,0; st8.spill [r3]=r31,32 - ;; - mov ar.fpsr=r11 /* M-unit */ - st8 [r2]=r8,8 /* ar.ccv */ - adds r24=PT(B6)-PT(F7),r3 - ;; - stf.spill [r2]=f6,32 - stf.spill [r3]=f7,32 - ;; - stf.spill [r2]=f8,32 - stf.spill [r3]=f9,32 - ;; - stf.spill [r2]=f10 - stf.spill [r3]=f11 - adds r25=PT(B7)-PT(F11),r3 - ;; - st8 [r24]=r18,16 /* b6 */ - st8 [r25]=r19,16 /* b7 */ - ;; - st8 [r24]=r9 /* ar.csd */ - st8 [r25]=r10 /* ar.ssd */ - ;; - srlz.d // make sure we see the effect of cr.ivr - addl r14=@gprel(ia64_leave_nested),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_ia64_handle_irq - ;; + ;; + mov ar.fpsr=r11 /* M-unit */ + st8 [r2]=r8,8 /* ar.ccv */ + adds r24=PT(B6)-PT(F7),r3 + ;; + stf.spill [r2]=f6,32 + stf.spill [r3]=f7,32 + ;; + stf.spill [r2]=f8,32 + stf.spill [r3]=f9,32 + ;; + stf.spill [r2]=f10 + stf.spill [r3]=f11 + adds r25=PT(B7)-PT(F11),r3 + ;; + st8 [r24]=r18,16 /* b6 */ + st8 [r25]=r19,16 /* b7 */ + ;; + st8 [r24]=r9 /* ar.csd */ + st8 [r25]=r10 /* ar.ssd */ + ;; + srlz.d // make sure we see the effect of cr.ivr + addl r14=@gprel(ia64_leave_nested),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_ia64_handle_irq + ;; END(kvm_interrupt) .global kvm_dispatch_vexirq @@ -414,387 +420,385 @@ END(kvm_interrupt) ////////////////////////////////////////////////////////////////////// // 0x3400 Entry 13 (size 64 bundles) Reserved ENTRY(kvm_virtual_exirq) - mov r31=pr - mov r19=13 - mov r30 =r0 - ;; + mov r31=pr + mov r19=13 + mov r30 =r0 + ;; kvm_dispatch_vexirq: - cmp.eq p6,p0 = 1,r30 - ;; -(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; -(p6)ld8 r1 = [r29] - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,1,0 - mov out0=r13 - - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - KVM_SAVE_REST - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_vexirq + cmp.eq p6,p0 = 1,r30 + ;; +(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; +(p6) ld8 r1 = [r29] + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,1,0 + mov out0=r13 + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + KVM_SAVE_REST + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_vexirq END(kvm_virtual_exirq) .org kvm_ia64_ivt+0x3800 ///////////////////////////////////////////////////////////////////// // 0x3800 Entry 14 (size 64 bundles) Reserved - KVM_FAULT(14) - // this code segment is from 2.6.16.13 - + KVM_FAULT(14) + // this code segment is from 2.6.16.13 .org kvm_ia64_ivt+0x3c00 /////////////////////////////////////////////////////////////////////// // 0x3c00 Entry 15 (size 64 bundles) Reserved - KVM_FAULT(15) - + KVM_FAULT(15) .org kvm_ia64_ivt+0x4000 /////////////////////////////////////////////////////////////////////// // 0x4000 Entry 16 (size 64 bundles) Reserved - KVM_FAULT(16) + KVM_FAULT(16) .org kvm_ia64_ivt+0x4400 ////////////////////////////////////////////////////////////////////// // 0x4400 Entry 17 (size 64 bundles) Reserved - KVM_FAULT(17) + KVM_FAULT(17) .org kvm_ia64_ivt+0x4800 ////////////////////////////////////////////////////////////////////// // 0x4800 Entry 18 (size 64 bundles) Reserved - KVM_FAULT(18) + KVM_FAULT(18) .org kvm_ia64_ivt+0x4c00 ////////////////////////////////////////////////////////////////////// // 0x4c00 Entry 19 (size 64 bundles) Reserved - KVM_FAULT(19) + KVM_FAULT(19) .org kvm_ia64_ivt+0x5000 ////////////////////////////////////////////////////////////////////// // 0x5000 Entry 20 (size 16 bundles) Page Not Present ENTRY(kvm_page_not_present) - KVM_REFLECT(20) + KVM_REFLECT(20) END(kvm_page_not_present) .org kvm_ia64_ivt+0x5100 /////////////////////////////////////////////////////////////////////// // 0x5100 Entry 21 (size 16 bundles) Key Permission vector ENTRY(kvm_key_permission) - KVM_REFLECT(21) + KVM_REFLECT(21) END(kvm_key_permission) .org kvm_ia64_ivt+0x5200 ////////////////////////////////////////////////////////////////////// // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) ENTRY(kvm_iaccess_rights) - KVM_REFLECT(22) + KVM_REFLECT(22) END(kvm_iaccess_rights) .org kvm_ia64_ivt+0x5300 ////////////////////////////////////////////////////////////////////// // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) ENTRY(kvm_daccess_rights) - KVM_REFLECT(23) + KVM_REFLECT(23) END(kvm_daccess_rights) .org kvm_ia64_ivt+0x5400 ///////////////////////////////////////////////////////////////////// // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) ENTRY(kvm_general_exception) - KVM_REFLECT(24) - KVM_FAULT(24) + KVM_REFLECT(24) + KVM_FAULT(24) END(kvm_general_exception) .org kvm_ia64_ivt+0x5500 ////////////////////////////////////////////////////////////////////// // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) ENTRY(kvm_disabled_fp_reg) - KVM_REFLECT(25) + KVM_REFLECT(25) END(kvm_disabled_fp_reg) .org kvm_ia64_ivt+0x5600 //////////////////////////////////////////////////////////////////// // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) ENTRY(kvm_nat_consumption) - KVM_REFLECT(26) + KVM_REFLECT(26) END(kvm_nat_consumption) .org kvm_ia64_ivt+0x5700 ///////////////////////////////////////////////////////////////////// // 0x5700 Entry 27 (size 16 bundles) Speculation (40) ENTRY(kvm_speculation_vector) - KVM_REFLECT(27) + KVM_REFLECT(27) END(kvm_speculation_vector) .org kvm_ia64_ivt+0x5800 ///////////////////////////////////////////////////////////////////// // 0x5800 Entry 28 (size 16 bundles) Reserved - KVM_FAULT(28) + KVM_FAULT(28) .org kvm_ia64_ivt+0x5900 /////////////////////////////////////////////////////////////////// // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) ENTRY(kvm_debug_vector) - KVM_FAULT(29) + KVM_FAULT(29) END(kvm_debug_vector) .org kvm_ia64_ivt+0x5a00 /////////////////////////////////////////////////////////////// // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) ENTRY(kvm_unaligned_access) - KVM_REFLECT(30) + KVM_REFLECT(30) END(kvm_unaligned_access) .org kvm_ia64_ivt+0x5b00 ////////////////////////////////////////////////////////////////////// // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) ENTRY(kvm_unsupported_data_reference) - KVM_REFLECT(31) + KVM_REFLECT(31) END(kvm_unsupported_data_reference) .org kvm_ia64_ivt+0x5c00 //////////////////////////////////////////////////////////////////// // 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) ENTRY(kvm_floating_point_fault) - KVM_REFLECT(32) + KVM_REFLECT(32) END(kvm_floating_point_fault) .org kvm_ia64_ivt+0x5d00 ///////////////////////////////////////////////////////////////////// // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) ENTRY(kvm_floating_point_trap) - KVM_REFLECT(33) + KVM_REFLECT(33) END(kvm_floating_point_trap) .org kvm_ia64_ivt+0x5e00 ////////////////////////////////////////////////////////////////////// // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) ENTRY(kvm_lower_privilege_trap) - KVM_REFLECT(34) + KVM_REFLECT(34) END(kvm_lower_privilege_trap) .org kvm_ia64_ivt+0x5f00 ////////////////////////////////////////////////////////////////////// // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) ENTRY(kvm_taken_branch_trap) - KVM_REFLECT(35) + KVM_REFLECT(35) END(kvm_taken_branch_trap) .org kvm_ia64_ivt+0x6000 //////////////////////////////////////////////////////////////////// // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) ENTRY(kvm_single_step_trap) - KVM_REFLECT(36) + KVM_REFLECT(36) END(kvm_single_step_trap) .global kvm_virtualization_fault_back .org kvm_ia64_ivt+0x6100 ///////////////////////////////////////////////////////////////////// // 0x6100 Entry 37 (size 16 bundles) Virtualization Fault ENTRY(kvm_virtualization_fault) - mov r31=pr - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - st8 [r16] = r1 - adds r17 = VMM_VCPU_GP_OFFSET, r21 - ;; - ld8 r1 = [r17] - cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 - cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 - cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 - cmp.eq p9,p0=EVENT_RSM,r24 - cmp.eq p10,p0=EVENT_SSM,r24 - cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 - cmp.eq p12,p0=EVENT_THASH,r24 - (p6) br.dptk.many kvm_asm_mov_from_ar - (p7) br.dptk.many kvm_asm_mov_from_rr - (p8) br.dptk.many kvm_asm_mov_to_rr - (p9) br.dptk.many kvm_asm_rsm - (p10) br.dptk.many kvm_asm_ssm - (p11) br.dptk.many kvm_asm_mov_to_psr - (p12) br.dptk.many kvm_asm_thash - ;; + mov r31=pr + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + st8 [r16] = r1 + adds r17 = VMM_VCPU_GP_OFFSET, r21 + ;; + ld8 r1 = [r17] + cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 + cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 + cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 + cmp.eq p9,p0=EVENT_RSM,r24 + cmp.eq p10,p0=EVENT_SSM,r24 + cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 + cmp.eq p12,p0=EVENT_THASH,r24 +(p6) br.dptk.many kvm_asm_mov_from_ar +(p7) br.dptk.many kvm_asm_mov_from_rr +(p8) br.dptk.many kvm_asm_mov_to_rr +(p9) br.dptk.many kvm_asm_rsm +(p10) br.dptk.many kvm_asm_ssm +(p11) br.dptk.many kvm_asm_mov_to_psr +(p12) br.dptk.many kvm_asm_thash + ;; kvm_virtualization_fault_back: - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - ld8 r1 = [r16] - ;; - mov r19=37 - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - cmp.ne p6,p0=EVENT_RFI, r24 - (p6) br.sptk kvm_dispatch_virtualization_fault - ;; - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] - ;; - adds r18=VMM_VPD_VIFS_OFFSET,r18 - ;; - ld8 r18=[r18] - ;; - tbit.z p6,p0=r18,63 - (p6) br.sptk kvm_dispatch_virtualization_fault - ;; - //if vifs.v=1 desert current register frame - alloc r18=ar.pfs,0,0,0,0 - br.sptk kvm_dispatch_virtualization_fault + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + ld8 r1 = [r16] + ;; + mov r19=37 + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + cmp.ne p6,p0=EVENT_RFI, r24 +(p6) br.sptk kvm_dispatch_virtualization_fault + ;; + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] + ;; + adds r18=VMM_VPD_VIFS_OFFSET,r18 + ;; + ld8 r18=[r18] + ;; + tbit.z p6,p0=r18,63 +(p6) br.sptk kvm_dispatch_virtualization_fault + ;; +//if vifs.v=1 desert current register frame + alloc r18=ar.pfs,0,0,0,0 + br.sptk kvm_dispatch_virtualization_fault END(kvm_virtualization_fault) .org kvm_ia64_ivt+0x6200 ////////////////////////////////////////////////////////////// // 0x6200 Entry 38 (size 16 bundles) Reserved - KVM_FAULT(38) + KVM_FAULT(38) .org kvm_ia64_ivt+0x6300 ///////////////////////////////////////////////////////////////// // 0x6300 Entry 39 (size 16 bundles) Reserved - KVM_FAULT(39) + KVM_FAULT(39) .org kvm_ia64_ivt+0x6400 ///////////////////////////////////////////////////////////////// // 0x6400 Entry 40 (size 16 bundles) Reserved - KVM_FAULT(40) + KVM_FAULT(40) .org kvm_ia64_ivt+0x6500 ////////////////////////////////////////////////////////////////// // 0x6500 Entry 41 (size 16 bundles) Reserved - KVM_FAULT(41) + KVM_FAULT(41) .org kvm_ia64_ivt+0x6600 ////////////////////////////////////////////////////////////////// // 0x6600 Entry 42 (size 16 bundles) Reserved - KVM_FAULT(42) + KVM_FAULT(42) .org kvm_ia64_ivt+0x6700 ////////////////////////////////////////////////////////////////// // 0x6700 Entry 43 (size 16 bundles) Reserved - KVM_FAULT(43) + KVM_FAULT(43) .org kvm_ia64_ivt+0x6800 ////////////////////////////////////////////////////////////////// // 0x6800 Entry 44 (size 16 bundles) Reserved - KVM_FAULT(44) + KVM_FAULT(44) .org kvm_ia64_ivt+0x6900 /////////////////////////////////////////////////////////////////// // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception //(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) ENTRY(kvm_ia32_exception) - KVM_FAULT(45) + KVM_FAULT(45) END(kvm_ia32_exception) .org kvm_ia64_ivt+0x6a00 //////////////////////////////////////////////////////////////////// // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) ENTRY(kvm_ia32_intercept) - KVM_FAULT(47) + KVM_FAULT(47) END(kvm_ia32_intercept) .org kvm_ia64_ivt+0x6c00 ///////////////////////////////////////////////////////////////////// // 0x6c00 Entry 48 (size 16 bundles) Reserved - KVM_FAULT(48) + KVM_FAULT(48) .org kvm_ia64_ivt+0x6d00 ////////////////////////////////////////////////////////////////////// // 0x6d00 Entry 49 (size 16 bundles) Reserved - KVM_FAULT(49) + KVM_FAULT(49) .org kvm_ia64_ivt+0x6e00 ////////////////////////////////////////////////////////////////////// // 0x6e00 Entry 50 (size 16 bundles) Reserved - KVM_FAULT(50) + KVM_FAULT(50) .org kvm_ia64_ivt+0x6f00 ///////////////////////////////////////////////////////////////////// // 0x6f00 Entry 51 (size 16 bundles) Reserved - KVM_FAULT(52) + KVM_FAULT(52) .org kvm_ia64_ivt+0x7100 //////////////////////////////////////////////////////////////////// // 0x7100 Entry 53 (size 16 bundles) Reserved - KVM_FAULT(53) + KVM_FAULT(53) .org kvm_ia64_ivt+0x7200 ///////////////////////////////////////////////////////////////////// // 0x7200 Entry 54 (size 16 bundles) Reserved - KVM_FAULT(54) + KVM_FAULT(54) .org kvm_ia64_ivt+0x7300 //////////////////////////////////////////////////////////////////// // 0x7300 Entry 55 (size 16 bundles) Reserved - KVM_FAULT(55) + KVM_FAULT(55) .org kvm_ia64_ivt+0x7400 //////////////////////////////////////////////////////////////////// // 0x7400 Entry 56 (size 16 bundles) Reserved - KVM_FAULT(56) + KVM_FAULT(56) .org kvm_ia64_ivt+0x7500 ///////////////////////////////////////////////////////////////////// // 0x7500 Entry 57 (size 16 bundles) Reserved - KVM_FAULT(57) + KVM_FAULT(57) .org kvm_ia64_ivt+0x7600 ///////////////////////////////////////////////////////////////////// // 0x7600 Entry 58 (size 16 bundles) Reserved - KVM_FAULT(58) + KVM_FAULT(58) .org kvm_ia64_ivt+0x7700 //////////////////////////////////////////////////////////////////// // 0x7700 Entry 59 (size 16 bundles) Reserved - KVM_FAULT(59) + KVM_FAULT(59) .org kvm_ia64_ivt+0x7800 //////////////////////////////////////////////////////////////////// // 0x7800 Entry 60 (size 16 bundles) Reserved - KVM_FAULT(60) + KVM_FAULT(60) .org kvm_ia64_ivt+0x7900 ///////////////////////////////////////////////////////////////////// // 0x7900 Entry 61 (size 16 bundles) Reserved - KVM_FAULT(61) + KVM_FAULT(61) .org kvm_ia64_ivt+0x7a00 ///////////////////////////////////////////////////////////////////// // 0x7a00 Entry 62 (size 16 bundles) Reserved - KVM_FAULT(62) + KVM_FAULT(62) .org kvm_ia64_ivt+0x7b00 ///////////////////////////////////////////////////////////////////// // 0x7b00 Entry 63 (size 16 bundles) Reserved - KVM_FAULT(63) + KVM_FAULT(63) .org kvm_ia64_ivt+0x7c00 //////////////////////////////////////////////////////////////////// // 0x7c00 Entry 64 (size 16 bundles) Reserved - KVM_FAULT(64) + KVM_FAULT(64) .org kvm_ia64_ivt+0x7d00 ///////////////////////////////////////////////////////////////////// // 0x7d00 Entry 65 (size 16 bundles) Reserved - KVM_FAULT(65) + KVM_FAULT(65) .org kvm_ia64_ivt+0x7e00 ///////////////////////////////////////////////////////////////////// // 0x7e00 Entry 66 (size 16 bundles) Reserved - KVM_FAULT(66) + KVM_FAULT(66) .org kvm_ia64_ivt+0x7f00 //////////////////////////////////////////////////////////////////// // 0x7f00 Entry 67 (size 16 bundles) Reserved - KVM_FAULT(67) + KVM_FAULT(67) .org kvm_ia64_ivt+0x8000 // There is no particular reason for this code to be here, other than that @@ -804,132 +808,128 @@ END(kvm_ia32_intercept) ENTRY(kvm_dtlb_miss_dispatch) - mov r19 = 2 - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault + mov r19 = 2 + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault END(kvm_dtlb_miss_dispatch) ENTRY(kvm_itlb_miss_dispatch) - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault END(kvm_itlb_miss_dispatch) ENTRY(kvm_dispatch_reflection) - /* - * Input: - * psr.ic: off - * r19: intr type (offset into ivt, see ia64_int.h) - * r31: contains saved predicates (pr) - */ - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - mov out0=cr.ifa - mov out1=cr.isr - mov out2=cr.iim - mov out3=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out4=16,r12 - br.call.sptk.many b6=reflect_interruption +/* + * Input: + * psr.ic: off + * r19: intr type (offset into ivt, see ia64_int.h) + * r31: contains saved predicates (pr) + */ + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + mov out0=cr.ifa + mov out1=cr.isr + mov out2=cr.iim + mov out3=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out4=16,r12 + br.call.sptk.many b6=reflect_interruption END(kvm_dispatch_reflection) ENTRY(kvm_dispatch_virtualization_fault) - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) - mov out0=r13 //vcpu - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out1=16,sp //regs - br.call.sptk.many b6=kvm_emulate + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!) + mov out0=r13 //vcpu + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out1=16,sp //regs + br.call.sptk.many b6=kvm_emulate END(kvm_dispatch_virtualization_fault) ENTRY(kvm_dispatch_interrupt) - KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - //mov out0=cr.ivr // pass cr.ivr as first arg - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - ssm psr.ic - ;; - srlz.i - ;; - //(p15) ssm psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - mov out0=r13 // pass pointer to pt_regs as second arg - br.call.sptk.many b6=kvm_ia64_handle_irq + KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + mov out0=r13 // pass pointer to pt_regs as second arg + br.call.sptk.many b6=kvm_ia64_handle_irq END(kvm_dispatch_interrupt) - - - GLOBAL_ENTRY(ia64_leave_nested) rsm psr.i ;; @@ -1008,7 +1008,7 @@ GLOBAL_ENTRY(ia64_leave_nested) ;; ldf.fill f11=[r2] // mov r18=r13 -// mov r21=r13 +// mov r21=r13 adds r16=PT(CR_IPSR)+16,r12 adds r17=PT(CR_IIP)+16,r12 ;; @@ -1058,138 +1058,135 @@ GLOBAL_ENTRY(ia64_leave_nested) rfi END(ia64_leave_nested) - - GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) - /* - * work.need_resched etc. mustn't get changed - *by this CPU before it returns to - ;; - * user- or fsys-mode, hence we disable interrupts early on: - */ - adds r2 = PT(R4)+16,r12 - adds r3 = PT(R5)+16,r12 - adds r8 = PT(EML_UNAT)+16,r12 - ;; - ld8 r8 = [r8] - ;; - mov ar.unat=r8 - ;; - ld8.fill r4=[r2],16 //load r4 - ld8.fill r5=[r3],16 //load r5 - ;; - ld8.fill r6=[r2] //load r6 - ld8.fill r7=[r3] //load r7 - ;; +/* + * work.need_resched etc. mustn't get changed + *by this CPU before it returns to + * user- or fsys-mode, hence we disable interrupts early on: + */ + adds r2 = PT(R4)+16,r12 + adds r3 = PT(R5)+16,r12 + adds r8 = PT(EML_UNAT)+16,r12 + ;; + ld8 r8 = [r8] + ;; + mov ar.unat=r8 + ;; + ld8.fill r4=[r2],16 //load r4 + ld8.fill r5=[r3],16 //load r5 + ;; + ld8.fill r6=[r2] //load r6 + ld8.fill r7=[r3] //load r7 + ;; END(ia64_leave_hypervisor_prepare) //fall through GLOBAL_ENTRY(ia64_leave_hypervisor) - rsm psr.i - ;; - br.call.sptk.many b0=leave_hypervisor_tail - ;; - adds r20=PT(PR)+16,r12 - adds r8=PT(EML_UNAT)+16,r12 - ;; - ld8 r8=[r8] - ;; - mov ar.unat=r8 - ;; - lfetch [r20],PT(CR_IPSR)-PT(PR) - adds r2 = PT(B6)+16,r12 - adds r3 = PT(B7)+16,r12 - ;; - lfetch [r20] - ;; - ld8 r24=[r2],16 /* B6 */ - ld8 r25=[r3],16 /* B7 */ - ;; - ld8 r26=[r2],16 /* ar_csd */ - ld8 r27=[r3],16 /* ar_ssd */ - mov b6 = r24 - ;; - ld8.fill r8=[r2],16 - ld8.fill r9=[r3],16 - mov b7 = r25 - ;; - mov ar.csd = r26 - mov ar.ssd = r27 - ;; - ld8.fill r10=[r2],PT(R15)-PT(R10) - ld8.fill r11=[r3],PT(R14)-PT(R11) - ;; - ld8.fill r15=[r2],PT(R16)-PT(R15) - ld8.fill r14=[r3],PT(R17)-PT(R14) - ;; - ld8.fill r16=[r2],16 - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - ;; - ld8.fill r22=[r2],16 - ld8.fill r23=[r3],16 - ;; - ld8.fill r24=[r2],16 - ld8.fill r25=[r3],16 - ;; - ld8.fill r26=[r2],16 - ld8.fill r27=[r3],16 - ;; - ld8.fill r28=[r2],16 - ld8.fill r29=[r3],16 - ;; - ld8.fill r30=[r2],PT(F6)-PT(R30) - ld8.fill r31=[r3],PT(F7)-PT(R31) - ;; - rsm psr.i | psr.ic - // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - srlz.i // ensure interruption collection is off - ;; - bsw.0 - ;; - adds r16 = PT(CR_IPSR)+16,r12 - adds r17 = PT(CR_IIP)+16,r12 - mov r21=r13 // get current - ;; - ld8 r31=[r16],16 // load cr.ipsr - ld8 r30=[r17],16 // load cr.iip - ;; - ld8 r29=[r16],16 // load cr.ifs - ld8 r28=[r17],16 // load ar.unat - ;; - ld8 r27=[r16],16 // load ar.pfs - ld8 r26=[r17],16 // load ar.rsc - ;; - ld8 r25=[r16],16 // load ar.rnat - ld8 r24=[r17],16 // load ar.bspstore - ;; - ld8 r23=[r16],16 // load predicates - ld8 r22=[r17],16 // load b0 - ;; - ld8 r20=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 //load r1 - ;; - ld8.fill r12=[r16],16 //load r12 - ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 - ;; - ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr - ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 - ;; - ld8.fill r3=[r16] //load r3 - ld8 r18=[r17] //load ar_ccv - ;; - mov ar.fpsr=r19 - mov ar.ccv=r18 - shr.u r18=r20,16 - ;; + rsm psr.i + ;; + br.call.sptk.many b0=leave_hypervisor_tail + ;; + adds r20=PT(PR)+16,r12 + adds r8=PT(EML_UNAT)+16,r12 + ;; + ld8 r8=[r8] + ;; + mov ar.unat=r8 + ;; + lfetch [r20],PT(CR_IPSR)-PT(PR) + adds r2 = PT(B6)+16,r12 + adds r3 = PT(B7)+16,r12 + ;; + lfetch [r20] + ;; + ld8 r24=[r2],16 /* B6 */ + ld8 r25=[r3],16 /* B7 */ + ;; + ld8 r26=[r2],16 /* ar_csd */ + ld8 r27=[r3],16 /* ar_ssd */ + mov b6 = r24 + ;; + ld8.fill r8=[r2],16 + ld8.fill r9=[r3],16 + mov b7 = r25 + ;; + mov ar.csd = r26 + mov ar.ssd = r27 + ;; + ld8.fill r10=[r2],PT(R15)-PT(R10) + ld8.fill r11=[r3],PT(R14)-PT(R11) + ;; + ld8.fill r15=[r2],PT(R16)-PT(R15) + ld8.fill r14=[r3],PT(R17)-PT(R14) + ;; + ld8.fill r16=[r2],16 + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + ;; + ld8.fill r22=[r2],16 + ld8.fill r23=[r3],16 + ;; + ld8.fill r24=[r2],16 + ld8.fill r25=[r3],16 + ;; + ld8.fill r26=[r2],16 + ld8.fill r27=[r3],16 + ;; + ld8.fill r28=[r2],16 + ld8.fill r29=[r3],16 + ;; + ld8.fill r30=[r2],PT(F6)-PT(R30) + ld8.fill r31=[r3],PT(F7)-PT(R31) + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i // ensure interruption collection is off + ;; + bsw.0 + ;; + adds r16 = PT(CR_IPSR)+16,r12 + adds r17 = PT(CR_IIP)+16,r12 + mov r21=r13 // get current + ;; + ld8 r31=[r16],16 // load cr.ipsr + ld8 r30=[r17],16 // load cr.iip + ;; + ld8 r29=[r16],16 // load cr.ifs + ld8 r28=[r17],16 // load ar.unat + ;; + ld8 r27=[r16],16 // load ar.pfs + ld8 r26=[r17],16 // load ar.rsc + ;; + ld8 r25=[r16],16 // load ar.rnat + ld8 r24=[r17],16 // load ar.bspstore + ;; + ld8 r23=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r20=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 //load r1 + ;; + ld8.fill r12=[r16],16 //load r12 + ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 + ;; + ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr + ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 + ;; + ld8.fill r3=[r16] //load r3 + ld8 r18=[r17] //load ar_ccv + ;; + mov ar.fpsr=r19 + mov ar.ccv=r18 + shr.u r18=r20,16 + ;; kvm_rbs_switch: - mov r19=96 + mov r19=96 kvm_dont_preserve_current_frame: /* @@ -1201,76 +1198,76 @@ kvm_dont_preserve_current_frame: # define pReturn p7 # define Nregs 14 - alloc loc0=ar.pfs,2,Nregs-2,2,0 - shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) - sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize - ;; - mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" - shladd in0=loc1,3,r19 - mov in1=0 - ;; - TEXT_ALIGN(32) + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r19 + mov in1=0 + ;; + TEXT_ALIGN(32) kvm_rse_clear_invalid: - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 - // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 - add out1=1,in1 // increment recursion count - mov loc1=0 - mov loc2=0 - ;; - mov loc3=0 - mov loc4=0 - mov loc5=0 - mov loc6=0 - mov loc7=0 + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 + // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 (pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid - ;; - mov loc8=0 - mov loc9=0 - cmp.ne pReturn,p0=r0,in1 - // if recursion count != 0, we need to do a br.ret - mov loc10=0 - mov loc11=0 + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 + // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 (pReturn) br.ret.dptk.many b0 # undef pRecurse # undef pReturn // loadrs has already been shifted - alloc r16=ar.pfs,0,0,0,0 // drop current register frame - ;; - loadrs - ;; - mov ar.bspstore=r24 - ;; - mov ar.unat=r28 - mov ar.rnat=r25 - mov ar.rsc=r26 - ;; - mov cr.ipsr=r31 - mov cr.iip=r30 - mov cr.ifs=r29 - mov ar.pfs=r27 - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] //vpd - adds r17=VMM_VCPU_ISR_OFFSET,r21 - ;; - ld8 r17=[r17] - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] //vpsr - mov r25=r18 - adds r16= VMM_VCPU_GP_OFFSET,r21 - ;; - ld8 r16= [r16] // Put gp in r24 - movl r24=@gprel(ia64_vmm_entry) // calculate return address - ;; - add r24=r24,r16 - ;; - br.sptk.many kvm_vps_sync_write // call the service - ;; + alloc r16=ar.pfs,0,0,0,0 // drop current register frame + ;; + loadrs + ;; + mov ar.bspstore=r24 + ;; + mov ar.unat=r28 + mov ar.rnat=r25 + mov ar.rsc=r26 + ;; + mov cr.ipsr=r31 + mov cr.iip=r30 + mov cr.ifs=r29 + mov ar.pfs=r27 + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] //vpd + adds r17=VMM_VCPU_ISR_OFFSET,r21 + ;; + ld8 r17=[r17] + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] //vpsr + mov r25=r18 + adds r16= VMM_VCPU_GP_OFFSET,r21 + ;; + ld8 r16= [r16] // Put gp in r24 + movl r24=@gprel(ia64_vmm_entry) // calculate return address + ;; + add r24=r24,r16 + ;; + br.sptk.many kvm_vps_sync_write // call the service + ;; END(ia64_leave_hypervisor) // fall through GLOBAL_ENTRY(ia64_vmm_entry) @@ -1283,16 +1280,14 @@ GLOBAL_ENTRY(ia64_vmm_entry) * r22:b0 * r23:predicate */ - mov r24=r22 - mov r25=r18 - tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic - (p1) br.cond.sptk.few kvm_vps_resume_normal - (p2) br.cond.sptk.many kvm_vps_resume_handler - ;; + mov r24=r22 + mov r25=r18 + tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic +(p1) br.cond.sptk.few kvm_vps_resume_normal +(p2) br.cond.sptk.many kvm_vps_resume_handler + ;; END(ia64_vmm_entry) - - /* * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, * u64 arg3, u64 arg4, u64 arg5, @@ -1310,88 +1305,88 @@ psrsave = loc2 entry = loc3 hostret = r24 - alloc pfssave=ar.pfs,4,4,0,0 - mov rpsave=rp - adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 - ;; - ld8 entry=[entry] -1: mov hostret=ip - mov r25=in1 // copy arguments - mov r26=in2 - mov r27=in3 - mov psrsave=psr - ;; - tbit.nz p6,p0=psrsave,14 // IA64_PSR_I - tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC - ;; - add hostret=2f-1b,hostret // calculate return address - add entry=entry,in0 - ;; - rsm psr.i | psr.ic - ;; - srlz.i - mov b6=entry - br.cond.sptk b6 // call the service + alloc pfssave=ar.pfs,4,4,0,0 + mov rpsave=rp + adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 + ;; + ld8 entry=[entry] +1: mov hostret=ip + mov r25=in1 // copy arguments + mov r26=in2 + mov r27=in3 + mov psrsave=psr + ;; + tbit.nz p6,p0=psrsave,14 // IA64_PSR_I + tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC + ;; + add hostret=2f-1b,hostret // calculate return address + add entry=entry,in0 + ;; + rsm psr.i | psr.ic + ;; + srlz.i + mov b6=entry + br.cond.sptk b6 // call the service 2: - // Architectural sequence for enabling interrupts if necessary +// Architectural sequence for enabling interrupts if necessary (p7) ssm psr.ic - ;; + ;; (p7) srlz.i - ;; + ;; //(p6) ssm psr.i - ;; - mov rp=rpsave - mov ar.pfs=pfssave - mov r8=r31 - ;; - srlz.d - br.ret.sptk rp + ;; + mov rp=rpsave + mov ar.pfs=pfssave + mov r8=r31 + ;; + srlz.d + br.ret.sptk rp END(ia64_call_vsa) #define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) GLOBAL_ENTRY(vmm_reset_entry) - //set up ipsr, iip, vpd.vpsr, dcr - // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 - // For DCR: all bits 0 - bsw.0 - ;; - mov r21 =r13 - adds r14=-VMM_PT_REGS_SIZE, r12 - ;; - movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 - movl r10=0x8000000000000000 - adds r16=PT(CR_IIP), r14 - adds r20=PT(R1), r14 - ;; - rsm psr.ic | psr.i - ;; - srlz.i - ;; - mov ar.rsc = 0 - ;; - flushrs - ;; - mov ar.bspstore = 0 - // clear BSPSTORE - ;; - mov cr.ipsr=r6 - mov cr.ifs=r10 - ld8 r4 = [r16] // Set init iip for first run. - ld8 r1 = [r20] - ;; - mov cr.iip=r4 - adds r16=VMM_VPD_BASE_OFFSET,r13 - ;; - ld8 r18=[r16] - ;; - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] - mov r17=r0 - mov r22=r0 - mov r23=r0 - br.cond.sptk ia64_vmm_entry - br.ret.sptk b0 + //set up ipsr, iip, vpd.vpsr, dcr + // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 + // For DCR: all bits 0 + bsw.0 + ;; + mov r21 =r13 + adds r14=-VMM_PT_REGS_SIZE, r12 + ;; + movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 + movl r10=0x8000000000000000 + adds r16=PT(CR_IIP), r14 + adds r20=PT(R1), r14 + ;; + rsm psr.ic | psr.i + ;; + srlz.i + ;; + mov ar.rsc = 0 + ;; + flushrs + ;; + mov ar.bspstore = 0 + // clear BSPSTORE + ;; + mov cr.ipsr=r6 + mov cr.ifs=r10 + ld8 r4 = [r16] // Set init iip for first run. + ld8 r1 = [r20] + ;; + mov cr.iip=r4 + adds r16=VMM_VPD_BASE_OFFSET,r13 + ;; + ld8 r18=[r16] + ;; + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] + mov r17=r0 + mov r22=r0 + mov r23=r0 + br.cond.sptk ia64_vmm_entry + br.ret.sptk b0 END(vmm_reset_entry) diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c index e22b93361e0..6b6307a3bd5 100644 --- a/arch/ia64/kvm/vtlb.c +++ b/arch/ia64/kvm/vtlb.c @@ -183,8 +183,8 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) u64 i, dirty_pages = 1; u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); - void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) - + KVM_MEM_DIRTY_LOG_OFS; + void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE; + dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; vmm_spin_lock(lock); diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 0c66dbdd1d7..66fd705e82c 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -227,14 +227,14 @@ finish_up: return new_irq_info; } -static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) +static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; nasid_t nasid; int slice; - nasid = cpuid_to_nasid(first_cpu(mask)); - slice = cpuid_to_slice(first_cpu(mask)); + nasid = cpuid_to_nasid(cpumask_first(mask)); + slice = cpuid_to_slice(cpumask_first(mask)); list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, sn_irq_lh[irq], list) diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 83f190ffe35..ca553b0429c 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -151,7 +151,8 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) } #ifdef CONFIG_SMP -static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) +static void sn_set_msi_irq_affinity(unsigned int irq, + const struct cpumask *cpu_mask) { struct msi_msg msg; int slice; @@ -164,7 +165,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) struct sn_pcibus_provider *provider; unsigned int cpu; - cpu = first_cpu(cpu_mask); + cpu = cpumask_first(cpu_mask); sn_irq_info = sn_msi_info[irq].sn_irq_info; if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) return; @@ -204,7 +205,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = cpu_mask; + irq_desc[irq].affinity = *cpu_mask; } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 636588e7e06..be339477f90 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -385,7 +385,6 @@ static int sn_topology_show(struct seq_file *s, void *d) int j; const char *slabname; int ordinal; - cpumask_t cpumask; char slice; struct cpuinfo_ia64 *c; struct sn_hwperf_port_info *ptdata; @@ -473,23 +472,21 @@ static int sn_topology_show(struct seq_file *s, void *d) * CPUs on this node, if any */ if (!SN_HWPERF_IS_IONODE(obj)) { - cpumask = node_to_cpumask(ordinal); - for_each_online_cpu(i) { - if (cpu_isset(i, cpumask)) { - slice = 'a' + cpuid_to_slice(i); - c = cpu_data(i); - seq_printf(s, "cpu %d %s%c local" - " freq %luMHz, arch ia64", - i, obj->location, slice, - c->proc_freq / 1000000); - for_each_online_cpu(j) { - seq_printf(s, j ? ":%d" : ", dist %d", - node_distance( + for_each_cpu_and(i, cpu_online_mask, + cpumask_of_node(ordinal)) { + slice = 'a' + cpuid_to_slice(i); + c = cpu_data(i); + seq_printf(s, "cpu %d %s%c local" + " freq %luMHz, arch ia64", + i, obj->location, slice, + c->proc_freq / 1000000); + for_each_online_cpu(j) { + seq_printf(s, j ? ":%d" : ", dist %d", + node_distance( cpu_to_node(i), cpu_to_node(j))); - } - seq_putc(s, '\n'); } + seq_putc(s, '\n'); } } } diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 29047d5c259..cabba332cc4 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -10,6 +10,7 @@ config M32R default y select HAVE_IDE select HAVE_OPROFILE + select INIT_ALL_POSSIBLE config SBUS bool diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c index 0d658dbb676..016885c6f26 100644 --- a/arch/m32r/kernel/init_task.c +++ b/arch/m32r/kernel/init_task.c @@ -11,7 +11,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index 39cb6da72dc..2547d6c4a82 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -73,17 +73,11 @@ static unsigned int bsp_phys_id = -1; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; -/* Bitmask of currently online CPUs */ -cpumask_t cpu_online_map; -EXPORT_SYMBOL(cpu_online_map); - cpumask_t cpu_bootout_map; cpumask_t cpu_bootin_map; static cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); -cpumask_t cpu_possible_map = CPU_MASK_ALL; -EXPORT_SYMBOL(cpu_possible_map); /* Per CPU bogomips and other parameters */ struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned; @@ -598,7 +592,7 @@ int setup_profiling_timer(unsigned int multiplier) * accounting. At that time they also adjust their APIC timers * accordingly. */ - for (i = 0; i < NR_CPUS; ++i) + for_each_possible_cpu(i) per_cpu(prof_multiplier, i) = multiplier; return 0; diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 836fb66f080..c825bde17cb 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -280,7 +280,6 @@ config M68060 config MMU_MOTOROLA bool - depends on MMU && !MMU_SUN3 config MMU_SUN3 bool diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 3042c2bc8c5..632ce016014 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -40,7 +40,6 @@ * alignment requirements and potentially different initial * setup. */ -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m68knommu/include/asm/bitops.h b/arch/m68knommu/include/asm/bitops.h index 6f3685eab44..9d3cbe5fad1 100644 --- a/arch/m68knommu/include/asm/bitops.h +++ b/arch/m68knommu/include/asm/bitops.h @@ -331,6 +331,7 @@ found_middle: #endif /* __KERNEL__ */ #include <asm-generic/bitops/fls.h> +#include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls64.h> #endif /* _M68KNOMMU_BITOPS_H */ diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c index 344c01aede0..fe282de1d59 100644 --- a/arch/m68knommu/kernel/init_task.c +++ b/arch/m68knommu/kernel/init_task.c @@ -12,7 +12,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c index c5b916700b2..2a12e7fa974 100644 --- a/arch/m68knommu/platform/coldfire/pit.c +++ b/arch/m68knommu/platform/coldfire/pit.c @@ -156,7 +156,7 @@ void hw_timer_init(void) { u32 imr; - cf_pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); + cf_pit_clockevent.cpumask = cpumask_of(smp_processor_id()); cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32); cf_pit_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFF, &cf_pit_clockevent); diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index a58f0eecc68..abc62aa744a 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -49,7 +49,8 @@ static inline void smtc_im_ack_irq(unsigned int irq) #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF #include <linux/cpumask.h> -extern void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity); +extern void plat_set_irq_affinity(unsigned int irq, + const struct cpumask *affinity); extern void smtc_forward_irq(unsigned int irq); /* diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 7785bec732f..55d481569a1 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -25,11 +25,13 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS]; #define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid) #define parent_node(node) (node) #define node_to_cpumask(node) (hub_data(node)->h_cpus) -#define node_to_first_cpu(node) (first_cpu(node_to_cpumask(node))) +#define cpumask_of_node(node) (&hub_data(node)->h_cpus) +#define node_to_first_cpu(node) (cpumask_first(cpumask_of_node(node))) struct pci_bus; extern int pcibus_to_node(struct pci_bus *); #define pcibus_to_cpumask(bus) (cpu_online_map) +#define cpumask_of_pcibus(bus) (cpu_online_mask) extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; @@ -37,7 +39,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; /* sched_domains SD_NODE_INIT for SGI IP27 machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 0ff5b523ea7..86557b5d1b3 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -38,9 +38,6 @@ extern int __cpu_logical_map[NR_CPUS]; #define SMP_RESCHEDULE_YOURSELF 0x1 /* XXX braindead */ #define SMP_CALL_FUNCTION 0x2 -extern cpumask_t phys_cpu_present_map; -#define cpu_possible_map phys_cpu_present_map - extern void asmlinkage smp_bootstrap(void); /* diff --git a/arch/mips/jazz/irq.c b/arch/mips/jazz/irq.c index d7f8a782aae..03965cb1b25 100644 --- a/arch/mips/jazz/irq.c +++ b/arch/mips/jazz/irq.c @@ -146,7 +146,7 @@ void __init plat_time_init(void) BUG_ON(HZ != 100); - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); clockevents_register_device(cd); action->dev_id = cd; setup_irq(JAZZ_TIMER_IRQ, action); diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c index 0a57f86945f..b820661678b 100644 --- a/arch/mips/kernel/cevt-bcm1480.c +++ b/arch/mips/kernel/cevt-bcm1480.c @@ -126,7 +126,7 @@ void __cpuinit sb1480_clockevent_init(void) cd->min_delta_ns = clockevent_delta2ns(2, cd); cd->rating = 200; cd->irq = irq; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = sibyte_next_event; cd->set_mode = sibyte_set_mode; clockevents_register_device(cd); @@ -148,6 +148,6 @@ void __cpuinit sb1480_clockevent_init(void) action->name = name; action->dev_id = cd; - irq_set_affinity(irq, cpumask_of_cpu(cpu)); + irq_set_affinity(irq, cpumask_of(cpu)); setup_irq(irq, action); } diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c index df4acb68bfb..1ada45ea070 100644 --- a/arch/mips/kernel/cevt-ds1287.c +++ b/arch/mips/kernel/cevt-ds1287.c @@ -88,7 +88,6 @@ static void ds1287_event_handler(struct clock_event_device *dev) static struct clock_event_device ds1287_clockevent = { .name = "ds1287", .features = CLOCK_EVT_FEAT_PERIODIC, - .cpumask = CPU_MASK_CPU0, .set_next_event = ds1287_set_next_event, .set_mode = ds1287_set_mode, .event_handler = ds1287_event_handler, @@ -122,6 +121,7 @@ int __init ds1287_clockevent_init(int irq) clockevent_set_clock(cd, 32768); cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); cd->min_delta_ns = clockevent_delta2ns(0x300, cd); + cd->cpumask = cpumask_of(0); clockevents_register_device(&ds1287_clockevent); diff --git a/arch/mips/kernel/cevt-gt641xx.c b/arch/mips/kernel/cevt-gt641xx.c index 6e2f58520af..e9b787feedc 100644 --- a/arch/mips/kernel/cevt-gt641xx.c +++ b/arch/mips/kernel/cevt-gt641xx.c @@ -96,7 +96,6 @@ static void gt641xx_timer0_event_handler(struct clock_event_device *dev) static struct clock_event_device gt641xx_timer0_clockevent = { .name = "gt641xx-timer0", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .cpumask = CPU_MASK_CPU0, .irq = GT641XX_TIMER0_IRQ, .set_next_event = gt641xx_timer0_set_next_event, .set_mode = gt641xx_timer0_set_mode, @@ -132,6 +131,7 @@ static int __init gt641xx_timer0_clockevent_init(void) clockevent_set_clock(cd, gt641xx_base_clock); cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); cd->min_delta_ns = clockevent_delta2ns(0x300, cd); + cd->cpumask = cpumask_of(0); clockevents_register_device(>641xx_timer0_clockevent); diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 4a4c59f2737..e1ec83b6803 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -195,7 +195,7 @@ int __cpuinit mips_clockevent_init(void) cd->rating = 300; cd->irq = irq; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = mips_next_event; cd->set_mode = mips_set_clock_mode; cd->event_handler = mips_event_handler; diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c index 63ac3ad462b..a2eebaafda5 100644 --- a/arch/mips/kernel/cevt-sb1250.c +++ b/arch/mips/kernel/cevt-sb1250.c @@ -125,7 +125,7 @@ void __cpuinit sb1250_clockevent_init(void) cd->min_delta_ns = clockevent_delta2ns(2, cd); cd->rating = 200; cd->irq = irq; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = sibyte_next_event; cd->set_mode = sibyte_set_mode; clockevents_register_device(cd); @@ -147,6 +147,6 @@ void __cpuinit sb1250_clockevent_init(void) action->name = name; action->dev_id = cd; - irq_set_affinity(irq, cpumask_of_cpu(cpu)); + irq_set_affinity(irq, cpumask_of(cpu)); setup_irq(irq, action); } diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c index 5162fe4b595..6d45e24db5b 100644 --- a/arch/mips/kernel/cevt-smtc.c +++ b/arch/mips/kernel/cevt-smtc.c @@ -292,7 +292,7 @@ int __cpuinit mips_clockevent_init(void) cd->rating = 300; cd->irq = irq; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = mips_next_event; cd->set_mode = mips_set_clock_mode; cd->event_handler = mips_event_handler; diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c index b5fc4eb412d..eccf7d6096b 100644 --- a/arch/mips/kernel/cevt-txx9.c +++ b/arch/mips/kernel/cevt-txx9.c @@ -112,7 +112,6 @@ static struct clock_event_device txx9tmr_clock_event_device = { .name = "TXx9", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .rating = 200, - .cpumask = CPU_MASK_CPU0, .set_mode = txx9tmr_set_mode, .set_next_event = txx9tmr_set_next_event, }; @@ -150,6 +149,7 @@ void __init txx9_clockevent_init(unsigned long baseaddr, int irq, clockevent_delta2ns(0xffffffff >> (32 - TXX9_TIMER_BITS), cd); cd->min_delta_ns = clockevent_delta2ns(0xf, cd); cd->irq = irq; + cd->cpumask = cpumask_of(0), clockevents_register_device(cd); setup_irq(irq, &txx9tmr_irq); printk(KERN_INFO "TXx9: clockevent device at 0x%lx, irq %d\n", diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c index b6ac55162b9..f4d187825f9 100644 --- a/arch/mips/kernel/i8253.c +++ b/arch/mips/kernel/i8253.c @@ -115,7 +115,7 @@ void __init setup_pit_timer(void) * Start pit with the boot cpu mask and make it global after the * IO_APIC has been initialized. */ - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); clockevent_set_clock(cd, CLOCK_TICK_RATE); cd->max_delta_ns = clockevent_delta2ns(0x7FFF, cd); cd->min_delta_ns = clockevent_delta2ns(0xF, cd); diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c index d72487ad7c1..149cd914526 100644 --- a/arch/mips/kernel/init_task.c +++ b/arch/mips/kernel/init_task.c @@ -9,7 +9,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index f0a4bb19e09..494a49a317e 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq) static DEFINE_SPINLOCK(gic_lock); -static void gic_set_affinity(unsigned int irq, cpumask_t cpumask) +static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { cpumask_t tmp = CPU_MASK_NONE; unsigned long flags; @@ -164,7 +164,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask) pr_debug(KERN_DEBUG "%s called\n", __func__); irq -= _irqbase; - cpus_and(tmp, cpumask, cpu_online_map); + cpumask_and(&tmp, cpumask, cpu_online_mask); if (cpus_empty(tmp)) return; @@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask) set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask); } - irq_desc[irq].affinity = cpumask; + irq_desc[irq].affinity = *cpumask; spin_unlock_irqrestore(&gic_lock, flags); } diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index ca476c4f62a..f27beca4b26 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -51,10 +51,10 @@ static int __init allowcpus(char *str) int len; cpus_clear(cpu_allow_map); - if (cpulist_parse(str, cpu_allow_map) == 0) { + if (cpulist_parse(str, &cpu_allow_map) == 0) { cpu_set(0, cpu_allow_map); cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map); - len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map); + len = cpulist_scnprintf(buf, sizeof(buf)-1, &cpu_possible_map); buf[len] = '\0'; pr_debug("Allowable CPUs: %s\n", buf); return 1; @@ -226,7 +226,7 @@ void __init cmp_smp_setup(void) for (i = 1; i < NR_CPUS; i++) { if (amon_cpu_avail(i)) { - cpu_set(i, phys_cpu_present_map); + cpu_set(i, cpu_possible_map); __cpu_number_map[i] = ++ncpu; __cpu_logical_map[ncpu] = i; } diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 87a1816c1f4..6f7ee5ac46e 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -70,7 +70,7 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0, write_vpe_c0_vpeconf0(tmp); /* Record this as available CPU */ - cpu_set(tc, phys_cpu_present_map); + cpu_set(tc, cpu_possible_map); __cpu_number_map[tc] = ++ncpu; __cpu_logical_map[ncpu] = tc; } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 8bf88faf5af..3da94704f81 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -44,15 +44,10 @@ #include <asm/mipsmtregs.h> #endif /* CONFIG_MIPS_MT_SMTC */ -cpumask_t phys_cpu_present_map; /* Bitmask of available CPUs */ volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ -cpumask_t cpu_online_map; /* Bitmask of currently online CPUs */ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ -EXPORT_SYMBOL(phys_cpu_present_map); -EXPORT_SYMBOL(cpu_online_map); - extern void cpu_idle(void); /* Number of TCs (or siblings in Intel speak) per CPU core */ @@ -195,7 +190,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* preload SMP state for boot cpu */ void __devinit smp_prepare_boot_cpu(void) { - cpu_set(0, phys_cpu_present_map); + cpu_set(0, cpu_possible_map); cpu_set(0, cpu_online_map); cpu_set(0, cpu_callin_map); } diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index 897fb2b4751..b6cca01ff82 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -290,7 +290,7 @@ static void smtc_configure_tlb(void) * possibly leave some TCs/VPEs as "slave" processors. * * Use c0_MVPConf0 to find out how many TCs are available, setting up - * phys_cpu_present_map and the logical/physical mappings. + * cpu_possible_map and the logical/physical mappings. */ int __init smtc_build_cpu_map(int start_cpu_slot) @@ -304,7 +304,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot) */ ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1; for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) { - cpu_set(i, phys_cpu_present_map); + cpu_set(i, cpu_possible_map); __cpu_number_map[i] = i; __cpu_logical_map[i] = i; } @@ -521,7 +521,7 @@ void smtc_prepare_cpus(int cpus) * Pull any physically present but unused TCs out of circulation. */ while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) { - cpu_clear(tc, phys_cpu_present_map); + cpu_clear(tc, cpu_possible_map); cpu_clear(tc, cpu_present_map); tc++; } diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index f84a46a8ae6..aabd7274507 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -114,9 +114,9 @@ struct plat_smp_ops msmtc_smp_ops = { */ -void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity) +void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { - cpumask_t tmask = affinity; + cpumask_t tmask = *affinity; int cpu = 0; void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff); @@ -139,7 +139,7 @@ void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity) * be made to forward to an offline "CPU". */ - for_each_cpu_mask(cpu, affinity) { + for_each_cpu(cpu, affinity) { if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu)) cpu_clear(cpu, tmask); } diff --git a/arch/mips/nxp/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c index 62f495b57f9..cf293b27909 100644 --- a/arch/mips/nxp/pnx8550/common/time.c +++ b/arch/mips/nxp/pnx8550/common/time.c @@ -102,6 +102,7 @@ __init void plat_time_init(void) unsigned int p; unsigned int pow2p; + pnx8xxx_clockevent.cpumask = cpu_none_mask; clockevents_register_device(&pnx8xxx_clockevent); clocksource_register(&pnx_clocksource); diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index 3a7df647ca7..f78c29b68d7 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -141,7 +141,7 @@ static void __cpuinit yos_boot_secondary(int cpu, struct task_struct *idle) } /* - * Detect available CPUs, populate phys_cpu_present_map before smp_init + * Detect available CPUs, populate cpu_possible_map before smp_init * * We don't want to start the secondary CPU yet nor do we have a nice probing * feature in PMON so we just assume presence of the secondary core. @@ -150,10 +150,10 @@ static void __init yos_smp_setup(void) { int i; - cpus_clear(phys_cpu_present_map); + cpus_clear(cpu_possible_map); for (i = 0; i < 2; i++) { - cpu_set(i, phys_cpu_present_map); + cpu_set(i, cpu_possible_map); __cpu_number_map[i] = i; __cpu_logical_map[i] = i; } diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index ba5cdebeaf0..5b47d6b6527 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -76,7 +76,7 @@ static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest) /* Only let it join in if it's marked enabled */ if ((acpu->cpu_info.flags & KLINFO_ENABLE) && (tot_cpus_found != NR_CPUS)) { - cpu_set(cpuid, phys_cpu_present_map); + cpu_set(cpuid, cpu_possible_map); alloc_cpupda(cpuid, tot_cpus_found); cpus_found++; tot_cpus_found++; diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 1327c2746fb..f024057a35f 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -134,7 +134,7 @@ void __cpuinit hub_rt_clock_event_init(void) cd->min_delta_ns = clockevent_delta2ns(0x300, cd); cd->rating = 200; cd->irq = irq; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = rt_next_event; cd->set_mode = rt_set_mode; clockevents_register_device(cd); diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c index a35818ed426..12b465d404d 100644 --- a/arch/mips/sibyte/bcm1480/irq.c +++ b/arch/mips/sibyte/bcm1480/irq.c @@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq); static void disable_bcm1480_irq(unsigned int irq); static void ack_bcm1480_irq(unsigned int irq); #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask); +static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask); #endif #ifdef CONFIG_PCI @@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask) +static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) { int i = 0, old_cpu, cpu, int_on, k; u64 cur_ints; @@ -117,11 +117,11 @@ static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask) unsigned long flags; unsigned int irq_dirty; - if (cpus_weight(mask) != 1) { + if (cpumask_weight(mask) != 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); return; } - i = first_cpu(mask); + i = cpumask_first(mask); /* Convert logical CPU to physical CPU */ cpu = cpu_logical_map(i); diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index bd9eeb43ed0..dddfda8e829 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -136,7 +136,7 @@ static void __cpuinit bcm1480_boot_secondary(int cpu, struct task_struct *idle) /* * Use CFE to find out how many CPUs are available, setting up - * phys_cpu_present_map and the logical/physical mappings. + * cpu_possible_map and the logical/physical mappings. * XXXKW will the boot CPU ever not be physical 0? * * Common setup before any secondaries are started @@ -145,14 +145,14 @@ static void __init bcm1480_smp_setup(void) { int i, num; - cpus_clear(phys_cpu_present_map); - cpu_set(0, phys_cpu_present_map); + cpus_clear(cpu_possible_map); + cpu_set(0, cpu_possible_map); __cpu_number_map[0] = 0; __cpu_logical_map[0] = 0; for (i = 1, num = 0; i < NR_CPUS; i++) { if (cfe_cpu_stop(i) == 0) { - cpu_set(i, phys_cpu_present_map); + cpu_set(i, cpu_possible_map); __cpu_number_map[i] = ++num; __cpu_logical_map[num] = i; } diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c index a5158483986..808ac2959b8 100644 --- a/arch/mips/sibyte/sb1250/irq.c +++ b/arch/mips/sibyte/sb1250/irq.c @@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq); static void disable_sb1250_irq(unsigned int irq); static void ack_sb1250_irq(unsigned int irq); #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, cpumask_t mask); +static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask); #endif #ifdef CONFIG_SIBYTE_HAS_LDT @@ -103,16 +103,16 @@ void sb1250_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, cpumask_t mask) +static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) { int i = 0, old_cpu, cpu, int_on; u64 cur_ints; struct irq_desc *desc = irq_desc + irq; unsigned long flags; - i = first_cpu(mask); + i = cpumask_first(mask); - if (cpus_weight(mask) > 1) { + if (cpumask_weight(mask) > 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); return; } diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index 0734b933e96..5950a288a7d 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -124,7 +124,7 @@ static void __cpuinit sb1250_boot_secondary(int cpu, struct task_struct *idle) /* * Use CFE to find out how many CPUs are available, setting up - * phys_cpu_present_map and the logical/physical mappings. + * cpu_possible_map and the logical/physical mappings. * XXXKW will the boot CPU ever not be physical 0? * * Common setup before any secondaries are started @@ -133,14 +133,14 @@ static void __init sb1250_smp_setup(void) { int i, num; - cpus_clear(phys_cpu_present_map); - cpu_set(0, phys_cpu_present_map); + cpus_clear(cpu_possible_map); + cpu_set(0, cpu_possible_map); __cpu_number_map[0] = 0; __cpu_logical_map[0] = 0; for (i = 1, num = 0; i < NR_CPUS; i++) { if (cfe_cpu_stop(i) == 0) { - cpu_set(i, phys_cpu_present_map); + cpu_set(i, cpu_possible_map); __cpu_number_map[i] = ++num; __cpu_logical_map[num] = i; } diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index 796e3ce2872..69f5f88711c 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -80,7 +80,7 @@ static void __init sni_a20r_timer_setup(void) struct irqaction *action = &a20r_irqaction; unsigned int cpu = smp_processor_id(); - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); clockevents_register_device(cd); action->dev_id = cd; setup_irq(SNI_A20R_IRQ_TIMER, &a20r_irqaction); diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c index af16f6e5c91..5ac3566f8c9 100644 --- a/arch/mn10300/kernel/init_task.c +++ b/arch/mn10300/kernel/init_task.c @@ -18,7 +18,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 644a70b1b04..aacf11d3372 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -11,6 +11,7 @@ config PARISC select HAVE_OPROFILE select RTC_CLASS select RTC_DRV_PARISC + select INIT_ALL_POSSIBLE help The PA-RISC microprocessor is designed by Hewlett-Packard and used in many of their workstations & servers (HP9000 700 and 800 series, diff --git a/arch/parisc/include/asm/module.h b/arch/parisc/include/asm/module.h index c2cb49e934c..1f4123427ea 100644 --- a/arch/parisc/include/asm/module.h +++ b/arch/parisc/include/asm/module.h @@ -23,8 +23,10 @@ struct mod_arch_specific { unsigned long got_offset, got_count, got_max; unsigned long fdesc_offset, fdesc_count, fdesc_max; - unsigned long stub_offset, stub_count, stub_max; - unsigned long init_stub_offset, init_stub_count, init_stub_max; + struct { + unsigned long stub_offset; + unsigned int stub_entries; + } *section; int unwind_section; struct unwind_table *unwind; }; diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h index 409e698f436..6ef4b7867b1 100644 --- a/arch/parisc/include/asm/smp.h +++ b/arch/parisc/include/asm/smp.h @@ -16,8 +16,6 @@ #include <linux/cpumask.h> typedef unsigned long address_t; -extern cpumask_t cpu_online_map; - /* * Private routines/data diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c index f5941c08655..1e25a45d64c 100644 --- a/arch/parisc/kernel/init_task.c +++ b/arch/parisc/kernel/init_task.c @@ -34,7 +34,6 @@ #include <asm/pgtable.h> #include <asm/pgalloc.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 23ef950df00..4cea935e2f9 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -131,12 +131,12 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest) return 0; } -static void cpu_set_affinity_irq(unsigned int irq, cpumask_t dest) +static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) { - if (cpu_check_affinity(irq, &dest)) + if (cpu_check_affinity(irq, dest)) return; - irq_desc[irq].affinity = dest; + irq_desc[irq].affinity = *dest; } #endif diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 44138c3e6ea..9013243cecc 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -6,6 +6,7 @@ * * Linux/PA-RISC Project (http://www.parisc-linux.org/) * Copyright (C) 2003 Randolph Chung <tausq at debian . org> + * Copyright (C) 2008 Helge Deller <deller@gmx.de> * * * This program is free software; you can redistribute it and/or modify @@ -24,6 +25,19 @@ * * * Notes: + * - PLT stub handling + * On 32bit (and sometimes 64bit) and with big kernel modules like xfs or + * ipv6 the relocation types R_PARISC_PCREL17F and R_PARISC_PCREL22F may + * fail to reach their PLT stub if we only create one big stub array for + * all sections at the beginning of the core or init section. + * Instead we now insert individual PLT stub entries directly in front of + * of the code sections where the stubs are actually called. + * This reduces the distance between the PCREL location and the stub entry + * so that the relocations can be fulfilled. + * While calculating the final layout of the kernel module in memory, the + * kernel module loader calls arch_mod_section_prepend() to request the + * to be reserved amount of memory in front of each individual section. + * * - SEGREL32 handling * We are not doing SEGREL32 handling correctly. According to the ABI, we * should do a value offset, like this: @@ -58,9 +72,13 @@ #define DEBUGP(fmt...) #endif +#define RELOC_REACHABLE(val, bits) \ + (( ( !((val) & (1<<((bits)-1))) && ((val)>>(bits)) != 0 ) || \ + ( ((val) & (1<<((bits)-1))) && ((val)>>(bits)) != (((__typeof__(val))(~0))>>((bits)+2)))) ? \ + 0 : 1) + #define CHECK_RELOC(val, bits) \ - if ( ( !((val) & (1<<((bits)-1))) && ((val)>>(bits)) != 0 ) || \ - ( ((val) & (1<<((bits)-1))) && ((val)>>(bits)) != (((__typeof__(val))(~0))>>((bits)+2)))) { \ + if (!RELOC_REACHABLE(val, bits)) { \ printk(KERN_ERR "module %s relocation of symbol %s is out of range (0x%lx in %d bits)\n", \ me->name, strtab + sym->st_name, (unsigned long)val, bits); \ return -ENOEXEC; \ @@ -92,13 +110,6 @@ static inline int in_local(struct module *me, void *loc) return in_init(me, loc) || in_core(me, loc); } -static inline int in_local_section(struct module *me, void *loc, void *dot) -{ - return (in_init(me, loc) && in_init(me, dot)) || - (in_core(me, loc) && in_core(me, dot)); -} - - #ifndef CONFIG_64BIT struct got_entry { Elf32_Addr addr; @@ -258,23 +269,42 @@ static inline unsigned long count_stubs(const Elf_Rela *rela, unsigned long n) /* Free memory returned from module_alloc */ void module_free(struct module *mod, void *module_region) { + kfree(mod->arch.section); + mod->arch.section = NULL; + vfree(module_region); /* FIXME: If module_region == mod->init_region, trim exception table entries. */ } +/* Additional bytes needed in front of individual sections */ +unsigned int arch_mod_section_prepend(struct module *mod, + unsigned int section) +{ + /* size needed for all stubs of this section (including + * one additional for correct alignment of the stubs) */ + return (mod->arch.section[section].stub_entries + 1) + * sizeof(struct stub_entry); +} + #define CONST int module_frob_arch_sections(CONST Elf_Ehdr *hdr, CONST Elf_Shdr *sechdrs, CONST char *secstrings, struct module *me) { - unsigned long gots = 0, fdescs = 0, stubs = 0, init_stubs = 0; + unsigned long gots = 0, fdescs = 0, len; unsigned int i; + len = hdr->e_shnum * sizeof(me->arch.section[0]); + me->arch.section = kzalloc(len, GFP_KERNEL); + if (!me->arch.section) + return -ENOMEM; + for (i = 1; i < hdr->e_shnum; i++) { - const Elf_Rela *rels = (void *)hdr + sechdrs[i].sh_offset; + const Elf_Rela *rels = (void *)sechdrs[i].sh_addr; unsigned long nrels = sechdrs[i].sh_size / sizeof(*rels); + unsigned int count, s; if (strncmp(secstrings + sechdrs[i].sh_name, ".PARISC.unwind", 14) == 0) @@ -290,11 +320,23 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr, */ gots += count_gots(rels, nrels); fdescs += count_fdescs(rels, nrels); - if(strncmp(secstrings + sechdrs[i].sh_name, - ".rela.init", 10) == 0) - init_stubs += count_stubs(rels, nrels); - else - stubs += count_stubs(rels, nrels); + + /* XXX: By sorting the relocs and finding duplicate entries + * we could reduce the number of necessary stubs and save + * some memory. */ + count = count_stubs(rels, nrels); + if (!count) + continue; + + /* so we need relocation stubs. reserve necessary memory. */ + /* sh_info gives the section for which we need to add stubs. */ + s = sechdrs[i].sh_info; + + /* each code section should only have one relocation section */ + WARN_ON(me->arch.section[s].stub_entries); + + /* store number of stubs we need for this section */ + me->arch.section[s].stub_entries += count; } /* align things a bit */ @@ -306,18 +348,8 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr, me->arch.fdesc_offset = me->core_size; me->core_size += fdescs * sizeof(Elf_Fdesc); - me->core_size = ALIGN(me->core_size, 16); - me->arch.stub_offset = me->core_size; - me->core_size += stubs * sizeof(struct stub_entry); - - me->init_size = ALIGN(me->init_size, 16); - me->arch.init_stub_offset = me->init_size; - me->init_size += init_stubs * sizeof(struct stub_entry); - me->arch.got_max = gots; me->arch.fdesc_max = fdescs; - me->arch.stub_max = stubs; - me->arch.init_stub_max = init_stubs; return 0; } @@ -380,23 +412,27 @@ enum elf_stub_type { }; static Elf_Addr get_stub(struct module *me, unsigned long value, long addend, - enum elf_stub_type stub_type, int init_section) + enum elf_stub_type stub_type, Elf_Addr loc0, unsigned int targetsec) { - unsigned long i; struct stub_entry *stub; - if(init_section) { - i = me->arch.init_stub_count++; - BUG_ON(me->arch.init_stub_count > me->arch.init_stub_max); - stub = me->module_init + me->arch.init_stub_offset + - i * sizeof(struct stub_entry); - } else { - i = me->arch.stub_count++; - BUG_ON(me->arch.stub_count > me->arch.stub_max); - stub = me->module_core + me->arch.stub_offset + - i * sizeof(struct stub_entry); + /* initialize stub_offset to point in front of the section */ + if (!me->arch.section[targetsec].stub_offset) { + loc0 -= (me->arch.section[targetsec].stub_entries + 1) * + sizeof(struct stub_entry); + /* get correct alignment for the stubs */ + loc0 = ALIGN(loc0, sizeof(struct stub_entry)); + me->arch.section[targetsec].stub_offset = loc0; } + /* get address of stub entry */ + stub = (void *) me->arch.section[targetsec].stub_offset; + me->arch.section[targetsec].stub_offset += sizeof(struct stub_entry); + + /* do not write outside available stub area */ + BUG_ON(0 == me->arch.section[targetsec].stub_entries--); + + #ifndef CONFIG_64BIT /* for 32-bit the stub looks like this: * ldil L'XXX,%r1 @@ -489,15 +525,19 @@ int apply_relocate_add(Elf_Shdr *sechdrs, Elf32_Addr val; Elf32_Sword addend; Elf32_Addr dot; + Elf_Addr loc0; + unsigned int targetsec = sechdrs[relsec].sh_info; //unsigned long dp = (unsigned long)$global$; register unsigned long dp asm ("r27"); DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); + targetsec); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ - loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + loc = (void *)sechdrs[targetsec].sh_addr + rel[i].r_offset; + /* This is the start of the target section */ + loc0 = sechdrs[targetsec].sh_addr; /* This is the symbol it is referring to */ sym = (Elf32_Sym *)sechdrs[symindex].sh_addr + ELF32_R_SYM(rel[i].r_info); @@ -569,19 +609,32 @@ int apply_relocate_add(Elf_Shdr *sechdrs, break; case R_PARISC_PCREL17F: /* 17-bit PC relative address */ - val = get_stub(me, val, addend, ELF_STUB_GOT, in_init(me, loc)); + /* calculate direct call offset */ + val += addend; val = (val - dot - 8)/4; - CHECK_RELOC(val, 17) + if (!RELOC_REACHABLE(val, 17)) { + /* direct distance too far, create + * stub entry instead */ + val = get_stub(me, sym->st_value, addend, + ELF_STUB_DIRECT, loc0, targetsec); + val = (val - dot - 8)/4; + CHECK_RELOC(val, 17); + } *loc = (*loc & ~0x1f1ffd) | reassemble_17(val); break; case R_PARISC_PCREL22F: /* 22-bit PC relative address; only defined for pa20 */ - val = get_stub(me, val, addend, ELF_STUB_GOT, in_init(me, loc)); - DEBUGP("STUB FOR %s loc %lx+%lx at %lx\n", - strtab + sym->st_name, (unsigned long)loc, addend, - val) + /* calculate direct call offset */ + val += addend; val = (val - dot - 8)/4; - CHECK_RELOC(val, 22); + if (!RELOC_REACHABLE(val, 22)) { + /* direct distance too far, create + * stub entry instead */ + val = get_stub(me, sym->st_value, addend, + ELF_STUB_DIRECT, loc0, targetsec); + val = (val - dot - 8)/4; + CHECK_RELOC(val, 22); + } *loc = (*loc & ~0x3ff1ffd) | reassemble_22(val); break; @@ -610,13 +663,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs, Elf64_Addr val; Elf64_Sxword addend; Elf64_Addr dot; + Elf_Addr loc0; + unsigned int targetsec = sechdrs[relsec].sh_info; DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); + targetsec); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ - loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + loc = (void *)sechdrs[targetsec].sh_addr + rel[i].r_offset; + /* This is the start of the target section */ + loc0 = sechdrs[targetsec].sh_addr; /* This is the symbol it is referring to */ sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + ELF64_R_SYM(rel[i].r_info); @@ -672,42 +729,40 @@ int apply_relocate_add(Elf_Shdr *sechdrs, DEBUGP("PCREL22F Symbol %s loc %p val %lx\n", strtab + sym->st_name, loc, val); + val += addend; /* can we reach it locally? */ - if(!in_local_section(me, (void *)val, (void *)dot)) { - - if (in_local(me, (void *)val)) - /* this is the case where the - * symbol is local to the - * module, but in a different - * section, so stub the jump - * in case it's more than 22 - * bits away */ - val = get_stub(me, val, addend, ELF_STUB_DIRECT, - in_init(me, loc)); - else if (strncmp(strtab + sym->st_name, "$$", 2) + if (in_local(me, (void *)val)) { + /* this is the case where the symbol is local + * to the module, but in a different section, + * so stub the jump in case it's more than 22 + * bits away */ + val = (val - dot - 8)/4; + if (!RELOC_REACHABLE(val, 22)) { + /* direct distance too far, create + * stub entry instead */ + val = get_stub(me, sym->st_value, + addend, ELF_STUB_DIRECT, + loc0, targetsec); + } else { + /* Ok, we can reach it directly. */ + val = sym->st_value; + val += addend; + } + } else { + val = sym->st_value; + if (strncmp(strtab + sym->st_name, "$$", 2) == 0) val = get_stub(me, val, addend, ELF_STUB_MILLI, - in_init(me, loc)); + loc0, targetsec); else val = get_stub(me, val, addend, ELF_STUB_GOT, - in_init(me, loc)); + loc0, targetsec); } DEBUGP("STUB FOR %s loc %lx, val %lx+%lx at %lx\n", strtab + sym->st_name, loc, sym->st_value, addend, val); - /* FIXME: local symbols work as long as the - * core and init pieces aren't separated too - * far. If this is ever broken, you will trip - * the check below. The way to fix it would - * be to generate local stubs to go between init - * and core */ - if((Elf64_Sxword)(val - dot - 8) > 0x800000 -1 || - (Elf64_Sxword)(val - dot - 8) < -0x800000) { - printk(KERN_ERR "Module %s, symbol %s is out of range for PCREL22F relocation\n", - me->name, strtab + sym->st_name); - return -ENOEXEC; - } val = (val - dot - 8)/4; + CHECK_RELOC(val, 22); *loc = (*loc & ~0x3ff1ffd) | reassemble_22(val); break; case R_PARISC_DIR64: @@ -794,12 +849,8 @@ int module_finalize(const Elf_Ehdr *hdr, addr = (u32 *)entry->addr; printk("INSNS: %x %x %x %x\n", addr[0], addr[1], addr[2], addr[3]); - printk("stubs used %ld, stubs max %ld\n" - "init_stubs used %ld, init stubs max %ld\n" - "got entries used %ld, gots max %ld\n" + printk("got entries used %ld, gots max %ld\n" "fdescs used %ld, fdescs max %ld\n", - me->arch.stub_count, me->arch.stub_max, - me->arch.init_stub_count, me->arch.init_stub_max, me->arch.got_count, me->arch.got_max, me->arch.fdesc_count, me->arch.fdesc_max); #endif @@ -829,7 +880,10 @@ int module_finalize(const Elf_Ehdr *hdr, me->name, me->arch.got_count, MAX_GOTS); return -EINVAL; } - + + kfree(me->arch.section); + me->arch.section = NULL; + /* no symbol table */ if(symhdr == NULL) return 0; diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index d47f3975c9c..80bc000523f 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -67,21 +67,6 @@ static volatile int cpu_now_booting __read_mostly = 0; /* track which CPU is boo static int parisc_max_cpus __read_mostly = 1; -/* online cpus are ones that we've managed to bring up completely - * possible cpus are all valid cpu - * present cpus are all detected cpu - * - * On startup we bring up the "possible" cpus. Since we discover - * CPUs later, we add them as hotplug, so the possible cpu mask is - * empty in the beginning. - */ - -cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; /* Bitmap of online CPUs */ -cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; /* Bitmap of Present CPUs */ - -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_possible_map); - DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED; enum ipi_message_type { diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h new file mode 100644 index 00000000000..9b198d1b3b2 --- /dev/null +++ b/arch/powerpc/include/asm/disassemble.h @@ -0,0 +1,80 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __ASM_PPC_DISASSEMBLE_H__ +#define __ASM_PPC_DISASSEMBLE_H__ + +#include <linux/types.h> + +static inline unsigned int get_op(u32 inst) +{ + return inst >> 26; +} + +static inline unsigned int get_xop(u32 inst) +{ + return (inst >> 1) & 0x3ff; +} + +static inline unsigned int get_sprn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_dcrn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_rt(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_rs(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_ra(u32 inst) +{ + return (inst >> 16) & 0x1f; +} + +static inline unsigned int get_rb(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_rc(u32 inst) +{ + return inst & 0x1; +} + +static inline unsigned int get_ws(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_d(u32 inst) +{ + return inst & 0xffff; +} + +#endif /* __ASM_PPC_DISASSEMBLE_H__ */ diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h new file mode 100644 index 00000000000..f49031b632c --- /dev/null +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -0,0 +1,61 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __ASM_44X_H__ +#define __ASM_44X_H__ + +#include <linux/kvm_host.h> + +#define PPC44x_TLB_SIZE 64 + +/* If the guest is expecting it, this can be as large as we like; we'd just + * need to find some way of advertising it. */ +#define KVM44x_GUEST_TLB_SIZE 64 + +struct kvmppc_44x_shadow_ref { + struct page *page; + u16 gtlb_index; + u8 writeable; + u8 tid; +}; + +struct kvmppc_vcpu_44x { + /* Unmodified copy of the guest's TLB. */ + struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; + + /* References to guest pages in the hardware TLB. */ + struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; + + /* State of the shadow TLB at guest context switch time. */ + struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; + u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; + + struct kvm_vcpu vcpu; +}; + +static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) +{ + return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); +} + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); +void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); +void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); + +#endif /* __ASM_44X_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 34b52b7180c..c1e436fe773 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -64,27 +64,58 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct tlbe { +struct kvmppc_44x_tlbe { u32 tid; /* Only the low 8 bits are used. */ u32 word0; u32 word1; u32 word2; }; -struct kvm_arch { +enum kvm_exit_types { + MMIO_EXITS, + DCR_EXITS, + SIGNAL_EXITS, + ITLB_REAL_MISS_EXITS, + ITLB_VIRT_MISS_EXITS, + DTLB_REAL_MISS_EXITS, + DTLB_VIRT_MISS_EXITS, + SYSCALL_EXITS, + ISI_EXITS, + DSI_EXITS, + EMULATED_INST_EXITS, + EMULATED_MTMSRWE_EXITS, + EMULATED_WRTEE_EXITS, + EMULATED_MTSPR_EXITS, + EMULATED_MFSPR_EXITS, + EMULATED_MTMSR_EXITS, + EMULATED_MFMSR_EXITS, + EMULATED_TLBSX_EXITS, + EMULATED_TLBWE_EXITS, + EMULATED_RFI_EXITS, + DEC_EXITS, + EXT_INTR_EXITS, + HALT_WAKEUP, + USR_PR_INST, + FP_UNAVAIL, + DEBUG_EXITS, + TIMEINGUEST, + __NUMBER_OF_KVM_EXIT_TYPES }; -struct kvm_vcpu_arch { - /* Unmodified copy of the guest's TLB. */ - struct tlbe guest_tlb[PPC44x_TLB_SIZE]; - /* TLB that's actually used when the guest is running. */ - struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; - /* Pages which are referenced in the shadow TLB. */ - struct page *shadow_pages[PPC44x_TLB_SIZE]; +/* allow access to big endian 32bit upper/lower parts and 64bit var */ +struct kvmppc_exit_timing { + union { + u64 tv64; + struct { + u32 tbu, tbl; + } tv32; + }; +}; - /* Track which TLB entries we've modified in the current exit. */ - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; +struct kvm_arch { +}; +struct kvm_vcpu_arch { u32 host_stack; u32 host_pid; u32 host_dbcr0; @@ -94,32 +125,32 @@ struct kvm_vcpu_arch { u32 host_msr; u64 fpr[32]; - u32 gpr[32]; + ulong gpr[32]; - u32 pc; + ulong pc; u32 cr; - u32 ctr; - u32 lr; - u32 xer; + ulong ctr; + ulong lr; + ulong xer; - u32 msr; + ulong msr; u32 mmucr; - u32 sprg0; - u32 sprg1; - u32 sprg2; - u32 sprg3; - u32 sprg4; - u32 sprg5; - u32 sprg6; - u32 sprg7; - u32 srr0; - u32 srr1; - u32 csrr0; - u32 csrr1; - u32 dsrr0; - u32 dsrr1; - u32 dear; - u32 esr; + ulong sprg0; + ulong sprg1; + ulong sprg2; + ulong sprg3; + ulong sprg4; + ulong sprg5; + ulong sprg6; + ulong sprg7; + ulong srr0; + ulong srr1; + ulong csrr0; + ulong csrr1; + ulong dsrr0; + ulong dsrr1; + ulong dear; + ulong esr; u32 dec; u32 decar; u32 tbl; @@ -127,7 +158,7 @@ struct kvm_vcpu_arch { u32 tcr; u32 tsr; u32 ivor[16]; - u32 ivpr; + ulong ivpr; u32 pir; u32 shadow_pid; @@ -140,9 +171,22 @@ struct kvm_vcpu_arch { u32 dbcr0; u32 dbcr1; +#ifdef CONFIG_KVM_EXIT_TIMING + struct kvmppc_exit_timing timing_exit; + struct kvmppc_exit_timing timing_last_enter; + u32 last_exit_type; + u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_last_exit; + struct dentry *debugfs_exit_timing; +#endif + u32 last_inst; - u32 fault_dear; - u32 fault_esr; + ulong fault_dear; + ulong fault_esr; gpa_t paddr_accessed; u8 io_gpr; /* GPR used as IO source/target */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index bb62ad876de..36d2a50a848 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -29,11 +29,6 @@ #include <linux/kvm_types.h> #include <linux/kvm_host.h> -struct kvm_tlb { - struct tlbe guest_tlb[PPC44x_TLB_SIZE]; - struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; -}; - enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ @@ -41,9 +36,6 @@ enum emulation_result { EMULATE_FAIL, /* can't emulate this instruction */ }; -extern const unsigned char exception_priority[]; -extern const unsigned char priority_exception[]; - extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern char kvmppc_handlers_start[]; extern unsigned long kvmppc_handler_len; @@ -58,51 +50,44 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); -extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, - u64 asid, u32 flags); -extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, - gva_t eend, u32 asid); +extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, + u64 asid, u32 flags, u32 max_bytes, + unsigned int gtlb_idx); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); -/* XXX Book E specific */ -extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); - -extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); - -static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) -{ - unsigned int priority = exception_priority[exception]; - set_bit(priority, &vcpu->arch.pending_exceptions); -} - -static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) -{ - unsigned int priority = exception_priority[exception]; - clear_bit(priority, &vcpu->arch.pending_exceptions); -} - -/* Helper function for "full" MSR writes. No need to call this if only EE is - * changing. */ -static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) -{ - if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) - kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); - - vcpu->arch.msr = new_msr; - - if (vcpu->arch.msr & MSR_WE) - kvm_vcpu_block(vcpu); -} - -static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) -{ - if (vcpu->arch.pid != new_pid) { - vcpu->arch.pid = new_pid; - vcpu->arch.swap_pid = 1; - } -} +/* Core-specific hooks */ + +extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, + unsigned int id); +extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu); +extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu); +extern int kvmppc_core_check_processor_compat(void); +extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr); + +extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); + +extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu); +extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu); + +extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); +extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq); + +extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int op, int *advance); +extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); +extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); + +extern int kvmppc_booke_init(void); +extern void kvmppc_booke_exit(void); extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h index 8a97cfb08b7..27cc6fdcd3b 100644 --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -56,6 +56,7 @@ #ifndef __ASSEMBLY__ extern unsigned int tlb_44x_hwater; +extern unsigned int tlb_44x_index; typedef struct { unsigned int id; diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index c32da6f9799..375258559ae 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -22,11 +22,11 @@ static inline cpumask_t node_to_cpumask(int node) return numa_cpumask_lookup_table[node]; } +#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node]) + static inline int node_to_first_cpu(int node) { - cpumask_t tmp; - tmp = node_to_cpumask(node); - return first_cpu(tmp); + return cpumask_first(cpumask_of_node(node)); } int of_node_to_nid(struct device_node *device); @@ -46,9 +46,12 @@ static inline int pcibus_to_node(struct pci_bus *bus) node_to_cpumask(pcibus_to_node(bus)) \ ) +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_of_node(pcibus_to_node(bus))) + /* sched_domains SD_NODE_INIT for PPC64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ @@ -109,6 +112,8 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev, #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) #endif #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 661d07d2146..9937fe44555 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -23,9 +23,6 @@ #include <linux/mm.h> #include <linux/suspend.h> #include <linux/hrtimer.h> -#ifdef CONFIG_KVM -#include <linux/kvm_host.h> -#endif #ifdef CONFIG_PPC64 #include <linux/time.h> #include <linux/hardirq.h> @@ -51,6 +48,9 @@ #ifdef CONFIG_PPC_ISERIES #include <asm/iseries/alpaca.h> #endif +#ifdef CONFIG_KVM +#include <asm/kvm_44x.h> +#endif #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) #include "head_booke.h" @@ -357,12 +357,10 @@ int main(void) DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM - DEFINE(TLBE_BYTES, sizeof(struct tlbe)); + DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe)); DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); - DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); - DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); @@ -385,5 +383,16 @@ int main(void) DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif +#ifdef CONFIG_KVM_EXIT_TIMING + DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, + arch.timing_exit.tv32.tbu)); + DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, + arch.timing_exit.tv32.tbl)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, + arch.timing_last_enter.tv32.tbu)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, + arch.timing_last_enter.tv32.tbl)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c index 4c85b8d5647..688b329800b 100644 --- a/arch/powerpc/kernel/init_task.c +++ b/arch/powerpc/kernel/init_task.c @@ -7,7 +7,6 @@ #include <linux/mqueue.h> #include <asm/uaccess.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ac222d0ab12..23b8b5e36f9 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -237,7 +237,7 @@ void fixup_irqs(cpumask_t map) mask = map; } if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); + irq_desc[irq].chip->set_affinity(irq, &mask); else if (irq_desc[irq].action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 51b201ddf9a..fb7049c054c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -33,6 +33,7 @@ #include <linux/mqueue.h> #include <linux/hardirq.h> #include <linux/utsname.h> +#include <linux/kernel_stat.h> #include <asm/pgtable.h> #include <asm/uaccess.h> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8ac3f721d23..65484b2200b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -59,13 +59,9 @@ struct thread_info *secondary_ti; -cpumask_t cpu_possible_map = CPU_MASK_NONE; -cpumask_t cpu_online_map = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_possible_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e1f3a514042..c9564031a2a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -256,8 +256,10 @@ void account_system_vtime(struct task_struct *tsk) delta += sys_time; get_paca()->system_time = 0; } - account_system_time(tsk, 0, delta); - account_system_time_scaled(tsk, deltascaled); + if (in_irq() || idle_task(smp_processor_id()) != tsk) + account_system_time(tsk, 0, delta, deltascaled); + else + account_idle_time(delta); per_cpu(cputime_last_delta, smp_processor_id()) = delta; per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; local_irq_restore(flags); @@ -275,10 +277,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick) utime = get_paca()->user_time; get_paca()->user_time = 0; - account_user_time(tsk, utime); - utimescaled = cputime_to_scaled(utime); - account_user_time_scaled(tsk, utimescaled); + account_user_time(tsk, utime, utimescaled); } /* @@ -338,8 +338,12 @@ void calculate_steal_time(void) tb = mftb(); purr = mfspr(SPRN_PURR); stolen = (tb - pme->tb) - (purr - pme->purr); - if (stolen > 0) - account_steal_time(current, stolen); + if (stolen > 0) { + if (idle_task(smp_processor_id()) != current) + account_steal_time(stolen); + else + account_idle_time(stolen); + } pme->tb = tb; pme->purr = purr; } @@ -844,7 +848,7 @@ static void register_decrementer_clockevent(int cpu) struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; *dec = decrementer_clockevent; - dec->cpumask = cpumask_of_cpu(cpu); + dec->cpumask = cpumask_of(cpu); printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n", dec->name, dec->mult, dec->shift, cpu); diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c new file mode 100644 index 00000000000..a66bec57265 --- /dev/null +++ b/arch/powerpc/kvm/44x.c @@ -0,0 +1,228 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/err.h> + +#include <asm/reg.h> +#include <asm/cputable.h> +#include <asm/tlbflush.h> +#include <asm/kvm_44x.h> +#include <asm/kvm_ppc.h> + +#include "44x_tlb.h" + +/* Note: clearing MSR[DE] just means that the debug interrupt will not be + * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. + * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt + * will be delivered as an "imprecise debug event" (which is indicated by + * DBSR[IDE]. + */ +static void kvm44x_disable_debug_interrupts(void) +{ + mtmsr(mfmsr() & ~MSR_DE); +} + +void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) +{ + kvm44x_disable_debug_interrupts(); + + mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); + mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); + mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); + mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); + mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); + mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); + mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); + mtmsr(vcpu->arch.host_msr); +} + +void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) +{ + struct kvm_guest_debug *dbg = &vcpu->guest_debug; + u32 dbcr0 = 0; + + vcpu->arch.host_msr = mfmsr(); + kvm44x_disable_debug_interrupts(); + + /* Save host debug register state. */ + vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); + vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); + vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); + vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); + vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); + vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); + vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); + + /* set registers up for guest */ + + if (dbg->bp[0]) { + mtspr(SPRN_IAC1, dbg->bp[0]); + dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; + } + if (dbg->bp[1]) { + mtspr(SPRN_IAC2, dbg->bp[1]); + dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; + } + if (dbg->bp[2]) { + mtspr(SPRN_IAC3, dbg->bp[2]); + dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; + } + if (dbg->bp[3]) { + mtspr(SPRN_IAC4, dbg->bp[3]); + dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; + } + + mtspr(SPRN_DBCR0, dbcr0); + mtspr(SPRN_DBCR1, 0); + mtspr(SPRN_DBCR2, 0); +} + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + kvmppc_44x_tlb_load(vcpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvmppc_44x_tlb_put(vcpu); +} + +int kvmppc_core_check_processor_compat(void) +{ + int r; + + if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) + r = 0; + else + r = -ENOTSUPP; + + return r; +} + +int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; + int i; + + tlbe->tid = 0; + tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; + tlbe->word1 = 0; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; + + tlbe++; + tlbe->tid = 0; + tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; + tlbe->word1 = 0xef600000; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR + | PPC44x_TLB_I | PPC44x_TLB_G; + + /* Since the guest can directly access the timebase, it must know the + * real timebase frequency. Accordingly, it must see the state of + * CCR1[TCS]. */ + vcpu->arch.ccr1 = mfspr(SPRN_CCR1); + + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) + vcpu_44x->shadow_refs[i].gtlb_index = -1; + + return 0; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ +int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe; + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); + if (index == -1) { + tr->valid = 0; + return 0; + } + + gtlbe = &vcpu_44x->guest_tlb[index]; + + tr->physical_address = tlb_xlate(gtlbe, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; + + return 0; +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_44x *vcpu_44x; + struct kvm_vcpu *vcpu; + int err; + + vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu_44x) { + err = -ENOMEM; + goto out; + } + + vcpu = &vcpu_44x->vcpu; + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + return vcpu; + +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu_44x); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu_44x); +} + +static int kvmppc_44x_init(void) +{ + int r; + + r = kvmppc_booke_init(); + if (r) + return r; + + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE); +} + +static void kvmppc_44x_exit(void) +{ + kvmppc_booke_exit(); +} + +module_init(kvmppc_44x_init); +module_exit(kvmppc_44x_exit); diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c new file mode 100644 index 00000000000..82489a743a6 --- /dev/null +++ b/arch/powerpc/kvm/44x_emulate.c @@ -0,0 +1,371 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <asm/kvm_ppc.h> +#include <asm/dcr.h> +#include <asm/dcr-regs.h> +#include <asm/disassemble.h> +#include <asm/kvm_44x.h> +#include "timing.h" + +#include "booke.h" +#include "44x_tlb.h" + +#define OP_RFI 19 + +#define XOP_RFI 50 +#define XOP_MFMSR 83 +#define XOP_WRTEE 131 +#define XOP_MTMSR 146 +#define XOP_WRTEEI 163 +#define XOP_MFDCR 323 +#define XOP_MTDCR 451 +#define XOP_TLBSX 914 +#define XOP_ICCCI 966 +#define XOP_TLBWE 978 + +static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.srr0; + kvmppc_set_msr(vcpu, vcpu->arch.srr1); +} + +int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + int dcrn; + int ra; + int rb; + int rc; + int rs; + int rt; + int ws; + + switch (get_op(inst)) { + case OP_RFI: + switch (get_xop(inst)) { + case XOP_RFI: + kvmppc_emul_rfi(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); + *advance = 0; + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case 31: + switch (get_xop(inst)) { + + case XOP_MFMSR: + rt = get_rt(inst); + vcpu->arch.gpr[rt] = vcpu->arch.msr; + kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); + break; + + case XOP_MTMSR: + rs = get_rs(inst); + kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); + kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); + break; + + case XOP_WRTEE: + rs = get_rs(inst); + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (vcpu->arch.gpr[rs] & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + case XOP_WRTEEI: + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (inst & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + case XOP_MFDCR: + dcrn = get_dcrn(inst); + rt = get_rt(inst); + + /* The guest may access CPR0 registers to determine the timebase + * frequency, and it must know the real host frequency because it + * can directly access the timebase registers. + * + * It would be possible to emulate those accesses in userspace, + * but userspace can really only figure out the end frequency. + * We could decompose that into the factors that compute it, but + * that's tricky math, and it's easier to just report the real + * CPR0 values. + */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; + break; + case DCRN_CPR0_CONFIG_DATA: + local_irq_disable(); + mtdcr(DCRN_CPR0_CONFIG_ADDR, + vcpu->arch.cpr0_cfgaddr); + vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); + local_irq_enable(); + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = 0; + run->dcr.is_write = 0; + vcpu->arch.io_gpr = rt; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + emulated = EMULATE_DO_DCR; + } + + break; + + case XOP_MTDCR: + dcrn = get_dcrn(inst); + rs = get_rs(inst); + + /* emulate some access in kernel */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = vcpu->arch.gpr[rs]; + run->dcr.is_write = 1; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + emulated = EMULATE_DO_DCR; + } + + break; + + case XOP_TLBWE: + ra = get_ra(inst); + rs = get_rs(inst); + ws = get_ws(inst); + emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); + break; + + case XOP_TLBSX: + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + rc = get_rc(inst); + emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); + break; + + case XOP_ICCCI: + break; + + default: + emulated = EMULATE_FAIL; + } + + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} + +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + switch (sprn) { + case SPRN_MMUCR: + vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; + case SPRN_PID: + kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; + case SPRN_CCR0: + vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; + case SPRN_CCR1: + vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; + case SPRN_DEAR: + vcpu->arch.dear = vcpu->arch.gpr[rs]; break; + case SPRN_ESR: + vcpu->arch.esr = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR0: + vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR1: + vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; + case SPRN_TSR: + vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; + case SPRN_TCR: + vcpu->arch.tcr = vcpu->arch.gpr[rs]; + kvmppc_emulate_dec(vcpu); + break; + + /* Note: SPRG4-7 are user-readable. These values are + * loaded into the real SPRGs when resuming the + * guest. */ + case SPRN_SPRG4: + vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG5: + vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG6: + vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG7: + vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; + + case SPRN_IVPR: + vcpu->arch.ivpr = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR0: + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR1: + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR2: + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR3: + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR4: + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR5: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR6: + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR7: + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR8: + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR9: + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR10: + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR11: + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR12: + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR13: + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR14: + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR15: + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; + break; + + default: + return EMULATE_FAIL; + } + + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); + return EMULATE_DONE; +} + +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + switch (sprn) { + /* 440 */ + case SPRN_MMUCR: + vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; + case SPRN_CCR0: + vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; + case SPRN_CCR1: + vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; + + /* Book E */ + case SPRN_PID: + vcpu->arch.gpr[rt] = vcpu->arch.pid; break; + case SPRN_IVPR: + vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; + case SPRN_DEAR: + vcpu->arch.gpr[rt] = vcpu->arch.dear; break; + case SPRN_ESR: + vcpu->arch.gpr[rt] = vcpu->arch.esr; break; + case SPRN_DBCR0: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; + case SPRN_DBCR1: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; + + case SPRN_IVOR0: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + break; + case SPRN_IVOR1: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + break; + case SPRN_IVOR2: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + break; + case SPRN_IVOR3: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + break; + case SPRN_IVOR4: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + break; + case SPRN_IVOR5: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + break; + case SPRN_IVOR6: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + break; + case SPRN_IVOR7: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + break; + case SPRN_IVOR8: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + break; + case SPRN_IVOR9: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + break; + case SPRN_IVOR10: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + break; + case SPRN_IVOR11: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + break; + case SPRN_IVOR12: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + break; + case SPRN_IVOR13: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + break; + case SPRN_IVOR14: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + break; + case SPRN_IVOR15: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; + break; + + default: + return EMULATE_FAIL; + } + + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); + return EMULATE_DONE; +} + diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index ad72c6f9811..9a34b8edb9e 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -22,20 +22,103 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <linux/highmem.h> + +#include <asm/tlbflush.h> #include <asm/mmu-44x.h> #include <asm/kvm_ppc.h> +#include <asm/kvm_44x.h> +#include "timing.h" #include "44x_tlb.h" +#ifndef PPC44x_TLBE_SIZE +#define PPC44x_TLBE_SIZE PPC44x_TLB_4K +#endif + +#define PAGE_SIZE_4K (1<<12) +#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) + +#define PPC44x_TLB_UATTR_MASK \ + (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) -static unsigned int kvmppc_tlb_44x_pos; +#ifdef DEBUG +void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) +{ + struct kvmppc_44x_tlbe *tlbe; + int i; + + printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); + printk("| %2s | %3s | %8s | %8s | %8s |\n", + "nr", "tid", "word0", "word1", "word2"); + + for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { + tlbe = &vcpu_44x->guest_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" G%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } +} +#endif + +static inline void kvmppc_44x_tlbie(unsigned int index) +{ + /* 0 <= index < 64, so the V bit is clear and we can use the index as + * word0. */ + asm volatile( + "tlbwe %[index], %[index], 0\n" + : + : [index] "r"(index) + ); +} + +static inline void kvmppc_44x_tlbre(unsigned int index, + struct kvmppc_44x_tlbe *tlbe) +{ + asm volatile( + "tlbre %[word0], %[index], 0\n" + "mfspr %[tid], %[sprn_mmucr]\n" + "andi. %[tid], %[tid], 0xff\n" + "tlbre %[word1], %[index], 1\n" + "tlbre %[word2], %[index], 2\n" + : [word0] "=r"(tlbe->word0), + [word1] "=r"(tlbe->word1), + [word2] "=r"(tlbe->word2), + [tid] "=r"(tlbe->tid) + : [index] "r"(index), + [sprn_mmucr] "i"(SPRN_MMUCR) + : "cc" + ); +} + +static inline void kvmppc_44x_tlbwe(unsigned int index, + struct kvmppc_44x_tlbe *stlbe) +{ + unsigned long tmp; + + asm volatile( + "mfspr %[tmp], %[sprn_mmucr]\n" + "rlwimi %[tmp], %[tid], 0, 0xff\n" + "mtspr %[sprn_mmucr], %[tmp]\n" + "tlbwe %[word0], %[index], 0\n" + "tlbwe %[word1], %[index], 1\n" + "tlbwe %[word2], %[index], 2\n" + : [tmp] "=&r"(tmp) + : [word0] "r"(stlbe->word0), + [word1] "r"(stlbe->word1), + [word2] "r"(stlbe->word2), + [tid] "r"(stlbe->tid), + [index] "r"(index), + [sprn_mmucr] "i"(SPRN_MMUCR) + ); +} static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) { - /* Mask off reserved bits. */ - attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; + /* We only care about the guest's permission and user bits. */ + attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; if (!usermode) { /* Guest is in supervisor mode, so we need to translate guest @@ -47,18 +130,60 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) /* Make sure host can always access this memory. */ attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; + /* WIMGE = 0b00100 */ + attrib |= PPC44x_TLB_M; + return attrib; } +/* Load shadow TLB back into hardware. */ +void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i <= tlb_44x_hwater; i++) { + struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; + + if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) + kvmppc_44x_tlbwe(i, stlbe); + } +} + +static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, + unsigned int i) +{ + vcpu_44x->shadow_tlb_mod[i] = 1; +} + +/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ +void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i <= tlb_44x_hwater; i++) { + struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; + + if (vcpu_44x->shadow_tlb_mod[i]) + kvmppc_44x_tlbre(i, stlbe); + + if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) + kvmppc_44x_tlbie(i); + } +} + + /* Search the guest TLB for a matching entry. */ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as) { + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < PPC44x_TLB_SIZE; i++) { - struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; + for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { + struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; unsigned int tid; if (eaddr < get_tlb_eaddr(tlbe)) @@ -83,78 +208,89 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, return -1; } -struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) +int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_IS); - unsigned int index; - index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); - if (index == -1) - return NULL; - return &vcpu->arch.guest_tlb[index]; + return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } -struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) +int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_DS); - unsigned int index; - index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); - if (index == -1) - return NULL; - return &vcpu->arch.guest_tlb[index]; + return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } -static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) +static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, + unsigned int stlb_index) { - return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); -} + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; -static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, - unsigned int index) -{ - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index]; - struct page *page = vcpu->arch.shadow_pages[index]; + if (!ref->page) + return; - if (get_tlb_v(stlbe)) { - if (kvmppc_44x_tlbe_is_writable(stlbe)) - kvm_release_page_dirty(page); - else - kvm_release_page_clean(page); - } + /* Discard from the TLB. */ + /* Note: we could actually invalidate a host mapping, if the host overwrote + * this TLB entry since we inserted a guest mapping. */ + kvmppc_44x_tlbie(stlb_index); + + /* Now release the page. */ + if (ref->writeable) + kvm_release_page_dirty(ref->page); + else + kvm_release_page_clean(ref->page); + + ref->page = NULL; + + /* XXX set tlb_44x_index to stlb_index? */ + + KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler); } void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) { + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_44x_shadow_release(vcpu, i); -} - -void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i) -{ - vcpu->arch.shadow_tlb_mod[i] = 1; + kvmppc_44x_shadow_release(vcpu_44x, i); } -/* Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, - u32 flags) +/** + * kvmppc_mmu_map -- create a host mapping for guest memory + * + * If the guest wanted a larger page than the host supports, only the first + * host page is mapped here and the rest are demand faulted. + * + * If the guest wanted a smaller page than the host page size, we map only the + * guest-size page (i.e. not a full host page mapping). + * + * Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. + */ +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, + u32 flags, u32 max_bytes, unsigned int gtlb_index) { + struct kvmppc_44x_tlbe stlbe; + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_shadow_ref *ref; struct page *new_page; - struct tlbe *stlbe; hpa_t hpaddr; + gfn_t gfn; unsigned int victim; - /* Future optimization: don't overwrite the TLB entry containing the - * current PC (or stack?). */ - victim = kvmppc_tlb_44x_pos++; - if (kvmppc_tlb_44x_pos > tlb_44x_hwater) - kvmppc_tlb_44x_pos = 0; - stlbe = &vcpu->arch.shadow_tlb[victim]; + /* Select TLB entry to clobber. Indirectly guard against races with the TLB + * miss handler by disabling interrupts. */ + local_irq_disable(); + victim = ++tlb_44x_index; + if (victim > tlb_44x_hwater) + victim = 0; + tlb_44x_index = victim; + local_irq_enable(); /* Get reference to new page. */ + gfn = gpaddr >> PAGE_SHIFT; new_page = gfn_to_page(vcpu->kvm, gfn); if (is_error_page(new_page)) { printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); @@ -163,10 +299,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, } hpaddr = page_to_phys(new_page); - /* Drop reference to old page. */ - kvmppc_44x_shadow_release(vcpu, victim); - - vcpu->arch.shadow_pages[victim] = new_page; + /* Invalidate any previous shadow mappings. */ + kvmppc_44x_shadow_release(vcpu_44x, victim); /* XXX Make sure (va, size) doesn't overlap any other * entries. 440x6 user manual says the result would be @@ -174,78 +308,193 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, /* XXX what about AS? */ - stlbe->tid = !(asid & 0xff); - /* Force TS=1 for all guest mappings. */ - /* For now we hardcode 4KB mappings, but it will be important to - * use host large pages in the future. */ - stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS - | PPC44x_TLB_4K; - stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); - stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, - vcpu->arch.msr & MSR_PR); - kvmppc_tlbe_set_modified(vcpu, victim); + stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; + + if (max_bytes >= PAGE_SIZE) { + /* Guest mapping is larger than or equal to host page size. We can use + * a "native" host mapping. */ + stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; + } else { + /* Guest mapping is smaller than host page size. We must restrict the + * size of the mapping to be at most the smaller of the two, but for + * simplicity we fall back to a 4K mapping (this is probably what the + * guest is using anyways). */ + stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; + + /* 'hpaddr' is a host page, which is larger than the mapping we're + * inserting here. To compensate, we must add the in-page offset to the + * sub-page. */ + hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); + } - KVMTRACE_5D(STLB_WRITE, vcpu, victim, - stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, - handler); + stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); + stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, + vcpu->arch.msr & MSR_PR); + stlbe.tid = !(asid & 0xff); + + /* Keep track of the reference so we can properly release it later. */ + ref = &vcpu_44x->shadow_refs[victim]; + ref->page = new_page; + ref->gtlb_index = gtlb_index; + ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); + ref->tid = stlbe.tid; + + /* Insert shadow mapping into hardware TLB. */ + kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); + kvmppc_44x_tlbwe(victim, &stlbe); + KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1, + stlbe.word2, handler); } -void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, - gva_t eend, u32 asid) +/* For a particular guest TLB entry, invalidate the corresponding host TLB + * mappings and release the host pages. */ +static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, + unsigned int gtlb_index) { - unsigned int pid = !(asid & 0xff); + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i <= tlb_44x_hwater; i++) { - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; - unsigned int tid; + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; + if (ref->gtlb_index == gtlb_index) + kvmppc_44x_shadow_release(vcpu_44x, i); + } +} - if (!get_tlb_v(stlbe)) - continue; +void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +{ + vcpu->arch.shadow_pid = !usermode; +} - if (eend < get_tlb_eaddr(stlbe)) - continue; +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; - if (eaddr > get_tlb_end(stlbe)) - continue; + if (unlikely(vcpu->arch.pid == new_pid)) + return; - tid = get_tlb_tid(stlbe); - if (tid && (tid != pid)) - continue; + vcpu->arch.pid = new_pid; - kvmppc_44x_shadow_release(vcpu, i); - stlbe->word0 = 0; - kvmppc_tlbe_set_modified(vcpu, i); - KVMTRACE_5D(STLB_INVAL, vcpu, i, - stlbe->tid, stlbe->word0, stlbe->word1, - stlbe->word2, handler); + /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it + * can't access guest kernel mappings (TID=1). When we switch to a new + * guest PID, which will also use host PID=0, we must discard the old guest + * userspace mappings. */ + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; + + if (ref->tid == 0) + kvmppc_44x_shadow_release(vcpu_44x, i); } } -/* Invalidate all mappings on the privilege switch after PID has been changed. - * The guest always runs with PID=1, so we must clear the entire TLB when - * switching address spaces. */ -void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct kvmppc_44x_tlbe *tlbe) { - int i; + gpa_t gpa; - if (vcpu->arch.swap_pid) { - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i <= tlb_44x_hwater; i++) { - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; - - /* Future optimization: clear only userspace mappings. */ - kvmppc_44x_shadow_release(vcpu, i); - stlbe->word0 = 0; - kvmppc_tlbe_set_modified(vcpu, i); - KVMTRACE_5D(STLB_INVAL, vcpu, i, - stlbe->tid, stlbe->word0, stlbe->word1, - stlbe->word2, handler); - } - vcpu->arch.swap_pid = 0; + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *tlbe; + unsigned int gtlb_index; + + gtlb_index = vcpu->arch.gpr[ra]; + if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { + printk("%s: index %d\n", __func__, gtlb_index); + kvmppc_dump_vcpu(vcpu); + return EMULATE_FAIL; } - vcpu->arch.shadow_pid = !usermode; + tlbe = &vcpu_44x->guest_tlb[gtlb_index]; + + /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ + if (tlbe->word0 & PPC44x_TLB_VALID) + kvmppc_44x_invalidate(vcpu, gtlb_index); + + switch (ws) { + case PPC44x_TLB_PAGEID: + tlbe->tid = get_mmucr_stid(vcpu); + tlbe->word0 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_XLAT: + tlbe->word1 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_ATTRIB: + tlbe->word2 = vcpu->arch.gpr[rs]; + break; + + default: + return EMULATE_FAIL; + } + + if (tlbe_is_host_safe(vcpu, tlbe)) { + u64 asid; + gva_t eaddr; + gpa_t gpaddr; + u32 flags; + u32 bytes; + + eaddr = get_tlb_eaddr(tlbe); + gpaddr = get_tlb_raddr(tlbe); + + /* Use the advertised page size to mask effective and real addrs. */ + bytes = get_tlb_bytes(tlbe); + eaddr &= ~(bytes - 1); + gpaddr &= ~(bytes - 1); + + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; + flags = tlbe->word2 & 0xffff; + + kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index); + } + + KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0, + tlbe->word1, tlbe->word2, handler); + + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); + return EMULATE_DONE; +} + +int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) +{ + u32 ea; + int gtlb_index; + unsigned int as = get_mmucr_sts(vcpu); + unsigned int pid = get_mmucr_stid(vcpu); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); + if (rc) { + if (gtlb_index < 0) + vcpu->arch.cr &= ~0x20000000; + else + vcpu->arch.cr |= 0x20000000; + } + vcpu->arch.gpr[rt] = gtlb_index; + + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); + return EMULATE_DONE; } diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h index 2ccd46b6f6b..772191f29e6 100644 --- a/arch/powerpc/kvm/44x_tlb.h +++ b/arch/powerpc/kvm/44x_tlb.h @@ -25,48 +25,52 @@ extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as); -extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); -extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); +extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); +extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); + +extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, + u8 rc); +extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); /* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct tlbe *tlbe) +static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) { return (tlbe->word0 >> 4) & 0xf; } -static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) +static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) { return tlbe->word0 & 0xfffffc00; } -static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) +static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) { unsigned int pgsize = get_tlb_size(tlbe); return 1 << 10 << (pgsize << 1); } -static inline gva_t get_tlb_end(const struct tlbe *tlbe) +static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) { return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; } -static inline u64 get_tlb_raddr(const struct tlbe *tlbe) +static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) { u64 word1 = tlbe->word1; return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); } -static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) +static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) { return tlbe->tid & 0xff; } -static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) +static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) { return (tlbe->word0 >> 8) & 0x1; } -static inline unsigned int get_tlb_v(const struct tlbe *tlbe) +static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) { return (tlbe->word0 >> 9) & 0x1; } @@ -81,7 +85,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) return (vcpu->arch.mmucr >> 16) & 0x1; } -static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) +static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr) { unsigned int pgmask = get_tlb_bytes(tlbe) - 1; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 53aaa66b25e..6dbdc4817d8 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -15,27 +15,33 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION config KVM - bool "Kernel-based Virtual Machine (KVM) support" - depends on 44x && EXPERIMENTAL + bool select PREEMPT_NOTIFIERS select ANON_INODES - # We can only run on Book E hosts so far - select KVM_BOOKE_HOST + +config KVM_440 + bool "KVM support for PowerPC 440 processors" + depends on EXPERIMENTAL && 44x + select KVM ---help--- - Support hosting virtualized guest machines. You will also - need to select one or more of the processor modules below. + Support running unmodified 440 guest kernels in virtual machines on + 440 host processors. This module provides access to the hardware capabilities through a character device node named /dev/kvm. If unsure, say N. -config KVM_BOOKE_HOST - bool "KVM host support for Book E PowerPC processors" - depends on KVM && 44x +config KVM_EXIT_TIMING + bool "Detailed exit timing" + depends on KVM ---help--- - Provides host support for KVM on Book E PowerPC processors. Currently - this works on 440 processors only. + Calculate elapsed time for every exit/enter cycle. A per-vcpu + report is available in debugfs kvm/vm#_vcpu#_timing. + The overhead is relatively small, however it is not recommended for + production environments. + + If unsure, say N. config KVM_TRACE bool "KVM trace support" diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 2a5d4397ac4..df7ba59e6d5 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -8,10 +8,16 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) -kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o +kvm-objs := $(common-objs-y) powerpc.o emulate.o +obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM) += kvm.o AFLAGS_booke_interrupts.o := -I$(obj) -kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o -obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o +kvm-440-objs := \ + booke.o \ + booke_interrupts.o \ + 44x.o \ + 44x_tlb.o \ + 44x_emulate.o +obj-$(CONFIG_KVM_440) += kvm-440.o diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke.c index 7b2591e26ba..35485dd6927 100644 --- a/arch/powerpc/kvm/booke_guest.c +++ b/arch/powerpc/kvm/booke.c @@ -24,21 +24,26 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/fs.h> + #include <asm/cputable.h> #include <asm/uaccess.h> #include <asm/kvm_ppc.h> +#include "timing.h" +#include <asm/cacheflush.h> +#include <asm/kvm_44x.h> +#include "booke.h" #include "44x_tlb.h" +unsigned long kvmppc_booke_handlers; + #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU struct kvm_stats_debugfs_item debugfs_entries[] = { - { "exits", VCPU_STAT(sum_exits) }, { "mmio", VCPU_STAT(mmio_exits) }, { "dcr", VCPU_STAT(dcr_exits) }, { "sig", VCPU_STAT(signal_exits) }, - { "light", VCPU_STAT(light_exits) }, { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, @@ -53,103 +58,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -static const u32 interrupt_msr_mask[16] = { - [BOOKE_INTERRUPT_CRITICAL] = MSR_ME, - [BOOKE_INTERRUPT_MACHINE_CHECK] = 0, - [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME, - [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_DEBUG] = MSR_ME, -}; - -const unsigned char exception_priority[] = { - [BOOKE_INTERRUPT_DATA_STORAGE] = 0, - [BOOKE_INTERRUPT_INST_STORAGE] = 1, - [BOOKE_INTERRUPT_ALIGNMENT] = 2, - [BOOKE_INTERRUPT_PROGRAM] = 3, - [BOOKE_INTERRUPT_FP_UNAVAIL] = 4, - [BOOKE_INTERRUPT_SYSCALL] = 5, - [BOOKE_INTERRUPT_AP_UNAVAIL] = 6, - [BOOKE_INTERRUPT_DTLB_MISS] = 7, - [BOOKE_INTERRUPT_ITLB_MISS] = 8, - [BOOKE_INTERRUPT_MACHINE_CHECK] = 9, - [BOOKE_INTERRUPT_DEBUG] = 10, - [BOOKE_INTERRUPT_CRITICAL] = 11, - [BOOKE_INTERRUPT_WATCHDOG] = 12, - [BOOKE_INTERRUPT_EXTERNAL] = 13, - [BOOKE_INTERRUPT_FIT] = 14, - [BOOKE_INTERRUPT_DECREMENTER] = 15, -}; - -const unsigned char priority_exception[] = { - BOOKE_INTERRUPT_DATA_STORAGE, - BOOKE_INTERRUPT_INST_STORAGE, - BOOKE_INTERRUPT_ALIGNMENT, - BOOKE_INTERRUPT_PROGRAM, - BOOKE_INTERRUPT_FP_UNAVAIL, - BOOKE_INTERRUPT_SYSCALL, - BOOKE_INTERRUPT_AP_UNAVAIL, - BOOKE_INTERRUPT_DTLB_MISS, - BOOKE_INTERRUPT_ITLB_MISS, - BOOKE_INTERRUPT_MACHINE_CHECK, - BOOKE_INTERRUPT_DEBUG, - BOOKE_INTERRUPT_CRITICAL, - BOOKE_INTERRUPT_WATCHDOG, - BOOKE_INTERRUPT_EXTERNAL, - BOOKE_INTERRUPT_FIT, - BOOKE_INTERRUPT_DECREMENTER, -}; - - -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct tlbe *tlbe; - int i; - - printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); - printk("| %2s | %3s | %8s | %8s | %8s |\n", - "nr", "tid", "word0", "word1", "word2"); - - for (i = 0; i < PPC44x_TLB_SIZE; i++) { - tlbe = &vcpu->arch.guest_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" G%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } - - for (i = 0; i < PPC44x_TLB_SIZE; i++) { - tlbe = &vcpu->arch.shadow_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" S%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } -} - /* TODO: use vcpu_printf() */ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) { int i; - printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); - printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); - printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); + printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr); + printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); + printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1); printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); for (i = 0; i < 32; i += 4) { - printk("gpr%02d: %08x %08x %08x %08x\n", i, + printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, vcpu->arch.gpr[i], vcpu->arch.gpr[i+1], vcpu->arch.gpr[i+2], @@ -157,69 +78,96 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) } } -/* Check if we are ready to deliver the interrupt */ -static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) +static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, + unsigned int priority) { - int r; + set_bit(priority, &vcpu->arch.pending_exceptions); +} - switch (interrupt) { - case BOOKE_INTERRUPT_CRITICAL: - r = vcpu->arch.msr & MSR_CE; - break; - case BOOKE_INTERRUPT_MACHINE_CHECK: - r = vcpu->arch.msr & MSR_ME; - break; - case BOOKE_INTERRUPT_EXTERNAL: - r = vcpu->arch.msr & MSR_EE; +void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); +} + +void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); +} + +int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) +{ + return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); +} + +void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); +} + +/* Deliver the interrupt of the corresponding priority, if possible. */ +static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, + unsigned int priority) +{ + int allowed = 0; + ulong msr_mask; + + switch (priority) { + case BOOKE_IRQPRIO_PROGRAM: + case BOOKE_IRQPRIO_DTLB_MISS: + case BOOKE_IRQPRIO_ITLB_MISS: + case BOOKE_IRQPRIO_SYSCALL: + case BOOKE_IRQPRIO_DATA_STORAGE: + case BOOKE_IRQPRIO_INST_STORAGE: + case BOOKE_IRQPRIO_FP_UNAVAIL: + case BOOKE_IRQPRIO_AP_UNAVAIL: + case BOOKE_IRQPRIO_ALIGNMENT: + allowed = 1; + msr_mask = MSR_CE|MSR_ME|MSR_DE; break; - case BOOKE_INTERRUPT_DECREMENTER: - r = vcpu->arch.msr & MSR_EE; + case BOOKE_IRQPRIO_CRITICAL: + case BOOKE_IRQPRIO_WATCHDOG: + allowed = vcpu->arch.msr & MSR_CE; + msr_mask = MSR_ME; break; - case BOOKE_INTERRUPT_FIT: - r = vcpu->arch.msr & MSR_EE; + case BOOKE_IRQPRIO_MACHINE_CHECK: + allowed = vcpu->arch.msr & MSR_ME; + msr_mask = 0; break; - case BOOKE_INTERRUPT_WATCHDOG: - r = vcpu->arch.msr & MSR_CE; + case BOOKE_IRQPRIO_EXTERNAL: + case BOOKE_IRQPRIO_DECREMENTER: + case BOOKE_IRQPRIO_FIT: + allowed = vcpu->arch.msr & MSR_EE; + msr_mask = MSR_CE|MSR_ME|MSR_DE; break; - case BOOKE_INTERRUPT_DEBUG: - r = vcpu->arch.msr & MSR_DE; + case BOOKE_IRQPRIO_DEBUG: + allowed = vcpu->arch.msr & MSR_DE; + msr_mask = MSR_ME; break; - default: - r = 1; } - return r; -} + if (allowed) { + vcpu->arch.srr0 = vcpu->arch.pc; + vcpu->arch.srr1 = vcpu->arch.msr; + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; + kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); -static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) -{ - switch (interrupt) { - case BOOKE_INTERRUPT_DECREMENTER: - vcpu->arch.tsr |= TSR_DIS; - break; + clear_bit(priority, &vcpu->arch.pending_exceptions); } - vcpu->arch.srr0 = vcpu->arch.pc; - vcpu->arch.srr1 = vcpu->arch.msr; - vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt]; - kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]); + return allowed; } /* Check pending exceptions and deliver one, if possible. */ -void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) +void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; - unsigned int exception; unsigned int priority; - priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); + priority = __ffs(*pending); while (priority <= BOOKE_MAX_INTERRUPT) { - exception = priority_exception[priority]; - if (kvmppc_can_deliver_interrupt(vcpu, exception)) { - kvmppc_clear_exception(vcpu, exception); - kvmppc_deliver_interrupt(vcpu, exception); + if (kvmppc_booke_irqprio_deliver(vcpu, priority)) break; - } priority = find_next_bit(pending, BITS_PER_BYTE * sizeof(*pending), @@ -238,6 +186,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, enum emulation_result er; int r = RESUME_HOST; + /* update before a new last_exit_type is rewritten */ + kvmppc_update_timing_stats(vcpu); + local_irq_enable(); run->exit_reason = KVM_EXIT_UNKNOWN; @@ -251,21 +202,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_EXTERNAL: + kvmppc_account_exit(vcpu, EXT_INTR_EXITS); + if (need_resched()) + cond_resched(); + r = RESUME_GUEST; + break; + case BOOKE_INTERRUPT_DECREMENTER: /* Since we switched IVPR back to the host's value, the host * handled this interrupt the moment we enabled interrupts. * Now we just offer it a chance to reschedule the guest. */ - - /* XXX At this point the TLB still holds our shadow TLB, so if - * we do reschedule the host will fault over it. Perhaps we - * should politely restore the host's entries to minimize - * misses before ceding control. */ + kvmppc_account_exit(vcpu, DEC_EXITS); if (need_resched()) cond_resched(); - if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) - vcpu->stat.dec_exits++; - else - vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; @@ -274,17 +223,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Program traps generated by user-level software must be handled * by the guest kernel. */ vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); r = RESUME_GUEST; + kvmppc_account_exit(vcpu, USR_PR_INST); break; } er = kvmppc_emulate_instruction(run, vcpu); switch (er) { case EMULATE_DONE: + /* don't overwrite subtypes, just account kvm_stats */ + kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); /* Future optimization: only reload non-volatiles if * they were actually modified by emulation. */ - vcpu->stat.emulated_inst_exits++; r = RESUME_GUEST_NV; break; case EMULATE_DO_DCR: @@ -293,7 +244,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case EMULATE_FAIL: /* XXX Deliver Program interrupt to guest. */ - printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", + printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); /* For debugging, encode the failing instruction and * report it to userspace. */ @@ -307,48 +258,53 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_FP_UNAVAIL: - kvmppc_queue_exception(vcpu, exit_nr); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); + kvmppc_account_exit(vcpu, FP_UNAVAIL); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_DATA_STORAGE: vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.dsi_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); + kvmppc_account_exit(vcpu, DSI_EXITS); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_INST_STORAGE: vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.isi_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); + kvmppc_account_exit(vcpu, ISI_EXITS); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_SYSCALL: - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.syscall_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); + kvmppc_account_exit(vcpu, SYSCALL_EXITS); r = RESUME_GUEST; break; + /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_DTLB_MISS: { - struct tlbe *gtlbe; + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe; unsigned long eaddr = vcpu->arch.fault_dear; + int gtlb_index; gfn_t gfn; /* Check the guest TLB. */ - gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); - if (!gtlbe) { + gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr); + if (gtlb_index < 0) { /* The guest didn't have a mapping for it. */ - kvmppc_queue_exception(vcpu, exit_nr); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; - vcpu->stat.dtlb_real_miss_exits++; + kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); r = RESUME_GUEST; break; } + gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; @@ -359,38 +315,45 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, - gtlbe->word2); - vcpu->stat.dtlb_virt_miss_exits++; + kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid, + gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); + kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); r = RESUME_GUEST; } else { /* Guest has mapped and accessed a page which is not * actually RAM. */ r = kvmppc_emulate_mmio(run, vcpu); + kvmppc_account_exit(vcpu, MMIO_EXITS); } break; } + /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_ITLB_MISS: { - struct tlbe *gtlbe; + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe; unsigned long eaddr = vcpu->arch.pc; + gpa_t gpaddr; gfn_t gfn; + int gtlb_index; r = RESUME_GUEST; /* Check the guest TLB. */ - gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); - if (!gtlbe) { + gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr); + if (gtlb_index < 0) { /* The guest didn't have a mapping for it. */ - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.itlb_real_miss_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); + kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS); break; } - vcpu->stat.itlb_virt_miss_exits++; + kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); - gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; + gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; + gpaddr = tlb_xlate(gtlbe, eaddr); + gfn = gpaddr >> PAGE_SHIFT; if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { /* The guest TLB had a mapping, but the shadow TLB @@ -399,12 +362,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, - gtlbe->word2); + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid, + gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); } else { /* Guest mapped and leaped at non-RAM! */ - kvmppc_queue_exception(vcpu, - BOOKE_INTERRUPT_MACHINE_CHECK); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); } break; @@ -421,6 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, mtspr(SPRN_DBSR, dbsr); run->exit_reason = KVM_EXIT_DEBUG; + kvmppc_account_exit(vcpu, DEBUG_EXITS); r = RESUME_HOST; break; } @@ -432,10 +395,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_disable(); - kvmppc_check_and_deliver_interrupts(vcpu); + kvmppc_core_deliver_interrupts(vcpu); - /* Do some exit accounting. */ - vcpu->stat.sum_exits++; if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if * we aren't already exiting to userspace for some other @@ -443,22 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (signal_pending(current)) { run->exit_reason = KVM_EXIT_INTR; r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); - - vcpu->stat.signal_exits++; - } else { - vcpu->stat.light_exits++; - } - } else { - switch (run->exit_reason) { - case KVM_EXIT_MMIO: - vcpu->stat.mmio_exits++; - break; - case KVM_EXIT_DCR: - vcpu->stat.dcr_exits++; - break; - case KVM_EXIT_INTR: - vcpu->stat.signal_exits++; - break; + kvmppc_account_exit(vcpu, SIGNAL_EXITS); } } @@ -468,20 +414,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - struct tlbe *tlbe = &vcpu->arch.guest_tlb[0]; - - tlbe->tid = 0; - tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; - tlbe->word1 = 0; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; - - tlbe++; - tlbe->tid = 0; - tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; - tlbe->word1 = 0xef600000; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR - | PPC44x_TLB_I | PPC44x_TLB_G; - vcpu->arch.pc = 0; vcpu->arch.msr = 0; vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ @@ -492,12 +424,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) * before it's programmed its own IVPR. */ vcpu->arch.ivpr = 0x55550000; - /* Since the guest can directly access the timebase, it must know the - * real timebase frequency. Accordingly, it must see the state of - * CCR1[TCS]. */ - vcpu->arch.ccr1 = mfspr(SPRN_CCR1); + kvmppc_init_timing_stats(vcpu); - return 0; + return kvmppc_core_vcpu_setup(vcpu); } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) @@ -536,7 +465,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.ctr = regs->ctr; vcpu->arch.lr = regs->lr; vcpu->arch.xer = regs->xer; - vcpu->arch.msr = regs->msr; + kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.srr0 = regs->srr0; vcpu->arch.srr1 = regs->srr1; vcpu->arch.sprg0 = regs->sprg0; @@ -575,31 +504,62 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { - struct tlbe *gtlbe; - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); - if (index == -1) { - tr->valid = 0; - return 0; - } + return kvmppc_core_vcpu_translate(vcpu, tr); +} - gtlbe = &vcpu->arch.guest_tlb[index]; +int kvmppc_booke_init(void) +{ + unsigned long ivor[16]; + unsigned long max_ivor = 0; + int i; - tr->physical_address = tlb_xlate(gtlbe, eaddr); - /* XXX what does "writeable" and "usermode" even mean? */ - tr->valid = 1; + /* We install our own exception handlers by hijacking IVPR. IVPR must + * be 16-bit aligned, so we need a 64KB allocation. */ + kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, + VCPU_SIZE_ORDER); + if (!kvmppc_booke_handlers) + return -ENOMEM; + + /* XXX make sure our handlers are smaller than Linux's */ + + /* Copy our interrupt handlers to match host IVORs. That way we don't + * have to swap the IVORs on every guest/host transition. */ + ivor[0] = mfspr(SPRN_IVOR0); + ivor[1] = mfspr(SPRN_IVOR1); + ivor[2] = mfspr(SPRN_IVOR2); + ivor[3] = mfspr(SPRN_IVOR3); + ivor[4] = mfspr(SPRN_IVOR4); + ivor[5] = mfspr(SPRN_IVOR5); + ivor[6] = mfspr(SPRN_IVOR6); + ivor[7] = mfspr(SPRN_IVOR7); + ivor[8] = mfspr(SPRN_IVOR8); + ivor[9] = mfspr(SPRN_IVOR9); + ivor[10] = mfspr(SPRN_IVOR10); + ivor[11] = mfspr(SPRN_IVOR11); + ivor[12] = mfspr(SPRN_IVOR12); + ivor[13] = mfspr(SPRN_IVOR13); + ivor[14] = mfspr(SPRN_IVOR14); + ivor[15] = mfspr(SPRN_IVOR15); + + for (i = 0; i < 16; i++) { + if (ivor[i] > max_ivor) + max_ivor = ivor[i]; + + memcpy((void *)kvmppc_booke_handlers + ivor[i], + kvmppc_handlers_start + i * kvmppc_handler_len, + kvmppc_handler_len); + } + flush_icache_range(kvmppc_booke_handlers, + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); return 0; } + +void __exit kvmppc_booke_exit(void) +{ + free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); + kvm_exit(); +} diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h new file mode 100644 index 00000000000..cf7c94ca24b --- /dev/null +++ b/arch/powerpc/kvm/booke.h @@ -0,0 +1,60 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __KVM_BOOKE_H__ +#define __KVM_BOOKE_H__ + +#include <linux/types.h> +#include <linux/kvm_host.h> +#include "timing.h" + +/* interrupt priortity ordering */ +#define BOOKE_IRQPRIO_DATA_STORAGE 0 +#define BOOKE_IRQPRIO_INST_STORAGE 1 +#define BOOKE_IRQPRIO_ALIGNMENT 2 +#define BOOKE_IRQPRIO_PROGRAM 3 +#define BOOKE_IRQPRIO_FP_UNAVAIL 4 +#define BOOKE_IRQPRIO_SYSCALL 5 +#define BOOKE_IRQPRIO_AP_UNAVAIL 6 +#define BOOKE_IRQPRIO_DTLB_MISS 7 +#define BOOKE_IRQPRIO_ITLB_MISS 8 +#define BOOKE_IRQPRIO_MACHINE_CHECK 9 +#define BOOKE_IRQPRIO_DEBUG 10 +#define BOOKE_IRQPRIO_CRITICAL 11 +#define BOOKE_IRQPRIO_WATCHDOG 12 +#define BOOKE_IRQPRIO_EXTERNAL 13 +#define BOOKE_IRQPRIO_FIT 14 +#define BOOKE_IRQPRIO_DECREMENTER 15 + +/* Helper function for "full" MSR writes. No need to call this if only EE is + * changing. */ +static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) +{ + if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) + kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); + + vcpu->arch.msr = new_msr; + + if (vcpu->arch.msr & MSR_WE) { + kvm_vcpu_block(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); + }; +} + +#endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c deleted file mode 100644 index b480341bc31..00000000000 --- a/arch/powerpc/kvm/booke_host.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#include <linux/errno.h> -#include <linux/kvm_host.h> -#include <linux/module.h> -#include <asm/cacheflush.h> -#include <asm/kvm_ppc.h> - -unsigned long kvmppc_booke_handlers; - -static int kvmppc_booke_init(void) -{ - unsigned long ivor[16]; - unsigned long max_ivor = 0; - int i; - - /* We install our own exception handlers by hijacking IVPR. IVPR must - * be 16-bit aligned, so we need a 64KB allocation. */ - kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, - VCPU_SIZE_ORDER); - if (!kvmppc_booke_handlers) - return -ENOMEM; - - /* XXX make sure our handlers are smaller than Linux's */ - - /* Copy our interrupt handlers to match host IVORs. That way we don't - * have to swap the IVORs on every guest/host transition. */ - ivor[0] = mfspr(SPRN_IVOR0); - ivor[1] = mfspr(SPRN_IVOR1); - ivor[2] = mfspr(SPRN_IVOR2); - ivor[3] = mfspr(SPRN_IVOR3); - ivor[4] = mfspr(SPRN_IVOR4); - ivor[5] = mfspr(SPRN_IVOR5); - ivor[6] = mfspr(SPRN_IVOR6); - ivor[7] = mfspr(SPRN_IVOR7); - ivor[8] = mfspr(SPRN_IVOR8); - ivor[9] = mfspr(SPRN_IVOR9); - ivor[10] = mfspr(SPRN_IVOR10); - ivor[11] = mfspr(SPRN_IVOR11); - ivor[12] = mfspr(SPRN_IVOR12); - ivor[13] = mfspr(SPRN_IVOR13); - ivor[14] = mfspr(SPRN_IVOR14); - ivor[15] = mfspr(SPRN_IVOR15); - - for (i = 0; i < 16; i++) { - if (ivor[i] > max_ivor) - max_ivor = ivor[i]; - - memcpy((void *)kvmppc_booke_handlers + ivor[i], - kvmppc_handlers_start + i * kvmppc_handler_len, - kvmppc_handler_len); - } - flush_icache_range(kvmppc_booke_handlers, - kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); - - return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); -} - -static void __exit kvmppc_booke_exit(void) -{ - free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); - kvm_exit(); -} - -module_init(kvmppc_booke_init) -module_exit(kvmppc_booke_exit) diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 95e165baf85..084ebcd7dd8 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -107,6 +107,18 @@ _GLOBAL(kvmppc_resume_host) li r6, 1 slw r6, r6, r5 +#ifdef CONFIG_KVM_EXIT_TIMING + /* save exit time */ +1: + mfspr r7, SPRN_TBRU + mfspr r8, SPRN_TBRL + mfspr r9, SPRN_TBRU + cmpw r9, r7 + bne 1b + stw r8, VCPU_TIMING_EXIT_TBL(r4) + stw r9, VCPU_TIMING_EXIT_TBU(r4) +#endif + /* Save the faulting instruction and all GPRs for emulation. */ andi. r7, r6, NEED_INST_MASK beq ..skip_inst_copy @@ -335,54 +347,6 @@ lightweight_exit: lwz r3, VCPU_SHADOW_PID(r4) mtspr SPRN_PID, r3 - /* Prevent all asynchronous TLB updates. */ - mfmsr r5 - lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h - ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l - andc r6, r5, r6 - mtmsr r6 - - /* Load the guest mappings, leaving the host's "pinned" kernel mappings - * in place. */ - mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ - li r5, PPC44x_TLB_SIZE - lis r5, tlb_44x_hwater@ha - lwz r5, tlb_44x_hwater@l(r5) - mtctr r5 - addi r9, r4, VCPU_SHADOW_TLB - addi r5, r4, VCPU_SHADOW_MOD - li r3, 0 -1: - lbzx r7, r3, r5 - cmpwi r7, 0 - beq 3f - - /* Load guest entry. */ - mulli r11, r3, TLBE_BYTES - add r11, r11, r9 - lwz r7, 0(r11) - mtspr SPRN_MMUCR, r7 - lwz r7, 4(r11) - tlbwe r7, r3, PPC44x_TLB_PAGEID - lwz r7, 8(r11) - tlbwe r7, r3, PPC44x_TLB_XLAT - lwz r7, 12(r11) - tlbwe r7, r3, PPC44x_TLB_ATTRIB -3: - addi r3, r3, 1 /* Increment index. */ - bdnz 1b - - mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */ - - /* Clear bitmap of modified TLB entries */ - li r5, PPC44x_TLB_SIZE>>2 - mtctr r5 - addi r5, r4, VCPU_SHADOW_MOD - 4 - li r6, 0 -1: - stwu r6, 4(r5) - bdnz 1b - iccci 0, 0 /* XXX hack */ /* Load some guest volatiles. */ @@ -423,6 +387,18 @@ lightweight_exit: lwz r3, VCPU_SPRG7(r4) mtspr SPRN_SPRG7, r3 +#ifdef CONFIG_KVM_EXIT_TIMING + /* save enter time */ +1: + mfspr r6, SPRN_TBRU + mfspr r7, SPRN_TBRL + mfspr r8, SPRN_TBRU + cmpw r8, r6 + bne 1b + stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4) + stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4) +#endif + /* Finish loading guest volatiles and jump to guest. */ lwz r3, VCPU_CTR(r4) mtctr r3 diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 0fce4fbdc20..d1d38daa93f 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -23,161 +23,14 @@ #include <linux/string.h> #include <linux/kvm_host.h> -#include <asm/dcr.h> -#include <asm/dcr-regs.h> +#include <asm/reg.h> #include <asm/time.h> #include <asm/byteorder.h> #include <asm/kvm_ppc.h> +#include <asm/disassemble.h> +#include "timing.h" -#include "44x_tlb.h" - -/* Instruction decoding */ -static inline unsigned int get_op(u32 inst) -{ - return inst >> 26; -} - -static inline unsigned int get_xop(u32 inst) -{ - return (inst >> 1) & 0x3ff; -} - -static inline unsigned int get_sprn(u32 inst) -{ - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); -} - -static inline unsigned int get_dcrn(u32 inst) -{ - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); -} - -static inline unsigned int get_rt(u32 inst) -{ - return (inst >> 21) & 0x1f; -} - -static inline unsigned int get_rs(u32 inst) -{ - return (inst >> 21) & 0x1f; -} - -static inline unsigned int get_ra(u32 inst) -{ - return (inst >> 16) & 0x1f; -} - -static inline unsigned int get_rb(u32 inst) -{ - return (inst >> 11) & 0x1f; -} - -static inline unsigned int get_rc(u32 inst) -{ - return inst & 0x1; -} - -static inline unsigned int get_ws(u32 inst) -{ - return (inst >> 11) & 0x1f; -} - -static inline unsigned int get_d(u32 inst) -{ - return inst & 0xffff; -} - -static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct tlbe *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. */ - return 0; - - return 1; -} - -static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) -{ - u64 eaddr; - u64 raddr; - u64 asid; - u32 flags; - struct tlbe *tlbe; - unsigned int ra; - unsigned int rs; - unsigned int ws; - unsigned int index; - - ra = get_ra(inst); - rs = get_rs(inst); - ws = get_ws(inst); - - index = vcpu->arch.gpr[ra]; - if (index > PPC44x_TLB_SIZE) { - printk("%s: index %d\n", __func__, index); - kvmppc_dump_vcpu(vcpu); - return EMULATE_FAIL; - } - - tlbe = &vcpu->arch.guest_tlb[index]; - - /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ - if (tlbe->word0 & PPC44x_TLB_VALID) { - eaddr = get_tlb_eaddr(tlbe); - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; - kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid); - } - - switch (ws) { - case PPC44x_TLB_PAGEID: - tlbe->tid = vcpu->arch.mmucr & 0xff; - tlbe->word0 = vcpu->arch.gpr[rs]; - break; - - case PPC44x_TLB_XLAT: - tlbe->word1 = vcpu->arch.gpr[rs]; - break; - - case PPC44x_TLB_ATTRIB: - tlbe->word2 = vcpu->arch.gpr[rs]; - break; - - default: - return EMULATE_FAIL; - } - - if (tlbe_is_host_safe(vcpu, tlbe)) { - eaddr = get_tlb_eaddr(tlbe); - raddr = get_tlb_raddr(tlbe); - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; - flags = tlbe->word2 & 0xffff; - - /* Create a 4KB mapping on the host. If the guest wanted a - * large page, only the first 4KB is mapped here and the rest - * are mapped on the fly. */ - kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); - } - - KVMTRACE_5D(GTLB_WRITE, vcpu, index, - tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2, - handler); - - return EMULATE_DONE; -} - -static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) +void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) { if (vcpu->arch.tcr & TCR_DIE) { /* The decrementer ticks at the same rate as the timebase, so @@ -193,12 +46,6 @@ static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } -static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pc = vcpu->arch.srr0; - kvmppc_set_msr(vcpu, vcpu->arch.srr1); -} - /* XXX to do: * lhax * lhaux @@ -213,40 +60,30 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) * * XXX is_bigendian should depend on MMU mapping or MSR[LE] */ +/* XXX Should probably auto-generate instruction decoding for a particular core + * from opcode tables in the future. */ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 inst = vcpu->arch.last_inst; u32 ea; int ra; int rb; - int rc; int rs; int rt; int sprn; - int dcrn; enum emulation_result emulated = EMULATE_DONE; int advance = 1; + /* this default type might be overwritten by subcategories */ + kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + switch (get_op(inst)) { - case 3: /* trap */ - printk("trap!\n"); - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); + case 3: /* trap */ + vcpu->arch.esr |= ESR_PTR; + kvmppc_core_queue_program(vcpu); advance = 0; break; - case 19: - switch (get_xop(inst)) { - case 50: /* rfi */ - kvmppc_emul_rfi(vcpu); - advance = 0; - break; - - default: - emulated = EMULATE_FAIL; - break; - } - break; - case 31: switch (get_xop(inst)) { @@ -255,27 +92,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; - case 83: /* mfmsr */ - rt = get_rt(inst); - vcpu->arch.gpr[rt] = vcpu->arch.msr; - break; - case 87: /* lbzx */ rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; - case 131: /* wrtee */ - rs = get_rs(inst); - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (vcpu->arch.gpr[rs] & MSR_EE); - break; - - case 146: /* mtmsr */ - rs = get_rs(inst); - kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); - break; - case 151: /* stwx */ rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -283,11 +104,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 1); break; - case 163: /* wrteei */ - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (inst & MSR_EE); - break; - case 215: /* stbx */ rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -328,42 +144,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; - case 323: /* mfdcr */ - dcrn = get_dcrn(inst); - rt = get_rt(inst); - - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. - */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); - local_irq_enable(); - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = 0; - run->dcr.is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - emulated = EMULATE_DO_DCR; - } - - break; - case 339: /* mfspr */ sprn = get_sprn(inst); rt = get_rt(inst); @@ -373,26 +153,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; case SPRN_SRR1: vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; - case SPRN_MMUCR: - vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; - case SPRN_PID: - vcpu->arch.gpr[rt] = vcpu->arch.pid; break; - case SPRN_IVPR: - vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; - case SPRN_CCR0: - vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; - case SPRN_CCR1: - vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; case SPRN_PVR: vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; - case SPRN_DEAR: - vcpu->arch.gpr[rt] = vcpu->arch.dear; break; - case SPRN_ESR: - vcpu->arch.gpr[rt] = vcpu->arch.esr; break; - case SPRN_DBCR0: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; - case SPRN_DBCR1: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; /* Note: mftb and TBRL/TBWL are user-accessible, so * the guest can always access the real TB anyways. @@ -413,42 +175,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Note: SPRG4-7 are user-readable, so we don't get * a trap. */ - case SPRN_IVOR0: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break; - case SPRN_IVOR1: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break; - case SPRN_IVOR2: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break; - case SPRN_IVOR3: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break; - case SPRN_IVOR4: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break; - case SPRN_IVOR5: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break; - case SPRN_IVOR6: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break; - case SPRN_IVOR7: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break; - case SPRN_IVOR8: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break; - case SPRN_IVOR9: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break; - case SPRN_IVOR10: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break; - case SPRN_IVOR11: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break; - case SPRN_IVOR12: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break; - case SPRN_IVOR13: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break; - case SPRN_IVOR14: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break; - case SPRN_IVOR15: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break; - default: - printk("mfspr: unknown spr %x\n", sprn); - vcpu->arch.gpr[rt] = 0; + emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); + if (emulated == EMULATE_FAIL) { + printk("mfspr: unknown spr %x\n", sprn); + vcpu->arch.gpr[rt] = 0; + } break; } break; @@ -478,25 +210,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; - case 451: /* mtdcr */ - dcrn = get_dcrn(inst); - rs = get_rs(inst); - - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = vcpu->arch.gpr[rs]; - run->dcr.is_write = 1; - vcpu->arch.dcr_needed = 1; - emulated = EMULATE_DO_DCR; - } - - break; - case 467: /* mtspr */ sprn = get_sprn(inst); rs = get_rs(inst); @@ -505,22 +218,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; case SPRN_SRR1: vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; - case SPRN_MMUCR: - vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; - case SPRN_PID: - kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; - case SPRN_CCR0: - vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; - case SPRN_CCR1: - vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; - case SPRN_DEAR: - vcpu->arch.dear = vcpu->arch.gpr[rs]; break; - case SPRN_ESR: - vcpu->arch.esr = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR0: - vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR1: - vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; /* XXX We need to context-switch the timebase for * watchdog and FIT. */ @@ -532,14 +229,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_emulate_dec(vcpu); break; - case SPRN_TSR: - vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; - - case SPRN_TCR: - vcpu->arch.tcr = vcpu->arch.gpr[rs]; - kvmppc_emulate_dec(vcpu); - break; - case SPRN_SPRG0: vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; case SPRN_SPRG1: @@ -549,56 +238,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_SPRG3: vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; - /* Note: SPRG4-7 are user-readable. These values are - * loaded into the real SPRGs when resuming the - * guest. */ - case SPRN_SPRG4: - vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG5: - vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG6: - vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG7: - vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; - - case SPRN_IVPR: - vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR0: - vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR1: - vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR2: - vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR3: - vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR4: - vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR5: - vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR6: - vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR7: - vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR8: - vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR9: - vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR10: - vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR11: - vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR12: - vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR13: - vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR14: - vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR15: - vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break; - default: - printk("mtspr: unknown spr %x\n", sprn); - emulated = EMULATE_FAIL; + emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); + if (emulated == EMULATE_FAIL) + printk("mtspr: unknown spr %x\n", sprn); break; } break; @@ -629,36 +272,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 0); break; - case 978: /* tlbwe */ - emulated = kvmppc_emul_tlbwe(vcpu, inst); - break; - - case 914: { /* tlbsx */ - int index; - unsigned int as = get_mmucr_sts(vcpu); - unsigned int pid = get_mmucr_stid(vcpu); - - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - rc = get_rc(inst); - - ea = vcpu->arch.gpr[rb]; - if (ra) - ea += vcpu->arch.gpr[ra]; - - index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); - if (rc) { - if (index < 0) - vcpu->arch.cr &= ~0x20000000; - else - vcpu->arch.cr |= 0x20000000; - } - vcpu->arch.gpr[rt] = index; - - } - break; - case 790: /* lhbrx */ rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); @@ -674,14 +287,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 2, 0); break; - case 966: /* iccci */ - break; - default: - printk("unknown: op %d xop %d\n", get_op(inst), - get_xop(inst)); + /* Attempt core-specific emulation below. */ emulated = EMULATE_FAIL; - break; } break; @@ -764,12 +372,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; default: - printk("unknown op %d\n", get_op(inst)); emulated = EMULATE_FAIL; - break; } - KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit); + if (emulated == EMULATE_FAIL) { + emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); + if (emulated == EMULATE_FAIL) { + advance = 0; + printk(KERN_ERR "Couldn't emulate instruction 0x%08x " + "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); + } + } + + KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit); if (advance) vcpu->arch.pc += 4; /* Advance past emulated instruction. */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8bef0efcdfe..2822c8ccfaa 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -28,9 +28,9 @@ #include <asm/uaccess.h> #include <asm/kvm_ppc.h> #include <asm/tlbflush.h> +#include "timing.h" #include "../mm/mmu_decl.h" - gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; @@ -99,14 +99,7 @@ void kvm_arch_hardware_unsetup(void) void kvm_arch_check_processor_compat(void *rtn) { - int r; - - if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) - r = 0; - else - r = -ENOTSUPP; - - *(int *)rtn = r; + *(int *)rtn = kvmppc_core_check_processor_compat(); } struct kvm *kvm_arch_create_vm(void) @@ -144,9 +137,6 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { - case KVM_CAP_USER_MEMORY: - r = 1; - break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; @@ -179,30 +169,15 @@ void kvm_arch_flush_shadow(struct kvm *kvm) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; - int err; - - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu) { - err = -ENOMEM; - goto out; - } - - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - + vcpu = kvmppc_core_vcpu_create(kvm, id); + kvmppc_create_vcpu_debugfs(vcpu, id); return vcpu; - -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu); -out: - return ERR_PTR(err); } void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu); + kvmppc_remove_vcpu_debugfs(vcpu); + kvmppc_core_vcpu_free(vcpu); } void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -212,16 +187,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; - - return test_bit(priority, &vcpu->arch.pending_exceptions); + return kvmppc_core_pending_dec(vcpu); } static void kvmppc_decrementer_func(unsigned long data) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); + kvmppc_core_queue_dec(vcpu); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); @@ -242,96 +215,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_core_destroy_mmu(vcpu); } -/* Note: clearing MSR[DE] just means that the debug interrupt will not be - * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. - * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt - * will be delivered as an "imprecise debug event" (which is indicated by - * DBSR[IDE]. - */ -static void kvmppc_disable_debug_interrupts(void) -{ - mtmsr(mfmsr() & ~MSR_DE); -} - -static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu) -{ - kvmppc_disable_debug_interrupts(); - - mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); - mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); - mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); - mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); - mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); - mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); - mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); - mtmsr(vcpu->arch.host_msr); -} - -static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu) -{ - struct kvm_guest_debug *dbg = &vcpu->guest_debug; - u32 dbcr0 = 0; - - vcpu->arch.host_msr = mfmsr(); - kvmppc_disable_debug_interrupts(); - - /* Save host debug register state. */ - vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); - vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); - vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); - vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); - vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); - vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); - vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); - - /* set registers up for guest */ - - if (dbg->bp[0]) { - mtspr(SPRN_IAC1, dbg->bp[0]); - dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; - } - if (dbg->bp[1]) { - mtspr(SPRN_IAC2, dbg->bp[1]); - dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; - } - if (dbg->bp[2]) { - mtspr(SPRN_IAC3, dbg->bp[2]); - dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; - } - if (dbg->bp[3]) { - mtspr(SPRN_IAC4, dbg->bp[3]); - dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; - } - - mtspr(SPRN_DBCR0, dbcr0); - mtspr(SPRN_DBCR1, 0); - mtspr(SPRN_DBCR2, 0); -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - int i; - if (vcpu->guest_debug.enabled) - kvmppc_load_guest_debug_registers(vcpu); + kvmppc_core_load_guest_debugstate(vcpu); - /* Mark every guest entry in the shadow TLB entry modified, so that they - * will all be reloaded on the next vcpu run (instead of being - * demand-faulted). */ - for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_tlbe_set_modified(vcpu, i); + kvmppc_core_vcpu_load(vcpu, cpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { if (vcpu->guest_debug.enabled) - kvmppc_restore_host_debug_state(vcpu); + kvmppc_core_load_host_debugstate(vcpu); /* Don't leave guest TLB entries resident when being de-scheduled. */ /* XXX It would be nice to differentiate between heavyweight exit and * sched_out here, since we could avoid the TLB flush for heavyweight * exits. */ _tlbil_all(); + kvmppc_core_vcpu_put(vcpu); } int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, @@ -355,14 +257,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; *gpr = run->dcr.data; } static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; if (run->mmio.len > sizeof(*gpr)) { printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); @@ -460,7 +362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->arch.dcr_needed = 0; } - kvmppc_check_and_deliver_interrupts(vcpu); + kvmppc_core_deliver_interrupts(vcpu); local_irq_disable(); kvm_guest_enter(); @@ -478,7 +380,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); + kvmppc_core_queue_external(vcpu, irq); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c new file mode 100644 index 00000000000..47ee603f558 --- /dev/null +++ b/arch/powerpc/kvm/timing.c @@ -0,0 +1,239 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> + +#include <asm/time.h> +#include <asm-generic/div64.h> + +#include "timing.h" + +void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) +{ + int i; + + /* pause guest execution to avoid concurrent updates */ + local_irq_disable(); + mutex_lock(&vcpu->mutex); + + vcpu->arch.last_exit_type = 0xDEAD; + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + vcpu->arch.timing_count_type[i] = 0; + vcpu->arch.timing_max_duration[i] = 0; + vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF; + vcpu->arch.timing_sum_duration[i] = 0; + vcpu->arch.timing_sum_quad_duration[i] = 0; + } + vcpu->arch.timing_last_exit = 0; + vcpu->arch.timing_exit.tv64 = 0; + vcpu->arch.timing_last_enter.tv64 = 0; + + mutex_unlock(&vcpu->mutex); + local_irq_enable(); +} + +static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) +{ + u64 old; + + do_div(duration, tb_ticks_per_usec); + if (unlikely(duration > 0xFFFFFFFF)) { + printk(KERN_ERR"%s - duration too big -> overflow" + " duration %lld type %d exit #%d\n", + __func__, duration, type, + vcpu->arch.timing_count_type[type]); + return; + } + + vcpu->arch.timing_count_type[type]++; + + /* sum */ + old = vcpu->arch.timing_sum_duration[type]; + vcpu->arch.timing_sum_duration[type] += duration; + if (unlikely(old > vcpu->arch.timing_sum_duration[type])) { + printk(KERN_ERR"%s - wrap adding sum of durations" + " old %lld new %lld type %d exit # of type %d\n", + __func__, old, vcpu->arch.timing_sum_duration[type], + type, vcpu->arch.timing_count_type[type]); + } + + /* square sum */ + old = vcpu->arch.timing_sum_quad_duration[type]; + vcpu->arch.timing_sum_quad_duration[type] += (duration*duration); + if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) { + printk(KERN_ERR"%s - wrap adding sum of squared durations" + " old %lld new %lld type %d exit # of type %d\n", + __func__, old, + vcpu->arch.timing_sum_quad_duration[type], + type, vcpu->arch.timing_count_type[type]); + } + + /* set min/max */ + if (unlikely(duration < vcpu->arch.timing_min_duration[type])) + vcpu->arch.timing_min_duration[type] = duration; + if (unlikely(duration > vcpu->arch.timing_max_duration[type])) + vcpu->arch.timing_max_duration[type] = duration; +} + +void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) +{ + u64 exit = vcpu->arch.timing_last_exit; + u64 enter = vcpu->arch.timing_last_enter.tv64; + + /* save exit time, used next exit when the reenter time is known */ + vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64; + + if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0)) + return; /* skip incomplete cycle (e.g. after reset) */ + + /* update statistics for average and standard deviation */ + add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type); + /* enter -> timing_last_exit is time spent in guest - log this too */ + add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter), + TIMEINGUEST); +} + +static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { + [MMIO_EXITS] = "MMIO", + [DCR_EXITS] = "DCR", + [SIGNAL_EXITS] = "SIGNAL", + [ITLB_REAL_MISS_EXITS] = "ITLBREAL", + [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", + [DTLB_REAL_MISS_EXITS] = "DTLBREAL", + [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT", + [SYSCALL_EXITS] = "SYSCALL", + [ISI_EXITS] = "ISI", + [DSI_EXITS] = "DSI", + [EMULATED_INST_EXITS] = "EMULINST", + [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT", + [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE", + [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR", + [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR", + [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR", + [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR", + [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX", + [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE", + [EMULATED_RFI_EXITS] = "EMUL_RFI", + [DEC_EXITS] = "DEC", + [EXT_INTR_EXITS] = "EXTINT", + [HALT_WAKEUP] = "HALT", + [USR_PR_INST] = "USR_PR_INST", + [FP_UNAVAIL] = "FP_UNAVAIL", + [DEBUG_EXITS] = "DEBUG", + [TIMEINGUEST] = "TIMEINGUEST" +}; + +static int kvmppc_exit_timing_show(struct seq_file *m, void *private) +{ + struct kvm_vcpu *vcpu = m->private; + int i; + + seq_printf(m, "%s", "type count min max sum sum_squared\n"); + + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", + kvm_exit_names[i], + vcpu->arch.timing_count_type[i], + vcpu->arch.timing_min_duration[i], + vcpu->arch.timing_max_duration[i], + vcpu->arch.timing_sum_duration[i], + vcpu->arch.timing_sum_quad_duration[i]); + } + return 0; +} + +/* Write 'c' to clear the timing statistics. */ +static ssize_t kvmppc_exit_timing_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int err = -EINVAL; + char c; + + if (count > 1) { + goto done; + } + + if (get_user(c, user_buf)) { + err = -EFAULT; + goto done; + } + + if (c == 'c') { + struct seq_file *seqf = (struct seq_file *)file->private_data; + struct kvm_vcpu *vcpu = seqf->private; + /* Write does not affect our buffers previously generated with + * show. seq_file is locked here to prevent races of init with + * a show call */ + mutex_lock(&seqf->lock); + kvmppc_init_timing_stats(vcpu); + mutex_unlock(&seqf->lock); + err = count; + } + +done: + return err; +} + +static int kvmppc_exit_timing_open(struct inode *inode, struct file *file) +{ + return single_open(file, kvmppc_exit_timing_show, inode->i_private); +} + +static struct file_operations kvmppc_exit_timing_fops = { + .owner = THIS_MODULE, + .open = kvmppc_exit_timing_open, + .read = seq_read, + .write = kvmppc_exit_timing_write, + .llseek = seq_lseek, + .release = single_release, +}; + +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) +{ + static char dbg_fname[50]; + struct dentry *debugfs_file; + + snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", + current->pid, id); + debugfs_file = debugfs_create_file(dbg_fname, 0666, + kvm_debugfs_dir, vcpu, + &kvmppc_exit_timing_fops); + + if (!debugfs_file) { + printk(KERN_ERR"%s: error creating debugfs file %s\n", + __func__, dbg_fname); + return; + } + + vcpu->arch.debugfs_exit_timing = debugfs_file; +} + +void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.debugfs_exit_timing) { + debugfs_remove(vcpu->arch.debugfs_exit_timing); + vcpu->arch.debugfs_exit_timing = NULL; + } +} diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h new file mode 100644 index 00000000000..bb13b1f3cd5 --- /dev/null +++ b/arch/powerpc/kvm/timing.h @@ -0,0 +1,102 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#ifndef __POWERPC_KVM_EXITTIMING_H__ +#define __POWERPC_KVM_EXITTIMING_H__ + +#include <linux/kvm_host.h> +#include <asm/kvm_host.h> + +#ifdef CONFIG_KVM_EXIT_TIMING +void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); +void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu); +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id); +void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu); + +static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) +{ + vcpu->arch.last_exit_type = type; +} + +#else +/* if exit timing is not configured there is no need to build the c file */ +static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, + unsigned int id) {} +static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} +#endif /* CONFIG_KVM_EXIT_TIMING */ + +/* account the exit in kvm_stats */ +static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) +{ + /* type has to be known at build time for optimization */ + BUILD_BUG_ON(__builtin_constant_p(type)); + switch (type) { + case EXT_INTR_EXITS: + vcpu->stat.ext_intr_exits++; + break; + case DEC_EXITS: + vcpu->stat.dec_exits++; + break; + case EMULATED_INST_EXITS: + vcpu->stat.emulated_inst_exits++; + break; + case DCR_EXITS: + vcpu->stat.dcr_exits++; + break; + case DSI_EXITS: + vcpu->stat.dsi_exits++; + break; + case ISI_EXITS: + vcpu->stat.isi_exits++; + break; + case SYSCALL_EXITS: + vcpu->stat.syscall_exits++; + break; + case DTLB_REAL_MISS_EXITS: + vcpu->stat.dtlb_real_miss_exits++; + break; + case DTLB_VIRT_MISS_EXITS: + vcpu->stat.dtlb_virt_miss_exits++; + break; + case MMIO_EXITS: + vcpu->stat.mmio_exits++; + break; + case ITLB_REAL_MISS_EXITS: + vcpu->stat.itlb_real_miss_exits++; + break; + case ITLB_VIRT_MISS_EXITS: + vcpu->stat.itlb_virt_miss_exits++; + break; + case SIGNAL_EXITS: + vcpu->stat.signal_exits++; + break; + } +} + +/* wrapper to set exit time and account for it in kvm_stats */ +static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type) +{ + kvmppc_set_exit_type(vcpu, type); + kvmppc_account_exit_stat(vcpu, type); +} + +#endif /* __POWERPC_KVM_EXITTIMING_H__ */ diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 2949126d28d..6b793aeda72 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -297,7 +297,7 @@ static inline unsigned long fast_get_dcookie(struct path *path) { unsigned long cookie; - if (path->dentry->d_cookie) + if (path->dentry->d_flags & DCACHE_COOKIE) return (unsigned long)path->dentry; get_dcookie(path, &cookie); return cookie; diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c index 906a0a2a9fe..1410443731e 100644 --- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c @@ -80,10 +80,10 @@ static void cpu_affinity_set(struct spu *spu, int cpu) u64 route; if (nr_cpus_node(spu->node)) { - cpumask_t spumask = node_to_cpumask(spu->node); - cpumask_t cpumask = node_to_cpumask(cpu_to_node(cpu)); + const struct cpumask *spumask = cpumask_of_node(spu->node), + *cpumask = cpumask_of_node(cpu_to_node(cpu)); - if (!cpus_intersects(spumask, cpumask)) + if (!cpumask_intersects(spumask, cpumask)) return; } diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 6296bfd9cb0..e309ef70a53 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -97,7 +97,6 @@ spufs_new_inode(struct super_block *sb, int mode) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; out: return inode; diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 2ad914c4749..6a0ad196aeb 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -166,9 +166,9 @@ void spu_update_sched_info(struct spu_context *ctx) static int __node_allowed(struct spu_context *ctx, int node) { if (nr_cpus_node(node)) { - cpumask_t mask = node_to_cpumask(node); + const struct cpumask *mask = cpumask_of_node(node); - if (cpus_intersects(mask, ctx->cpus_allowed)) + if (cpumask_intersects(mask, &ctx->cpus_allowed)) return 1; } diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index f7a69021b7b..84e058f1e1c 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -332,7 +332,7 @@ static void xics_eoi_lpar(unsigned int virq) lpar_xirr_info_set((0xff << 24) | irq); } -static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) +static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) { unsigned int irq; int status; @@ -870,7 +870,7 @@ void xics_migrate_irqs_away(void) /* Reset affinity to all cpus */ irq_desc[virq].affinity = CPU_MASK_ALL; - desc->chip->set_affinity(virq, CPU_MASK_ALL); + desc->chip->set_affinity(virq, cpu_all_mask); unlock: spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index c82babb7007..3e0d89dcdba 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -806,7 +806,7 @@ static void mpic_end_ipi(unsigned int irq) #endif /* CONFIG_SMP */ -void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) +void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); @@ -818,7 +818,7 @@ void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) } else { cpumask_t tmp; - cpus_and(tmp, cpumask, cpu_online_map); + cpumask_and(&tmp, cpumask, cpu_online_mask); mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), mpic_physmask(cpus_addr(tmp)[0])); diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h index 6209c62a426..3cef2af10f4 100644 --- a/arch/powerpc/sysdev/mpic.h +++ b/arch/powerpc/sysdev/mpic.h @@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic) extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type); extern void mpic_set_vector(unsigned int virq, unsigned int vector); -extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask); +extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask); #endif /* _POWERPC_SYSDEV_MPIC_H */ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8152fefc97b..19577aeffd7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -83,6 +83,7 @@ config S390 select HAVE_KRETPROBES select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK + select INIT_ALL_POSSIBLE source "init/Kconfig" diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 9d4f8e6c080..5a805df216b 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -106,7 +106,6 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) ret->i_mode = mode; ret->i_uid = hypfs_info->uid; ret->i_gid = hypfs_info->gid; - ret->i_blocks = 0; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; if (mode & S_IFDIR) ret->i_nlink = 2; diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h index e5a6a9ba3ad..d60a2eefb17 100644 --- a/arch/s390/include/asm/cpu.h +++ b/arch/s390/include/asm/cpu.h @@ -14,7 +14,6 @@ struct s390_idle_data { spinlock_t lock; - unsigned int in_idle; unsigned long long idle_count; unsigned long long idle_enter; unsigned long long idle_time; @@ -22,12 +21,12 @@ struct s390_idle_data { DECLARE_PER_CPU(struct s390_idle_data, s390_idle); -void s390_idle_leave(void); +void vtime_start_cpu(void); static inline void s390_idle_check(void) { - if ((&__get_cpu_var(s390_idle))->in_idle) - s390_idle_leave(); + if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL) + vtime_start_cpu(); } #endif /* _ASM_S390_CPU_H_ */ diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 133ce054fc8..521726430af 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -11,7 +11,7 @@ #include <asm/div64.h> -/* We want to use micro-second resolution. */ +/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ typedef unsigned long long cputime_t; typedef unsigned long long cputime64_t; @@ -53,9 +53,9 @@ __div(unsigned long long n, unsigned int base) #define cputime_ge(__a, __b) ((__a) >= (__b)) #define cputime_lt(__a, __b) ((__a) < (__b)) #define cputime_le(__a, __b) ((__a) <= (__b)) -#define cputime_to_jiffies(__ct) (__div((__ct), 1000000 / HZ)) +#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ)) #define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (1000000 / HZ)) +#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ)) #define cputime64_zero (0ULL) #define cputime64_add(__a, __b) ((__a) + (__b)) @@ -64,7 +64,7 @@ __div(unsigned long long n, unsigned int base) static inline u64 cputime64_to_jiffies64(cputime64_t cputime) { - do_div(cputime, 1000000 / HZ); + do_div(cputime, 4096000000ULL / HZ); return cputime; } @@ -74,13 +74,13 @@ cputime64_to_jiffies64(cputime64_t cputime) static inline unsigned int cputime_to_msecs(const cputime_t cputime) { - return __div(cputime, 1000); + return __div(cputime, 4096000); } static inline cputime_t msecs_to_cputime(const unsigned int m) { - return (cputime_t) m * 1000; + return (cputime_t) m * 4096000; } /* @@ -89,13 +89,13 @@ msecs_to_cputime(const unsigned int m) static inline unsigned int cputime_to_secs(const cputime_t cputime) { - return __div(cputime, 1000000); + return __div(cputime, 2048000000) >> 1; } static inline cputime_t secs_to_cputime(const unsigned int s) { - return (cputime_t) s * 1000000; + return (cputime_t) s * 4096000000ULL; } /* @@ -104,7 +104,7 @@ secs_to_cputime(const unsigned int s) static inline cputime_t timespec_to_cputime(const struct timespec *value) { - return value->tv_nsec / 1000 + (u64) value->tv_sec * 1000000; + return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL; } static inline void @@ -114,12 +114,12 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value) register_pair rp; rp.pair = cputime >> 1; - asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); - value->tv_nsec = rp.subreg.even * 1000; + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); + value->tv_nsec = rp.subreg.even * 1000 / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_nsec = (cputime % 1000000) * 1000; - value->tv_sec = cputime / 1000000; + value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096; + value->tv_sec = cputime / 4096000000ULL; #endif } @@ -131,7 +131,7 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value) static inline cputime_t timeval_to_cputime(const struct timeval *value) { - return value->tv_usec + (u64) value->tv_sec * 1000000; + return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL; } static inline void @@ -141,12 +141,12 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value) register_pair rp; rp.pair = cputime >> 1; - asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); - value->tv_usec = rp.subreg.even; + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); + value->tv_usec = rp.subreg.even / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_usec = cputime % 1000000; - value->tv_sec = cputime / 1000000; + value->tv_usec = cputime % 4096000000ULL; + value->tv_sec = cputime / 4096000000ULL; #endif } @@ -156,13 +156,13 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value) static inline clock_t cputime_to_clock_t(cputime_t cputime) { - return __div(cputime, 1000000 / USER_HZ); + return __div(cputime, 4096000000ULL / USER_HZ); } static inline cputime_t clock_t_to_cputime(unsigned long x) { - return (cputime_t) x * (1000000 / USER_HZ); + return (cputime_t) x * (4096000000ULL / USER_HZ); } /* @@ -171,7 +171,7 @@ clock_t_to_cputime(unsigned long x) static inline clock_t cputime64_to_clock_t(cputime64_t cputime) { - return __div(cputime, 1000000 / USER_HZ); + return __div(cputime, 4096000000ULL / USER_HZ); } #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 0bc51d52a89..ffdef5fe858 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -67,11 +67,11 @@ #define __LC_SYNC_ENTER_TIMER 0x248 #define __LC_ASYNC_ENTER_TIMER 0x250 #define __LC_EXIT_TIMER 0x258 -#define __LC_LAST_UPDATE_TIMER 0x260 -#define __LC_USER_TIMER 0x268 -#define __LC_SYSTEM_TIMER 0x270 -#define __LC_LAST_UPDATE_CLOCK 0x278 -#define __LC_STEAL_CLOCK 0x280 +#define __LC_USER_TIMER 0x260 +#define __LC_SYSTEM_TIMER 0x268 +#define __LC_STEAL_TIMER 0x270 +#define __LC_LAST_UPDATE_TIMER 0x278 +#define __LC_LAST_UPDATE_CLOCK 0x280 #define __LC_RETURN_MCCK_PSW 0x288 #define __LC_KERNEL_STACK 0xC40 #define __LC_THREAD_INFO 0xC44 @@ -89,11 +89,11 @@ #define __LC_SYNC_ENTER_TIMER 0x250 #define __LC_ASYNC_ENTER_TIMER 0x258 #define __LC_EXIT_TIMER 0x260 -#define __LC_LAST_UPDATE_TIMER 0x268 -#define __LC_USER_TIMER 0x270 -#define __LC_SYSTEM_TIMER 0x278 -#define __LC_LAST_UPDATE_CLOCK 0x280 -#define __LC_STEAL_CLOCK 0x288 +#define __LC_USER_TIMER 0x268 +#define __LC_SYSTEM_TIMER 0x270 +#define __LC_STEAL_TIMER 0x278 +#define __LC_LAST_UPDATE_TIMER 0x280 +#define __LC_LAST_UPDATE_CLOCK 0x288 #define __LC_RETURN_MCCK_PSW 0x290 #define __LC_KERNEL_STACK 0xD40 #define __LC_THREAD_INFO 0xD48 @@ -106,8 +106,10 @@ #define __LC_IPLDEV 0xDB8 #define __LC_CURRENT 0xDD8 #define __LC_INT_CLOCK 0xDE8 +#define __LC_VDSO_PER_CPU 0xE38 #endif /* __s390x__ */ +#define __LC_PASTE 0xE40 #define __LC_PANIC_MAGIC 0xE00 #ifndef __s390x__ @@ -252,11 +254,11 @@ struct _lowcore __u64 sync_enter_timer; /* 0x248 */ __u64 async_enter_timer; /* 0x250 */ __u64 exit_timer; /* 0x258 */ - __u64 last_update_timer; /* 0x260 */ - __u64 user_timer; /* 0x268 */ - __u64 system_timer; /* 0x270 */ - __u64 last_update_clock; /* 0x278 */ - __u64 steal_clock; /* 0x280 */ + __u64 user_timer; /* 0x260 */ + __u64 system_timer; /* 0x268 */ + __u64 steal_timer; /* 0x270 */ + __u64 last_update_timer; /* 0x278 */ + __u64 last_update_clock; /* 0x280 */ psw_t return_mcck_psw; /* 0x288 */ __u8 pad8[0xc00-0x290]; /* 0x290 */ @@ -343,11 +345,11 @@ struct _lowcore __u64 sync_enter_timer; /* 0x250 */ __u64 async_enter_timer; /* 0x258 */ __u64 exit_timer; /* 0x260 */ - __u64 last_update_timer; /* 0x268 */ - __u64 user_timer; /* 0x270 */ - __u64 system_timer; /* 0x278 */ - __u64 last_update_clock; /* 0x280 */ - __u64 steal_clock; /* 0x288 */ + __u64 user_timer; /* 0x268 */ + __u64 system_timer; /* 0x270 */ + __u64 steal_timer; /* 0x278 */ + __u64 last_update_timer; /* 0x280 */ + __u64 last_update_clock; /* 0x288 */ psw_t return_mcck_psw; /* 0x290 */ __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */ /* System info area */ @@ -381,7 +383,12 @@ struct _lowcore /* whether the kernel died with panic() or not */ __u32 panic_magic; /* 0xe00 */ - __u8 pad13[0x11b8-0xe04]; /* 0xe04 */ + /* Per cpu primary space access list */ + __u8 pad_0xe04[0xe3c-0xe04]; /* 0xe04 */ + __u32 vdso_per_cpu_data; /* 0xe3c */ + __u32 paste[16]; /* 0xe40 */ + + __u8 pad13[0x11b8-0xe80]; /* 0xe80 */ /* 64 bit extparam used for pfault, diag 250 etc */ __u64 ext_params2; /* 0x11B8 */ diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 024ef42ed6d..3a8b26eb1f2 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -99,7 +99,7 @@ static inline void restore_access_regs(unsigned int *acrs) prev = __switch_to(prev,next); \ } while (0) -extern void account_vtime(struct task_struct *); +extern void account_vtime(struct task_struct *, struct task_struct *); extern void account_tick_vtime(struct task_struct *); extern void account_system_vtime(struct task_struct *); @@ -121,7 +121,7 @@ static inline void cmma_init(void) { } #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ - account_vtime(prev); \ + account_vtime(prev, current); \ } while (0) #define nop() asm volatile("nop") diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index c1eaf9604da..c544aa52453 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -47,6 +47,8 @@ struct thread_info { unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; + __u64 user_timer; + __u64 system_timer; }; /* diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h index 61705d60f99..e4bcab739c1 100644 --- a/arch/s390/include/asm/timer.h +++ b/arch/s390/include/asm/timer.h @@ -23,20 +23,18 @@ struct vtimer_list { __u64 expires; __u64 interval; - spinlock_t lock; - unsigned long magic; - void (*function)(unsigned long); unsigned long data; }; -/* the offset value will wrap after ca. 71 years */ +/* the vtimer value will wrap after ca. 71 years */ struct vtimer_queue { struct list_head list; spinlock_t lock; - __u64 to_expire; /* current event expire time */ - __u64 offset; /* list offset to zero */ - __u64 idle; /* temp var for idle */ + __u64 timer; /* last programmed timer */ + __u64 elapsed; /* elapsed time of timer expire values */ + __u64 idle; /* temp var for idle */ + int do_spt; /* =1: reprogram cpu timer in idle */ }; extern void init_virt_timer(struct vtimer_list *timer); @@ -48,8 +46,8 @@ extern int del_virt_timer(struct vtimer_list *timer); extern void init_cpu_vtimer(void); extern void vtime_init(void); -extern void vtime_start_cpu_timer(void); -extern void vtime_stop_cpu_timer(void); +extern void vtime_stop_cpu(void); +extern void vtime_start_leave(void); #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index d96c9164345..c93eb50e1d0 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -6,10 +6,12 @@ #define mc_capable() (1) cpumask_t cpu_coregroup_map(unsigned int cpu); +const struct cpumask *cpu_coregroup_mask(unsigned int cpu); extern cpumask_t cpu_core_map[NR_CPUS]; #define topology_core_siblings(cpu) (cpu_core_map[cpu]) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) int topology_set_cpu_management(int fc); void topology_schedule_update(void); diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index a44f4fe16a3..7bdd7c8ebc9 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -12,9 +12,9 @@ #ifndef __ASSEMBLY__ /* - * Note about this structure: + * Note about the vdso_data and vdso_per_cpu_data structures: * - * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the * structure is supposed to be known only to the function in the vdso * itself and may change without notice. */ @@ -28,10 +28,21 @@ struct vdso_data { __u64 wtom_clock_nsec; /* 0x28 */ __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ __u32 tz_dsttime; /* Type of dst correction 0x34 */ + __u32 ectg_available; +}; + +struct vdso_per_cpu_data { + __u64 ectg_timer_base; + __u64 ectg_user_time; }; extern struct vdso_data *vdso_data; +#ifdef CONFIG_64BIT +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore); +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore); +#endif + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index e641f60bac9..67a60016bab 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -48,6 +48,11 @@ int main(void) DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); + DEFINE(__VDSO_ECTG_BASE, + offsetof(struct vdso_per_cpu_data, ectg_timer_base)); + DEFINE(__VDSO_ECTG_USER, + offsetof(struct vdso_per_cpu_data, ectg_user_time)); /* constants used by the vdso */ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 55de521aef7..1268aa2991b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -583,8 +583,8 @@ kernel_per: .globl io_int_handler io_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 @@ -723,8 +723,8 @@ io_notify_resume: .globl ext_int_handler ext_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 @@ -750,6 +750,7 @@ __critical_end: .globl mcck_int_handler mcck_int_handler: + stck __LC_INT_CLOCK spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs SAVE_ALL_BASE __LC_SAVE_AREA+32 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 16bb4fd1a40..c6fbde13971 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ .if !\sync ni \psworg+1,0xfd # clear wait state bit .endif - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user lpswe \psworg # back to caller .endm @@ -559,8 +562,8 @@ kernel_per: */ .globl io_int_handler io_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 @@ -721,8 +724,8 @@ io_notify_resume: */ .globl ext_int_handler ext_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 @@ -746,6 +749,7 @@ __critical_end: */ .globl mcck_int_handler mcck_int_handler: + stck __LC_INT_CLOCK la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs @@ -979,23 +983,23 @@ cleanup_sysc_return: cleanup_sysc_leave: clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) - je 2f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + je 3f clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) - je 2f - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) + jhe 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - jne 0f + jne 1f mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) + j 2f +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) +2: lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +3: la %r12,__LC_RETURN_PSW br %r14 cleanup_sysc_leave_insn: .quad sysc_done - 4 - .quad sysc_done - 8 + .quad sysc_done - 16 cleanup_io_return: mvc __LC_RETURN_PSW(8),0(%r12) @@ -1005,23 +1009,23 @@ cleanup_io_return: cleanup_io_leave: clc 8(8,%r12),BASED(cleanup_io_leave_insn) - je 2f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + je 3f clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) - je 2f - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) + jhe 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - jne 0f + jne 1f mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) + j 2f +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) +2: lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +3: la %r12,__LC_RETURN_PSW br %r14 cleanup_io_leave_insn: .quad io_done - 4 - .quad io_done - 8 + .quad io_done - 16 /* * Integer constants diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 3ccd36b24b8..f9f70aa1524 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -87,6 +87,8 @@ startup_continue: lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) + lghi %r0,__LC_PASTE + stg %r0,__LC_VDSO_PER_CPU # # Setup stack # diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c index e8071684361..7db95c0b869 100644 --- a/arch/s390/kernel/init_task.c +++ b/arch/s390/kernel/init_task.c @@ -16,7 +16,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 04f8c67a610..b6110bdf8dc 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -38,6 +38,7 @@ #include <linux/utsname.h> #include <linux/tick.h> #include <linux/elfcore.h> +#include <linux/kernel_stat.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -45,7 +46,6 @@ #include <asm/processor.h> #include <asm/irq.h> #include <asm/timer.h> -#include <asm/cpu.h> #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -75,36 +75,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sf->gprs[8]; } -DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = { - .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock) -}; - -static int s390_idle_enter(void) -{ - struct s390_idle_data *idle; - - idle = &__get_cpu_var(s390_idle); - spin_lock(&idle->lock); - idle->idle_count++; - idle->in_idle = 1; - idle->idle_enter = get_clock(); - spin_unlock(&idle->lock); - vtime_stop_cpu_timer(); - return NOTIFY_OK; -} - -void s390_idle_leave(void) -{ - struct s390_idle_data *idle; - - vtime_start_cpu_timer(); - idle = &__get_cpu_var(s390_idle); - spin_lock(&idle->lock); - idle->idle_time += get_clock() - idle->idle_enter; - idle->in_idle = 0; - spin_unlock(&idle->lock); -} - extern void s390_handle_mcck(void); /* * The idle loop on a S390... @@ -117,10 +87,6 @@ static void default_idle(void) local_irq_enable(); return; } - if (s390_idle_enter() == NOTIFY_BAD) { - local_irq_enable(); - return; - } #ifdef CONFIG_HOTPLUG_CPU if (cpu_is_offline(smp_processor_id())) { preempt_enable_no_resched(); @@ -130,7 +96,6 @@ static void default_idle(void) local_mcck_disable(); if (test_thread_flag(TIF_MCCK_PENDING)) { local_mcck_enable(); - s390_idle_leave(); local_irq_enable(); s390_handle_mcck(); return; @@ -138,9 +103,9 @@ static void default_idle(void) trace_hardirqs_on(); /* Don't trace preempt off for idle. */ stop_critical_timings(); - /* Wait for external, I/O or machine check interrupt. */ - __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - PSW_MASK_IO | PSW_MASK_EXT); + /* Stop virtual timer and halt the cpu. */ + vtime_stop_cpu(); + /* Reenable preemption tracer. */ start_critical_timings(); } diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index e019b419efc..a0d2d55d7fb 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -119,8 +119,8 @@ void do_extint(struct pt_regs *regs, unsigned short code) struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - irq_enter(); s390_idle_check(); + irq_enter(); if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) /* Serve timer interrupts first. */ clock_comparator_work(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b7a1efd5522..d825f4950e4 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -427,6 +427,8 @@ setup_lowcore(void) /* enable extended save area */ __ctl_set_bit(14, 29); } +#else + lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif set_prefix((u32)(unsigned long) lc); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6fc78541dc5..9c0ccb532a4 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -47,6 +47,7 @@ #include <asm/lowcore.h> #include <asm/sclp.h> #include <asm/cpu.h> +#include <asm/vdso.h> #include "entry.h" /* @@ -55,12 +56,6 @@ struct _lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); -cpumask_t cpu_online_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); - -cpumask_t cpu_possible_map = CPU_MASK_ALL; -EXPORT_SYMBOL(cpu_possible_map); - static struct task_struct *current_set[NR_CPUS]; static u8 smp_cpu_type; @@ -506,6 +501,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu) goto out; lowcore->extended_save_area_addr = (u32) save_area; } +#else + if (vdso_alloc_per_cpu(cpu, lowcore)) + goto out; #endif lowcore_ptr[cpu] = lowcore; return 0; @@ -528,6 +526,8 @@ static void smp_free_lowcore(int cpu) #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) free_page((unsigned long) lowcore->extended_save_area_addr); +#else + vdso_free_per_cpu(cpu, lowcore); #endif free_page(lowcore->panic_stack - PAGE_SIZE); free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); @@ -670,6 +670,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order); panic_stack = __get_free_page(GFP_KERNEL); async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + BUG_ON(!lowcore || !panic_stack || !async_stack); #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) save_area = get_zeroed_page(GFP_KERNEL); @@ -683,6 +684,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) lowcore->extended_save_area_addr = (u32) save_area; +#else + BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore)); #endif set_prefix((u32)(unsigned long) lowcore); local_mcck_enable(); @@ -851,9 +854,11 @@ static ssize_t show_idle_count(struct sys_device *dev, unsigned long long idle_count; idle = &per_cpu(s390_idle, dev->id); - spin_lock_irq(&idle->lock); + spin_lock(&idle->lock); idle_count = idle->idle_count; - spin_unlock_irq(&idle->lock); + if (idle->idle_enter) + idle_count++; + spin_unlock(&idle->lock); return sprintf(buf, "%llu\n", idle_count); } static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); @@ -862,18 +867,17 @@ static ssize_t show_idle_time(struct sys_device *dev, struct sysdev_attribute *attr, char *buf) { struct s390_idle_data *idle; - unsigned long long new_time; + unsigned long long now, idle_time, idle_enter; idle = &per_cpu(s390_idle, dev->id); - spin_lock_irq(&idle->lock); - if (idle->in_idle) { - new_time = get_clock(); - idle->idle_time += new_time - idle->idle_enter; - idle->idle_enter = new_time; - } - new_time = idle->idle_time; - spin_unlock_irq(&idle->lock); - return sprintf(buf, "%llu\n", new_time >> 12); + spin_lock(&idle->lock); + now = get_clock(); + idle_time = idle->idle_time; + idle_enter = idle->idle_enter; + if (idle_enter != 0ULL && idle_enter < now) + idle_time += now - idle_enter; + spin_unlock(&idle->lock); + return sprintf(buf, "%llu\n", idle_time >> 12); } static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 5be981a36c3..d649600df5b 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -160,7 +160,7 @@ void init_cpu_timer(void) cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; cd->rating = 400; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = s390_next_event; cd->set_mode = s390_set_mode; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 90e9ba11eba..cc362c9ea8f 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -97,6 +97,11 @@ cpumask_t cpu_coregroup_map(unsigned int cpu) return mask; } +const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +{ + return &cpu_core_map[cpu]; +} + static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) { unsigned int cpu; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 10a6ccef441..25a6a82f1c0 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -31,9 +31,6 @@ #include <asm/sections.h> #include <asm/vdso.h> -/* Max supported size for symbol names */ -#define MAX_SYMNAME 64 - #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) extern char vdso32_start, vdso32_end; static void *vdso32_kbase = &vdso32_start; @@ -71,6 +68,119 @@ static union { struct vdso_data *vdso_data = &vdso_data_store.data; /* + * Setup vdso data page. + */ +static void vdso_init_data(struct vdso_data *vd) +{ + unsigned int facility_list; + + facility_list = stfl(); + vd->ectg_available = switch_amode && (facility_list & 1); +} + +#ifdef CONFIG_64BIT +/* + * Setup per cpu vdso data page. + */ +static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd) +{ +} + +/* + * Allocate/free per cpu vdso data. + */ +#ifdef CONFIG_64BIT +#define SEGMENT_ORDER 2 +#else +#define SEGMENT_ORDER 1 +#endif + +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + int i; + + lowcore->vdso_per_cpu_data = __LC_PASTE; + + if (!switch_amode || !vdso_enabled) + return 0; + + segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); + page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); + page_frame = get_zeroed_page(GFP_KERNEL); + if (!segment_table || !page_table || !page_frame) + goto out; + + clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, + PAGE_SIZE << SEGMENT_ORDER); + clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, + 256*sizeof(unsigned long)); + + *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; + *(unsigned long *) page_table = _PAGE_RO + page_frame; + + psal = (u32 *) (page_table + 256*sizeof(unsigned long)); + aste = psal + 32; + + for (i = 4; i < 32; i += 4) + psal[i] = 0x80000000; + + lowcore->paste[4] = (u32)(addr_t) psal; + psal[0] = 0x20000000; + psal[2] = (u32)(addr_t) aste; + *(unsigned long *) (aste + 2) = segment_table + + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; + aste[4] = (u32)(addr_t) psal; + lowcore->vdso_per_cpu_data = page_frame; + + vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame); + return 0; + +out: + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); + return -ENOMEM; +} + +#ifdef CONFIG_HOTPLUG_CPU +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + + if (!switch_amode || !vdso_enabled) + return; + + psal = (u32 *)(addr_t) lowcore->paste[4]; + aste = (u32 *)(addr_t) psal[2]; + segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; + page_table = *(unsigned long *) segment_table; + page_frame = *(unsigned long *) page_table; + + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static void __vdso_init_cr5(void *dummy) +{ + unsigned long cr5; + + cr5 = offsetof(struct _lowcore, paste); + __ctl_load(cr5, 5, 5); +} + +static void vdso_init_cr5(void) +{ + if (switch_amode && vdso_enabled) + on_each_cpu(__vdso_init_cr5, NULL, 1); +} +#endif /* CONFIG_64BIT */ + +/* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree */ @@ -172,6 +282,9 @@ static int __init vdso_init(void) { int i; + if (!vdso_enabled) + return 0; + vdso_init_data(vdso_data); #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) /* Calculate the size of the 32 bit vDSO */ vdso32_pages = ((&vdso32_end - &vdso32_start @@ -208,6 +321,10 @@ static int __init vdso_init(void) } vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); vdso64_pagelist[vdso64_pages] = NULL; +#ifndef CONFIG_SMP + BUG_ON(vdso_alloc_per_cpu(0, S390_lowcore)); +#endif + vdso_init_cr5(); #endif /* CONFIG_64BIT */ get_page(virt_to_page(vdso_data)); diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 488e31a3c0e..9ce8caafdb4 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -22,7 +22,12 @@ __kernel_clock_getres: cghi %r2,CLOCK_REALTIME je 0f cghi %r2,CLOCK_MONOTONIC + je 0f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ jne 2f + larl %r5,_vdso_data + icm %r0,15,__LC_ECTG_OK(%r5) + jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ larl %r1,3f diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 738a410b7eb..79dbfee831e 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -22,8 +22,10 @@ __kernel_clock_gettime: larl %r5,_vdso_data cghi %r2,CLOCK_REALTIME je 4f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ + je 9f cghi %r2,CLOCK_MONOTONIC - jne 9f + jne 12f /* CLOCK_MONOTONIC */ ltgr %r3,%r3 @@ -42,7 +44,7 @@ __kernel_clock_gettime: alg %r0,__VDSO_WTOM_SEC(%r5) clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ jne 0b - larl %r5,10f + larl %r5,13f 1: clg %r1,0(%r5) jl 2f slg %r1,0(%r5) @@ -68,7 +70,7 @@ __kernel_clock_gettime: lg %r0,__VDSO_XTIME_SEC(%r5) clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ jne 5b - larl %r5,10f + larl %r5,13f 6: clg %r1,0(%r5) jl 7f slg %r1,0(%r5) @@ -79,11 +81,38 @@ __kernel_clock_gettime: 8: lghi %r2,0 br %r14 + /* CLOCK_THREAD_CPUTIME_ID for this thread */ +9: icm %r0,15,__VDSO_ECTG_OK(%r5) + jz 12f + ear %r2,%a4 + llilh %r4,0x0100 + sar %a4,%r4 + lghi %r4,0 + sacf 512 /* Magic ectg instruction */ + .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 + sacf 0 + sar %a4,%r2 + algr %r1,%r0 /* r1 = cputime as TOD value */ + mghi %r1,1000 /* convert to nanoseconds */ + srlg %r1,%r1,12 /* r1 = cputime in nanosec */ + lgr %r4,%r1 + larl %r5,13f + srlg %r1,%r1,9 /* divide by 1000000000 */ + mlg %r0,8(%r5) + srlg %r0,%r0,11 /* r0 = tv_sec */ + stg %r0,0(%r3) + msg %r0,0(%r5) /* calculate tv_nsec */ + slgr %r4,%r0 /* r4 = tv_nsec */ + stg %r4,8(%r3) + lghi %r2,0 + br %r14 + /* Fallback to system call */ -9: lghi %r1,__NR_clock_gettime +12: lghi %r1,__NR_clock_gettime svc 0 br %r14 -10: .quad 1000000000 +13: .quad 1000000000 +14: .quad 19342813113834067 .cfi_endproc .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 75a6e62ea97..2fb36e46219 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -23,19 +23,43 @@ #include <asm/s390_ext.h> #include <asm/timer.h> #include <asm/irq_regs.h> +#include <asm/cpu.h> static ext_int_info_t ext_int_info_timer; + static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); +DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = { + .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock) +}; + +static inline __u64 get_vtimer(void) +{ + __u64 timer; + + asm volatile("STPT %0" : "=m" (timer)); + return timer; +} + +static inline void set_vtimer(__u64 expires) +{ + __u64 timer; + + asm volatile (" STPT %0\n" /* Store current cpu timer value */ + " SPT %1" /* Set new value immediatly afterwards */ + : "=m" (timer) : "m" (expires) ); + S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; + S390_lowcore.last_update_timer = expires; +} + /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void account_process_tick(struct task_struct *tsk, int user_tick) +static void do_account_vtime(struct task_struct *tsk, int hardirq_offset) { - cputime_t cputime; - __u64 timer, clock; - int rcu_user_flag; + struct thread_info *ti = task_thread_info(tsk); + __u64 timer, clock, user, system, steal; timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; @@ -44,50 +68,41 @@ void account_process_tick(struct task_struct *tsk, int user_tick) : "=m" (S390_lowcore.last_update_timer), "=m" (S390_lowcore.last_update_clock) ); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock; - - cputime = S390_lowcore.user_timer >> 12; - rcu_user_flag = cputime != 0; - S390_lowcore.user_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime); - - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, HARDIRQ_OFFSET, cputime); - - cputime = S390_lowcore.steal_clock; - if ((__s64) cputime > 0) { - cputime >>= 12; - S390_lowcore.steal_clock -= cputime << 12; - account_steal_time(tsk, cputime); + S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; + + user = S390_lowcore.user_timer - ti->user_timer; + S390_lowcore.steal_timer -= user; + ti->user_timer = S390_lowcore.user_timer; + account_user_time(tsk, user, user); + + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, hardirq_offset, system, system); + + steal = S390_lowcore.steal_timer; + if ((s64) steal > 0) { + S390_lowcore.steal_timer = 0; + account_steal_time(steal); } } -/* - * Update process times based on virtual cpu times stored by entry.S - * to the lowcore fields user_timer, system_timer & steal_clock. - */ -void account_vtime(struct task_struct *tsk) +void account_vtime(struct task_struct *prev, struct task_struct *next) { - cputime_t cputime; - __u64 timer; - - timer = S390_lowcore.last_update_timer; - asm volatile (" STPT %0" /* Store current cpu timer value */ - : "=m" (S390_lowcore.last_update_timer) ); - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - - cputime = S390_lowcore.user_timer >> 12; - S390_lowcore.user_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime); + struct thread_info *ti; + + do_account_vtime(prev, 0); + ti = task_thread_info(prev); + ti->user_timer = S390_lowcore.user_timer; + ti->system_timer = S390_lowcore.system_timer; + ti = task_thread_info(next); + S390_lowcore.user_timer = ti->user_timer; + S390_lowcore.system_timer = ti->system_timer; +} - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime); +void account_process_tick(struct task_struct *tsk, int user_tick) +{ + do_account_vtime(tsk, HARDIRQ_OFFSET); } /* @@ -96,80 +111,131 @@ void account_vtime(struct task_struct *tsk) */ void account_system_vtime(struct task_struct *tsk) { - cputime_t cputime; - __u64 timer; + struct thread_info *ti = task_thread_info(tsk); + __u64 timer, system; timer = S390_lowcore.last_update_timer; - asm volatile (" STPT %0" /* Store current cpu timer value */ - : "=m" (S390_lowcore.last_update_timer) ); + S390_lowcore.last_update_timer = get_vtimer(); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime); + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, 0, system, system); } EXPORT_SYMBOL_GPL(account_system_vtime); -static inline void set_vtimer(__u64 expires) -{ - __u64 timer; - - asm volatile (" STPT %0\n" /* Store current cpu timer value */ - " SPT %1" /* Set new value immediatly afterwards */ - : "=m" (timer) : "m" (expires) ); - S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; - S390_lowcore.last_update_timer = expires; - - /* store expire time for this CPU timer */ - __get_cpu_var(virt_cpu_timer).to_expire = expires; -} - -void vtime_start_cpu_timer(void) +void vtime_start_cpu(void) { - struct vtimer_queue *vt_list; - - vt_list = &__get_cpu_var(virt_cpu_timer); - - /* CPU timer interrupt is pending, don't reprogramm it */ - if (vt_list->idle & 1LL<<63) - return; + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); + __u64 idle_time, expires; + + /* Account time spent with enabled wait psw loaded as idle time. */ + idle_time = S390_lowcore.int_clock - idle->idle_enter; + account_idle_time(idle_time); + S390_lowcore.last_update_clock = S390_lowcore.int_clock; + + /* Account system time spent going idle. */ + S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle; + S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer; + + /* Restart vtime CPU timer */ + if (vq->do_spt) { + /* Program old expire value but first save progress. */ + expires = vq->idle - S390_lowcore.async_enter_timer; + expires += get_vtimer(); + set_vtimer(expires); + } else { + /* Don't account the CPU timer delta while the cpu was idle. */ + vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer; + } - if (!list_empty(&vt_list->list)) - set_vtimer(vt_list->idle); + spin_lock(&idle->lock); + idle->idle_time += idle_time; + idle->idle_enter = 0ULL; + idle->idle_count++; + spin_unlock(&idle->lock); } -void vtime_stop_cpu_timer(void) +void vtime_stop_cpu(void) { - struct vtimer_queue *vt_list; - - vt_list = &__get_cpu_var(virt_cpu_timer); - - /* nothing to do */ - if (list_empty(&vt_list->list)) { - vt_list->idle = VTIMER_MAX_SLICE; - goto fire; + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); + psw_t psw; + + /* Wait for external, I/O or machine check interrupt. */ + psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; + + /* Check if the CPU timer needs to be reprogrammed. */ + if (vq->do_spt) { + __u64 vmax = VTIMER_MAX_SLICE; + /* + * The inline assembly is equivalent to + * vq->idle = get_cpu_timer(); + * set_cpu_timer(VTIMER_MAX_SLICE); + * idle->idle_enter = get_clock(); + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | + * PSW_MASK_IO | PSW_MASK_EXT); + * The difference is that the inline assembly makes sure that + * the last three instruction are stpt, stck and lpsw in that + * order. This is done to increase the precision. + */ + asm volatile( +#ifndef CONFIG_64BIT + " basr 1,0\n" + "0: ahi 1,1f-0b\n" + " st 1,4(%2)\n" +#else /* CONFIG_64BIT */ + " larl 1,1f\n" + " stg 1,8(%2)\n" +#endif /* CONFIG_64BIT */ + " stpt 0(%4)\n" + " spt 0(%5)\n" + " stck 0(%3)\n" +#ifndef CONFIG_64BIT + " lpsw 0(%2)\n" +#else /* CONFIG_64BIT */ + " lpswe 0(%2)\n" +#endif /* CONFIG_64BIT */ + "1:" + : "=m" (idle->idle_enter), "=m" (vq->idle) + : "a" (&psw), "a" (&idle->idle_enter), + "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw) + : "memory", "cc", "1"); + } else { + /* + * The inline assembly is equivalent to + * vq->idle = get_cpu_timer(); + * idle->idle_enter = get_clock(); + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | + * PSW_MASK_IO | PSW_MASK_EXT); + * The difference is that the inline assembly makes sure that + * the last three instruction are stpt, stck and lpsw in that + * order. This is done to increase the precision. + */ + asm volatile( +#ifndef CONFIG_64BIT + " basr 1,0\n" + "0: ahi 1,1f-0b\n" + " st 1,4(%2)\n" +#else /* CONFIG_64BIT */ + " larl 1,1f\n" + " stg 1,8(%2)\n" +#endif /* CONFIG_64BIT */ + " stpt 0(%4)\n" + " stck 0(%3)\n" +#ifndef CONFIG_64BIT + " lpsw 0(%2)\n" +#else /* CONFIG_64BIT */ + " lpswe 0(%2)\n" +#endif /* CONFIG_64BIT */ + "1:" + : "=m" (idle->idle_enter), "=m" (vq->idle) + : "a" (&psw), "a" (&idle->idle_enter), + "a" (&vq->idle), "m" (psw) + : "memory", "cc", "1"); } - - /* store the actual expire value */ - asm volatile ("STPT %0" : "=m" (vt_list->idle)); - - /* - * If the CPU timer is negative we don't reprogramm - * it because we will get instantly an interrupt. - */ - if (vt_list->idle & 1LL<<63) - return; - - vt_list->offset += vt_list->to_expire - vt_list->idle; - - /* - * We cannot halt the CPU timer, we just write a value that - * nearly never expires (only after 71 years) and re-write - * the stored expire value if we continue the timer - */ - fire: - set_vtimer(VTIMER_MAX_SLICE); } /* @@ -195,30 +261,23 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) */ static void do_callbacks(struct list_head *cb_list) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; struct vtimer_list *event, *tmp; - void (*fn)(unsigned long); - unsigned long data; if (list_empty(cb_list)) return; - vt_list = &__get_cpu_var(virt_cpu_timer); + vq = &__get_cpu_var(virt_cpu_timer); list_for_each_entry_safe(event, tmp, cb_list, entry) { - fn = event->function; - data = event->data; - fn(data); - - if (!event->interval) - /* delete one shot timer */ - list_del_init(&event->entry); - else { - /* move interval timer back to list */ - spin_lock(&vt_list->lock); - list_del_init(&event->entry); - list_add_sorted(event, &vt_list->list); - spin_unlock(&vt_list->lock); + list_del_init(&event->entry); + (event->function)(event->data); + if (event->interval) { + /* Recharge interval timer */ + event->expires = event->interval + vq->elapsed; + spin_lock(&vq->lock); + list_add_sorted(event, &vq->list); + spin_unlock(&vq->lock); } } } @@ -228,64 +287,57 @@ static void do_callbacks(struct list_head *cb_list) */ static void do_cpu_timer_interrupt(__u16 error_code) { - __u64 next, delta; - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; struct vtimer_list *event, *tmp; - struct list_head *ptr; - /* the callback queue */ - struct list_head cb_list; + struct list_head cb_list; /* the callback queue */ + __u64 elapsed, next; INIT_LIST_HEAD(&cb_list); - vt_list = &__get_cpu_var(virt_cpu_timer); + vq = &__get_cpu_var(virt_cpu_timer); /* walk timer list, fire all expired events */ - spin_lock(&vt_list->lock); - - if (vt_list->to_expire < VTIMER_MAX_SLICE) - vt_list->offset += vt_list->to_expire; - - list_for_each_entry_safe(event, tmp, &vt_list->list, entry) { - if (event->expires > vt_list->offset) - /* found first unexpired event, leave */ - break; - - /* re-charge interval timer, we have to add the offset */ - if (event->interval) - event->expires = event->interval + vt_list->offset; - - /* move expired timer to the callback queue */ - list_move_tail(&event->entry, &cb_list); + spin_lock(&vq->lock); + + elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer); + BUG_ON((s64) elapsed < 0); + vq->elapsed = 0; + list_for_each_entry_safe(event, tmp, &vq->list, entry) { + if (event->expires < elapsed) + /* move expired timer to the callback queue */ + list_move_tail(&event->entry, &cb_list); + else + event->expires -= elapsed; } - spin_unlock(&vt_list->lock); + spin_unlock(&vq->lock); + + vq->do_spt = list_empty(&cb_list); do_callbacks(&cb_list); /* next event is first in list */ - spin_lock(&vt_list->lock); - if (!list_empty(&vt_list->list)) { - ptr = vt_list->list.next; - event = list_entry(ptr, struct vtimer_list, entry); - next = event->expires - vt_list->offset; - - /* add the expired time from this interrupt handler - * and the callback functions - */ - asm volatile ("STPT %0" : "=m" (delta)); - delta = 0xffffffffffffffffLL - delta + 1; - vt_list->offset += delta; - next -= delta; - } else { - vt_list->offset = 0; - next = VTIMER_MAX_SLICE; - } - spin_unlock(&vt_list->lock); - set_vtimer(next); + next = VTIMER_MAX_SLICE; + spin_lock(&vq->lock); + if (!list_empty(&vq->list)) { + event = list_first_entry(&vq->list, struct vtimer_list, entry); + next = event->expires; + } else + vq->do_spt = 0; + spin_unlock(&vq->lock); + /* + * To improve precision add the time spent by the + * interrupt handler to the elapsed time. + * Note: CPU timer counts down and we got an interrupt, + * the current content is negative + */ + elapsed = S390_lowcore.async_enter_timer - get_vtimer(); + set_vtimer(next - elapsed); + vq->timer = next - elapsed; + vq->elapsed = elapsed; } void init_virt_timer(struct vtimer_list *timer) { timer->function = NULL; INIT_LIST_HEAD(&timer->entry); - spin_lock_init(&timer->lock); } EXPORT_SYMBOL(init_virt_timer); @@ -299,44 +351,40 @@ static inline int vtimer_pending(struct vtimer_list *timer) */ static void internal_add_vtimer(struct vtimer_list *timer) { + struct vtimer_queue *vq; unsigned long flags; - __u64 done; - struct vtimer_list *event; - struct vtimer_queue *vt_list; + __u64 left, expires; - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); + vq = &per_cpu(virt_cpu_timer, timer->cpu); + spin_lock_irqsave(&vq->lock, flags); BUG_ON(timer->cpu != smp_processor_id()); - /* if list is empty we only have to set the timer */ - if (list_empty(&vt_list->list)) { - /* reset the offset, this may happen if the last timer was - * just deleted by mod_virt_timer and the interrupt - * didn't happen until here - */ - vt_list->offset = 0; - goto fire; + if (list_empty(&vq->list)) { + /* First timer on this cpu, just program it. */ + list_add(&timer->entry, &vq->list); + set_vtimer(timer->expires); + vq->timer = timer->expires; + vq->elapsed = 0; + } else { + /* Check progress of old timers. */ + expires = timer->expires; + left = get_vtimer(); + if (likely((s64) expires < (s64) left)) { + /* The new timer expires before the current timer. */ + set_vtimer(expires); + vq->elapsed += vq->timer - left; + vq->timer = expires; + } else { + vq->elapsed += vq->timer - left; + vq->timer = left; + } + /* Insert new timer into per cpu list. */ + timer->expires += vq->elapsed; + list_add_sorted(timer, &vq->list); } - /* save progress */ - asm volatile ("STPT %0" : "=m" (done)); - - /* calculate completed work */ - done = vt_list->to_expire - done + vt_list->offset; - vt_list->offset = 0; - - list_for_each_entry(event, &vt_list->list, entry) - event->expires -= done; - - fire: - list_add_sorted(timer, &vt_list->list); - - /* get first element, which is the next vtimer slice */ - event = list_entry(vt_list->list.next, struct vtimer_list, entry); - - set_vtimer(event->expires); - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); /* release CPU acquired in prepare_vtimer or mod_virt_timer() */ put_cpu(); } @@ -381,14 +429,15 @@ EXPORT_SYMBOL(add_virt_timer_periodic); * If we change a pending timer the function must be called on the CPU * where the timer is running on, e.g. by smp_call_function_single() * - * The original mod_timer adds the timer if it is not pending. For compatibility - * we do the same. The timer will be added on the current CPU as a oneshot timer. + * The original mod_timer adds the timer if it is not pending. For + * compatibility we do the same. The timer will be added on the current + * CPU as a oneshot timer. * * returns whether it has modified a pending timer (1) or not (0) */ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; unsigned long flags; int cpu; @@ -404,17 +453,17 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) return 1; cpu = get_cpu(); - vt_list = &per_cpu(virt_cpu_timer, cpu); + vq = &per_cpu(virt_cpu_timer, cpu); /* check if we run on the right CPU */ BUG_ON(timer->cpu != cpu); /* disable interrupts before test if timer is pending */ - spin_lock_irqsave(&vt_list->lock, flags); + spin_lock_irqsave(&vq->lock, flags); /* if timer isn't pending add it on the current CPU */ if (!vtimer_pending(timer)) { - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); /* we do not activate an interval timer with mod_virt_timer */ timer->interval = 0; timer->expires = expires; @@ -431,7 +480,7 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) timer->interval = expires; /* the timer can't expire anymore so we can release the lock */ - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); internal_add_vtimer(timer); return 1; } @@ -445,25 +494,19 @@ EXPORT_SYMBOL(mod_virt_timer); int del_virt_timer(struct vtimer_list *timer) { unsigned long flags; - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; /* check if timer is pending */ if (!vtimer_pending(timer)) return 0; - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); + vq = &per_cpu(virt_cpu_timer, timer->cpu); + spin_lock_irqsave(&vq->lock, flags); /* we don't interrupt a running timer, just let it expire! */ list_del_init(&timer->entry); - /* last timer removed */ - if (list_empty(&vt_list->list)) { - vt_list->to_expire = 0; - vt_list->offset = 0; - } - - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); return 1; } EXPORT_SYMBOL(del_virt_timer); @@ -473,24 +516,19 @@ EXPORT_SYMBOL(del_virt_timer); */ void init_cpu_vtimer(void) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; /* kick the virtual timer */ - S390_lowcore.exit_timer = VTIMER_MAX_SLICE; - S390_lowcore.last_update_timer = VTIMER_MAX_SLICE; - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); + asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer)); + + /* initialize per cpu vtimer structure */ + vq = &__get_cpu_var(virt_cpu_timer); + INIT_LIST_HEAD(&vq->list); + spin_lock_init(&vq->lock); /* enable cpu timer interrupts */ __ctl_set_bit(0,10); - - vt_list = &__get_cpu_var(virt_cpu_timer); - INIT_LIST_HEAD(&vt_list->list); - spin_lock_init(&vt_list->lock); - vt_list->to_expire = 0; - vt_list->offset = 0; - vt_list->idle = 0; - } void __init vtime_init(void) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8b00eb2ddf5..be8497186b9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -113,8 +113,6 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_dev_ioctl_check_extension(long ext) { switch (ext) { - case KVM_CAP_USER_MEMORY: - return 1; default: return 0; } @@ -185,8 +183,6 @@ struct kvm *kvm_arch_create_vm(void) debug_register_view(kvm->arch.dbf, &debug_sprintf_view); VM_EVENT(kvm, 3, "%s", "vm created"); - try_module_get(THIS_MODULE); - return kvm; out_nodbf: free_page((unsigned long)(kvm->arch.sca)); @@ -196,13 +192,33 @@ out_nokvm: return ERR_PTR(rc); } +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 3, "%s", "free cpu"); + free_page((unsigned long)(vcpu->arch.sie_block)); + kvm_vcpu_uninit(vcpu); + kfree(vcpu); +} + +static void kvm_free_vcpus(struct kvm *kvm) +{ + unsigned int i; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_vcpu_destroy(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } +} + void kvm_arch_destroy_vm(struct kvm *kvm) { - debug_unregister(kvm->arch.dbf); + kvm_free_vcpus(kvm); kvm_free_physmem(kvm); free_page((unsigned long)(kvm->arch.sca)); + debug_unregister(kvm->arch.dbf); kfree(kvm); - module_put(THIS_MODULE); } /* Section: vcpu related */ @@ -213,8 +229,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { - /* kvm common code refers to this, but does'nt call it */ - BUG(); + /* Nothing todo */ } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -308,8 +323,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, vcpu->arch.sie_block); - try_module_get(THIS_MODULE); - return vcpu; out_free_cpu: kfree(vcpu); @@ -317,14 +330,6 @@ out_nomem: return ERR_PTR(rc); } -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - VCPU_EVENT(vcpu, 3, "%s", "destroy cpu"); - free_page((unsigned long)(vcpu->arch.sie_block)); - kfree(vcpu); - module_put(THIS_MODULE); -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { /* kvm common code refers to this, but never calls it */ diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h index 85b660c17eb..c24e9c6a173 100644 --- a/arch/sh/include/asm/smp.h +++ b/arch/sh/include/asm/smp.h @@ -31,7 +31,7 @@ enum { }; void smp_message_recv(unsigned int msg); -void smp_timer_broadcast(cpumask_t mask); +void smp_timer_broadcast(const struct cpumask *mask); void local_timer_interrupt(void); void local_timer_setup(unsigned int cpu); diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index 95f0085e098..066f0fba590 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -5,7 +5,6 @@ /* sched_domains SD_NODE_INIT for sh machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ @@ -33,6 +32,7 @@ #define parent_node(node) ((void)(node),0) #define node_to_cpumask(node) ((void)node, cpu_online_map) +#define cpumask_of_node(node) ((void)node, cpu_online_mask) #define node_to_first_cpu(node) ((void)(node),0) #define pcibus_to_node(bus) ((void)(bus), -1) diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c index b151a25cb14..80c35ff71d5 100644 --- a/arch/sh/kernel/init_task.c +++ b/arch/sh/kernel/init_task.c @@ -7,7 +7,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct pt_regs fake_swapper_regs; diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 3c5ad1660bb..8f402741261 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -31,12 +31,6 @@ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); - -cpumask_t cpu_online_map; -EXPORT_SYMBOL(cpu_online_map); - static inline void __init smp_store_cpu_info(unsigned int cpu) { struct sh_cpuinfo *c = cpu_data + cpu; @@ -190,11 +184,11 @@ void arch_send_call_function_single_ipi(int cpu) plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE); } -void smp_timer_broadcast(cpumask_t mask) +void smp_timer_broadcast(const struct cpumask *mask) { int cpu; - for_each_cpu_mask(cpu, mask) + for_each_cpu(cpu, mask) plat_send_ipi(cpu, SMP_MSG_TIMER); } diff --git a/arch/sh/kernel/timers/timer-broadcast.c b/arch/sh/kernel/timers/timer-broadcast.c index c2317635230..96e8eaea1e6 100644 --- a/arch/sh/kernel/timers/timer-broadcast.c +++ b/arch/sh/kernel/timers/timer-broadcast.c @@ -51,7 +51,7 @@ void __cpuinit local_timer_setup(unsigned int cpu) clk->mult = 1; clk->set_mode = dummy_timer_set_mode; clk->broadcast = smp_timer_broadcast; - clk->cpumask = cpumask_of_cpu(cpu); + clk->cpumask = cpumask_of(cpu); clockevents_register_device(clk); } diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c index 3c61ddd4d43..0db3f951033 100644 --- a/arch/sh/kernel/timers/timer-tmu.c +++ b/arch/sh/kernel/timers/timer-tmu.c @@ -263,7 +263,7 @@ static int tmu_timer_init(void) tmu0_clockevent.min_delta_ns = clockevent_delta2ns(1, &tmu0_clockevent); - tmu0_clockevent.cpumask = cpumask_of_cpu(0); + tmu0_clockevent.cpumask = cpumask_of(0); clockevents_register_device(&tmu0_clockevent); diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 0a94d9c9cde..de58c02633b 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -4,6 +4,17 @@ mainmenu "Linux/SPARC Kernel Configuration" +config 64BIT + bool "64-bit kernel" if ARCH = "sparc" + default ARCH = "sparc64" + help + SPARC is a family of RISC microprocessors designed and marketed by + Sun Microsystems, incorporated. They are very widely found in Sun + workstations and clones. + + Say yes to build a 64-bit kernel - formerly known as sparc64 + Say no to build a 32-bit kernel - formerly known as sparc + config SPARC bool default y @@ -15,22 +26,11 @@ config SPARC select RTC_CLASS select RTC_DRV_M48T59 -# Identify this as a Sparc32 build config SPARC32 - bool - default y if ARCH = "sparc" - help - SPARC is a family of RISC microprocessors designed and marketed by - Sun Microsystems, incorporated. They are very widely found in Sun - workstations and clones. This port covers the original 32-bit SPARC; - it is old and stable and usually considered one of the "big three" - along with the Intel and Alpha ports. The UltraLinux project - maintains both the SPARC32 and SPARC64 ports; its web page is - available at <http://www.ultralinux.org/>. + def_bool !64BIT config SPARC64 - bool - default y if ARCH = "sparc64" + def_bool 64BIT select ARCH_SUPPORTS_MSI select HAVE_FUNCTION_TRACER select HAVE_KRETPROBES @@ -53,9 +53,6 @@ config BITS default 32 if SPARC32 default 64 if SPARC64 -config 64BIT - def_bool y if SPARC64 - config GENERIC_TIME bool default y if SPARC64 @@ -188,14 +185,6 @@ config ARCH_MAY_HAVE_PC_FDC bool default y -config ARCH_HAS_ILOG2_U32 - bool - default n - -config ARCH_HAS_ILOG2_U64 - bool - default n - config EMULATED_CMPXCHG bool default y if SPARC32 @@ -442,26 +431,6 @@ config SERIAL_CONSOLE endmenu menu "Bus options (PCI etc.)" -config ISA - bool - help - ISA is found on Espresso only and is not supported currently. - -config ISAPNP - bool - help - ISAPNP is not supported - -config EISA - bool - help - EISA is not supported. - -config MCA - bool - help - MCA is not supported. - config SBUS bool default y diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 05d19a3e590..cde19ae78f5 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -1,27 +1,27 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.28-rc4 -# Mon Nov 10 12:35:09 2008 +# Linux kernel version: 2.6.28 +# Fri Jan 2 18:14:26 2009 # CONFIG_SPARC=y CONFIG_SPARC64=y +CONFIG_ARCH_DEFCONFIG="arch/sparc/configs/sparc64_defconfig" +CONFIG_BITS=64 +CONFIG_64BIT=y CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_64BIT=y -CONFIG_MMU=y CONFIG_IOMMU_HELPER=y CONFIG_QUICKLIST=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_LOCKDEP_SUPPORT=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_HAVE_LATENCYTOP_SUPPORT=y CONFIG_AUDIT_ARCH=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_MMU=y CONFIG_ARCH_NO_VIRT_TO_BUS=y CONFIG_OF=y -CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # @@ -86,6 +86,7 @@ CONFIG_SLUB_DEBUG=y CONFIG_SLUB=y # CONFIG_SLOB is not set CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y # CONFIG_MARKERS is not set CONFIG_OPROFILE=m CONFIG_HAVE_OPROFILE=y @@ -127,34 +128,40 @@ CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="anticipatory" CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set # CONFIG_FREEZER is not set # # Processor type and features # -CONFIG_SPARC64_PAGE_SIZE_8KB=y -# CONFIG_SPARC64_PAGE_SIZE_64KB is not set -CONFIG_SECCOMP=y +CONFIG_SMP=y +CONFIG_NR_CPUS=64 CONFIG_HZ_100=y # CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=100 CONFIG_SCHED_HRTICK=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_SPARC64_SMP=y +CONFIG_SPARC64_PAGE_SIZE_8KB=y +# CONFIG_SPARC64_PAGE_SIZE_64KB is not set +CONFIG_SECCOMP=y CONFIG_HOTPLUG_CPU=y CONFIG_GENERIC_HARDIRQS=y CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y -CONFIG_SMP=y -CONFIG_NR_CPUS=64 # CONFIG_CPU_FREQ is not set CONFIG_US3_MC=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_GENERIC_FIND_NEXT_BIT=y -CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_HUGETLB_PAGE_SIZE_4MB=y # CONFIG_HUGETLB_PAGE_SIZE_512K is not set # CONFIG_HUGETLB_PAGE_SIZE_64K is not set @@ -183,10 +190,18 @@ CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_ZONE_DMA_FLAG=0 CONFIG_NR_QUICK=1 CONFIG_UNEVICTABLE_LRU=y +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_PREEMPT is not set +# CONFIG_CMDLINE_BOOL is not set + +# +# Bus options (PCI etc.) +# CONFIG_SBUS=y CONFIG_SBUSCHAR=y -CONFIG_SUN_AUXIO=y -CONFIG_SUN_IO=y CONFIG_SUN_LDOMS=y CONFIG_PCI=y CONFIG_PCI_DOMAINS=y @@ -195,7 +210,9 @@ CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y # CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set +# CONFIG_PCCARD is not set CONFIG_SUN_OPENPROMFS=m +CONFIG_SPARC64_PCI=y # # Executable file formats @@ -207,17 +224,13 @@ CONFIG_COMPAT_BINFMT_ELF=y CONFIG_BINFMT_MISC=m CONFIG_COMPAT=y CONFIG_SYSVIPC_COMPAT=y -CONFIG_SCHED_SMT=y -CONFIG_SCHED_MC=y -# CONFIG_PREEMPT_NONE is not set -CONFIG_PREEMPT_VOLUNTARY=y -# CONFIG_PREEMPT is not set -# CONFIG_CMDLINE_BOOL is not set CONFIG_NET=y # # Networking options # +# CONFIG_NET_NS is not set +CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_PACKET=y CONFIG_PACKET_MMAP=y CONFIG_UNIX=y @@ -314,6 +327,7 @@ CONFIG_VLAN_8021Q=m # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set # CONFIG_NET_SCHED is not set +# CONFIG_DCB is not set # # Network testing @@ -330,8 +344,8 @@ CONFIG_WIRELESS=y # CONFIG_CFG80211 is not set CONFIG_WIRELESS_OLD_REGULATORY=y # CONFIG_WIRELESS_EXT is not set +# CONFIG_LIB80211 is not set # CONFIG_MAC80211 is not set -# CONFIG_IEEE80211 is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -378,8 +392,10 @@ CONFIG_MISC_DEVICES=y # CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set +# CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_HP_ILO is not set +# CONFIG_C2PORT is not set CONFIG_HAVE_IDE=y CONFIG_IDE=y @@ -387,6 +403,7 @@ CONFIG_IDE=y # Please see Documentation/ide/ide.txt for help/info on IDE drives # CONFIG_IDE_TIMINGS=y +CONFIG_IDE_ATAPI=y # CONFIG_BLK_DEV_IDE_SATA is not set CONFIG_IDE_GD=y CONFIG_IDE_GD_ATA=y @@ -394,7 +411,6 @@ CONFIG_IDE_GD_ATA=y CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y # CONFIG_BLK_DEV_IDETAPE is not set -# CONFIG_BLK_DEV_IDESCSI is not set # CONFIG_IDE_TASK_IOCTL is not set CONFIG_IDE_PROC_FS=y @@ -477,6 +493,7 @@ CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_SRP_ATTRS is not set CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set +# CONFIG_SCSI_CXGB3_ISCSI is not set # CONFIG_BLK_DEV_3W_XXXX_RAID is not set # CONFIG_SCSI_3W_9XXX is not set # CONFIG_SCSI_ACARD is not set @@ -490,6 +507,8 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set # CONFIG_SCSI_HPTIOP is not set +# CONFIG_LIBFC is not set +# CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_IPS is not set @@ -564,6 +583,9 @@ CONFIG_PHYLIB=m # CONFIG_BROADCOM_PHY is not set # CONFIG_ICPLUS_PHY is not set # CONFIG_REALTEK_PHY is not set +# CONFIG_NATIONAL_PHY is not set +# CONFIG_STE10XP is not set +# CONFIG_LSI_ET1011C_PHY is not set # CONFIG_MDIO_BITBANG is not set CONFIG_NET_ETHERNET=y CONFIG_MII=m @@ -590,7 +612,6 @@ CONFIG_NET_PCI=y # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_B44 is not set # CONFIG_FORCEDETH is not set -# CONFIG_EEPRO100 is not set # CONFIG_E100 is not set # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set @@ -600,6 +621,7 @@ CONFIG_NET_PCI=y # CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set +# CONFIG_SMSC9420 is not set # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set @@ -629,6 +651,7 @@ CONFIG_BNX2=m # CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_CHELSIO_T3 is not set # CONFIG_ENIC is not set # CONFIG_IXGBE is not set @@ -778,6 +801,7 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=m @@ -870,6 +894,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_ADM1029 is not set # CONFIG_SENSORS_ADM1031 is not set # CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7462 is not set # CONFIG_SENSORS_ADT7470 is not set # CONFIG_SENSORS_ADT7473 is not set # CONFIG_SENSORS_ATXP1 is not set @@ -919,11 +944,11 @@ CONFIG_HWMON=y # CONFIG_THERMAL is not set # CONFIG_THERMAL_HWMON is not set # CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # # Sonics Silicon Backplane # -CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set # @@ -1071,6 +1096,7 @@ CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m CONFIG_SND_PCM_OSS_PLUGINS=y CONFIG_SND_SEQUENCER_OSS=y +# CONFIG_SND_HRTIMER is not set # CONFIG_SND_DYNAMIC_MINORS is not set CONFIG_SND_SUPPORT_OLD_API=y CONFIG_SND_VERBOSE_PROCFS=y @@ -1242,11 +1268,11 @@ CONFIG_USB_UHCI_HCD=m # CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; # # -# may also be needed; see USB_STORAGE Help for more information +# see USB_STORAGE Help for more information # CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_DEBUG is not set @@ -1337,6 +1363,7 @@ CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_DRV_M41T80 is not set # CONFIG_RTC_DRV_S35390A is not set # CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set # # SPI RTC drivers @@ -1365,7 +1392,6 @@ CONFIG_RTC_DRV_STARFIRE=y # CONFIG_DMADEVICES is not set # CONFIG_UIO is not set # CONFIG_STAGING is not set -CONFIG_STAGING_EXCLUDE_BUILD=y # # Misc Linux/SPARC drivers @@ -1544,6 +1570,7 @@ CONFIG_SCHEDSTATS=y # CONFIG_LOCK_STAT is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y # CONFIG_DEBUG_KOBJECT is not set CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set @@ -1552,6 +1579,7 @@ CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set @@ -1560,8 +1588,12 @@ CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_NOP_TRACER=y CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_RING_BUFFER=y +CONFIG_TRACING=y # # Tracers @@ -1571,7 +1603,9 @@ CONFIG_HAVE_FUNCTION_TRACER=y # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set # CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_PRINTK_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y @@ -1600,11 +1634,16 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_FIPS is not set CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_RNG=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y CONFIG_CRYPTO_GF128MUL=m CONFIG_CRYPTO_NULL=m # CONFIG_CRYPTO_CRYPTD is not set diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 89c260aab45..deeb0fba802 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -1,21 +1,6 @@ # User exported sparc header files include include/asm-generic/Kbuild.asm -header-y += ipcbuf_32.h -header-y += ipcbuf_64.h -header-y += posix_types_32.h -header-y += posix_types_64.h -header-y += ptrace_32.h -header-y += ptrace_64.h -header-y += sigcontext_32.h -header-y += sigcontext_64.h -header-y += siginfo_32.h -header-y += siginfo_64.h -header-y += signal_32.h -header-y += signal_64.h -header-y += stat_32.h -header-y += stat_64.h - header-y += apc.h header-y += asi.h header-y += display7seg.h @@ -23,16 +8,11 @@ header-y += envctrl.h header-y += fbio.h header-y += jsflash.h header-y += openprom.h -header-y += openprom_32.h -header-y += openprom_64.h header-y += openpromio.h header-y += perfctr.h header-y += psrcompat.h header-y += psr.h header-y += pstate.h -header-y += reg.h -header-y += reg_32.h -header-y += reg_64.h header-y += traps.h header-y += uctx.h header-y += utrap.h diff --git a/arch/sparc/include/asm/byteorder.h b/arch/sparc/include/asm/byteorder.h index 5a70f137f1f..738414b2655 100644 --- a/arch/sparc/include/asm/byteorder.h +++ b/arch/sparc/include/asm/byteorder.h @@ -1,16 +1,12 @@ #ifndef _SPARC_BYTEORDER_H #define _SPARC_BYTEORDER_H -#include <asm/types.h> +#include <linux/types.h> #include <asm/asi.h> #define __BIG_ENDIAN -#ifdef CONFIG_SPARC32 -#define __SWAB_64_THRU_32__ -#endif - -#ifdef CONFIG_SPARC64 +#if defined(__sparc__) && defined(__arch64__) static inline __u16 __arch_swab16p(const __u16 *addr) { __u16 ret; @@ -44,7 +40,9 @@ static inline __u64 __arch_swab64p(const __u64 *addr) } #define __arch_swab64p __arch_swab64p -#endif /* CONFIG_SPARC64 */ +#else +#define __SWAB_64_THRU_32__ +#endif /* defined(__sparc__) && defined(__arch64__) */ #include <linux/byteorder.h> diff --git a/arch/sparc/include/asm/ipcbuf.h b/arch/sparc/include/asm/ipcbuf.h index 17d6ef7b23a..66013b4fe10 100644 --- a/arch/sparc/include/asm/ipcbuf.h +++ b/arch/sparc/include/asm/ipcbuf.h @@ -1,8 +1,32 @@ -#ifndef ___ASM_SPARC_IPCBUF_H -#define ___ASM_SPARC_IPCBUF_H -#if defined(__sparc__) && defined(__arch64__) -#include <asm/ipcbuf_64.h> -#else -#include <asm/ipcbuf_32.h> -#endif +#ifndef __SPARC_IPCBUF_H +#define __SPARC_IPCBUF_H + +/* + * The ipc64_perm structure for sparc/sparc64 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 32-bit seq + * - on sparc for 32 bit mode (it is 32 bit on sparc64) + * - 2 miscellaneous 64-bit values + */ + +struct ipc64_perm +{ + __kernel_key_t key; + __kernel_uid_t uid; + __kernel_gid_t gid; + __kernel_uid_t cuid; + __kernel_gid_t cgid; +#ifndef __arch64__ + unsigned short __pad0; #endif + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned long long __unused1; + unsigned long long __unused2; +}; + +#endif /* __SPARC_IPCBUF_H */ diff --git a/arch/sparc/include/asm/ipcbuf_32.h b/arch/sparc/include/asm/ipcbuf_32.h deleted file mode 100644 index 6387209518f..00000000000 --- a/arch/sparc/include/asm/ipcbuf_32.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef _SPARC_IPCBUF_H -#define _SPARC_IPCBUF_H - -/* - * The ipc64_perm structure for sparc architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 32-bit mode - * - 32-bit seq - * - 2 miscellaneous 64-bit values (so that this structure matches - * sparc64 ipc64_perm) - */ - -struct ipc64_perm -{ - __kernel_key_t key; - __kernel_uid32_t uid; - __kernel_gid32_t gid; - __kernel_uid32_t cuid; - __kernel_gid32_t cgid; - unsigned short __pad1; - __kernel_mode_t mode; - unsigned short __pad2; - unsigned short seq; - unsigned long long __unused1; - unsigned long long __unused2; -}; - -#endif /* _SPARC_IPCBUF_H */ diff --git a/arch/sparc/include/asm/ipcbuf_64.h b/arch/sparc/include/asm/ipcbuf_64.h deleted file mode 100644 index a44b855b98d..00000000000 --- a/arch/sparc/include/asm/ipcbuf_64.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _SPARC64_IPCBUF_H -#define _SPARC64_IPCBUF_H - -/* - * The ipc64_perm structure for sparc64 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 32-bit seq - * - 2 miscellaneous 64-bit values - */ - -struct ipc64_perm -{ - __kernel_key_t key; - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_uid_t cuid; - __kernel_gid_t cgid; - __kernel_mode_t mode; - unsigned short __pad1; - unsigned short seq; - unsigned long __unused1; - unsigned long __unused2; -}; - -#endif /* _SPARC64_IPCBUF_H */ diff --git a/arch/sparc/include/asm/jsflash.h b/arch/sparc/include/asm/jsflash.h index 3457f29bd73..0717d9e39d2 100644 --- a/arch/sparc/include/asm/jsflash.h +++ b/arch/sparc/include/asm/jsflash.h @@ -8,7 +8,7 @@ #define _SPARC_JSFLASH_H #ifndef _SPARC_TYPES_H -#include <asm/types.h> +#include <linux/types.h> #endif /* diff --git a/arch/sparc/include/asm/openprom.h b/arch/sparc/include/asm/openprom.h index aaeae905ed3..963e1a45c35 100644 --- a/arch/sparc/include/asm/openprom.h +++ b/arch/sparc/include/asm/openprom.h @@ -1,8 +1,277 @@ -#ifndef ___ASM_SPARC_OPENPROM_H -#define ___ASM_SPARC_OPENPROM_H +#ifndef __SPARC_OPENPROM_H +#define __SPARC_OPENPROM_H + +/* openprom.h: Prom structures and defines for access to the OPENBOOT + * prom routines and data areas. + * + * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu) + */ + +/* Empirical constants... */ +#define LINUX_OPPROM_MAGIC 0x10010407 + +#ifndef __ASSEMBLY__ +/* V0 prom device operations. */ +struct linux_dev_v0_funcs { + int (*v0_devopen)(char *device_str); + int (*v0_devclose)(int dev_desc); + int (*v0_rdblkdev)(int dev_desc, int num_blks, int blk_st, char *buf); + int (*v0_wrblkdev)(int dev_desc, int num_blks, int blk_st, char *buf); + int (*v0_wrnetdev)(int dev_desc, int num_bytes, char *buf); + int (*v0_rdnetdev)(int dev_desc, int num_bytes, char *buf); + int (*v0_rdchardev)(int dev_desc, int num_bytes, int dummy, char *buf); + int (*v0_wrchardev)(int dev_desc, int num_bytes, int dummy, char *buf); + int (*v0_seekdev)(int dev_desc, long logical_offst, int from); +}; + +/* V2 and later prom device operations. */ +struct linux_dev_v2_funcs { + int (*v2_inst2pkg)(int d); /* Convert ihandle to phandle */ + char * (*v2_dumb_mem_alloc)(char *va, unsigned sz); + void (*v2_dumb_mem_free)(char *va, unsigned sz); + + /* To map devices into virtual I/O space. */ + char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned paddr, unsigned sz); + void (*v2_dumb_munmap)(char *virta, unsigned size); + + int (*v2_dev_open)(char *devpath); + void (*v2_dev_close)(int d); + int (*v2_dev_read)(int d, char *buf, int nbytes); + int (*v2_dev_write)(int d, char *buf, int nbytes); + int (*v2_dev_seek)(int d, int hi, int lo); + + /* Never issued (multistage load support) */ + void (*v2_wheee2)(void); + void (*v2_wheee3)(void); +}; + +struct linux_mlist_v0 { + struct linux_mlist_v0 *theres_more; + unsigned int start_adr; + unsigned num_bytes; +}; + +struct linux_mem_v0 { + struct linux_mlist_v0 **v0_totphys; + struct linux_mlist_v0 **v0_prommap; + struct linux_mlist_v0 **v0_available; /* What we can use */ +}; + +/* Arguments sent to the kernel from the boot prompt. */ +struct linux_arguments_v0 { + char *argv[8]; + char args[100]; + char boot_dev[2]; + int boot_dev_ctrl; + int boot_dev_unit; + int dev_partition; + char *kernel_file_name; + void *aieee1; /* XXX */ +}; + +/* V2 and up boot things. */ +struct linux_bootargs_v2 { + char **bootpath; + char **bootargs; + int *fd_stdin; + int *fd_stdout; +}; + +/* The top level PROM vector. */ +struct linux_romvec { + /* Version numbers. */ + unsigned int pv_magic_cookie; + unsigned int pv_romvers; + unsigned int pv_plugin_revision; + unsigned int pv_printrev; + + /* Version 0 memory descriptors. */ + struct linux_mem_v0 pv_v0mem; + + /* Node operations. */ + struct linux_nodeops *pv_nodeops; + + char **pv_bootstr; + struct linux_dev_v0_funcs pv_v0devops; + + char *pv_stdin; + char *pv_stdout; +#define PROMDEV_KBD 0 /* input from keyboard */ +#define PROMDEV_SCREEN 0 /* output to screen */ +#define PROMDEV_TTYA 1 /* in/out to ttya */ +#define PROMDEV_TTYB 2 /* in/out to ttyb */ + + /* Blocking getchar/putchar. NOT REENTRANT! (grr) */ + int (*pv_getchar)(void); + void (*pv_putchar)(int ch); + + /* Non-blocking variants. */ + int (*pv_nbgetchar)(void); + int (*pv_nbputchar)(int ch); + + void (*pv_putstr)(char *str, int len); + + /* Miscellany. */ + void (*pv_reboot)(char *bootstr); + void (*pv_printf)(__const__ char *fmt, ...); + void (*pv_abort)(void); + __volatile__ int *pv_ticks; + void (*pv_halt)(void); + void (**pv_synchook)(void); + + /* Evaluate a forth string, not different proto for V0 and V2->up. */ + union { + void (*v0_eval)(int len, char *str); + void (*v2_eval)(char *str); + } pv_fortheval; + + struct linux_arguments_v0 **pv_v0bootargs; + + /* Get ether address. */ + unsigned int (*pv_enaddr)(int d, char *enaddr); + + struct linux_bootargs_v2 pv_v2bootargs; + struct linux_dev_v2_funcs pv_v2devops; + + int filler[15]; + + /* This one is sun4c/sun4 only. */ + void (*pv_setctxt)(int ctxt, char *va, int pmeg); + + /* Prom version 3 Multiprocessor routines. This stuff is crazy. + * No joke. Calling these when there is only one cpu probably + * crashes the machine, have to test this. :-) + */ + + /* v3_cpustart() will start the cpu 'whichcpu' in mmu-context + * 'thiscontext' executing at address 'prog_counter' + */ + int (*v3_cpustart)(unsigned int whichcpu, int ctxtbl_ptr, + int thiscontext, char *prog_counter); + + /* v3_cpustop() will cause cpu 'whichcpu' to stop executing + * until a resume cpu call is made. + */ + int (*v3_cpustop)(unsigned int whichcpu); + + /* v3_cpuidle() will idle cpu 'whichcpu' until a stop or + * resume cpu call is made. + */ + int (*v3_cpuidle)(unsigned int whichcpu); + + /* v3_cpuresume() will resume processor 'whichcpu' executing + * starting with whatever 'pc' and 'npc' were left at the + * last 'idle' or 'stop' call. + */ + int (*v3_cpuresume)(unsigned int whichcpu); +}; + +/* Routines for traversing the prom device tree. */ +struct linux_nodeops { + int (*no_nextnode)(int node); + int (*no_child)(int node); + int (*no_proplen)(int node, const char *name); + int (*no_getprop)(int node, const char *name, char *val); + int (*no_setprop)(int node, const char *name, char *val, int len); + char * (*no_nextprop)(int node, char *name); +}; + +/* More fun PROM structures for device probing. */ #if defined(__sparc__) && defined(__arch64__) -#include <asm/openprom_64.h> +#define PROMREG_MAX 24 +#define PROMVADDR_MAX 16 +#define PROMINTR_MAX 32 #else -#include <asm/openprom_32.h> +#define PROMREG_MAX 16 +#define PROMVADDR_MAX 16 +#define PROMINTR_MAX 15 #endif + +struct linux_prom_registers { + unsigned int which_io; /* hi part of physical address */ + unsigned int phys_addr; /* The physical address of this register */ + unsigned int reg_size; /* How many bytes does this register take up? */ +}; + +struct linux_prom64_registers { + unsigned long phys_addr; + unsigned long reg_size; +}; + +struct linux_prom_irqs { + int pri; /* IRQ priority */ + int vector; /* This is foobar, what does it do? */ +}; + +/* Element of the "ranges" vector */ +struct linux_prom_ranges { + unsigned int ot_child_space; + unsigned int ot_child_base; /* Bus feels this */ + unsigned int ot_parent_space; + unsigned int ot_parent_base; /* CPU looks from here */ + unsigned int or_size; +}; + +/* + * Ranges and reg properties are a bit different for PCI. + */ +#if defined(__sparc__) && defined(__arch64__) +struct linux_prom_pci_registers { + unsigned int phys_hi; + unsigned int phys_mid; + unsigned int phys_lo; + + unsigned int size_hi; + unsigned int size_lo; +}; +#else +struct linux_prom_pci_registers { + /* + * We don't know what information this field contain. + * We guess, PCI device function is in bits 15:8 + * So, ... + */ + unsigned int which_io; /* Let it be which_io */ + + unsigned int phys_hi; + unsigned int phys_lo; + + unsigned int size_hi; + unsigned int size_lo; +}; + #endif + +struct linux_prom_pci_ranges { + unsigned int child_phys_hi; /* Only certain bits are encoded here. */ + unsigned int child_phys_mid; + unsigned int child_phys_lo; + + unsigned int parent_phys_hi; + unsigned int parent_phys_lo; + + unsigned int size_hi; + unsigned int size_lo; +}; + +struct linux_prom_pci_intmap { + unsigned int phys_hi; + unsigned int phys_mid; + unsigned int phys_lo; + + unsigned int interrupt; + + int cnode; + unsigned int cinterrupt; +}; + +struct linux_prom_pci_intmask { + unsigned int phys_hi; + unsigned int phys_mid; + unsigned int phys_lo; + unsigned int interrupt; +}; + +#endif /* !(__ASSEMBLY__) */ + +#endif /* !(__SPARC_OPENPROM_H) */ diff --git a/arch/sparc/include/asm/openprom_32.h b/arch/sparc/include/asm/openprom_32.h deleted file mode 100644 index 875da3552d8..00000000000 --- a/arch/sparc/include/asm/openprom_32.h +++ /dev/null @@ -1,255 +0,0 @@ -#ifndef __SPARC_OPENPROM_H -#define __SPARC_OPENPROM_H - -/* openprom.h: Prom structures and defines for access to the OPENBOOT - * prom routines and data areas. - * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) - */ - -/* Empirical constants... */ -#define LINUX_OPPROM_MAGIC 0x10010407 - -#ifndef __ASSEMBLY__ -/* V0 prom device operations. */ -struct linux_dev_v0_funcs { - int (*v0_devopen)(char *device_str); - int (*v0_devclose)(int dev_desc); - int (*v0_rdblkdev)(int dev_desc, int num_blks, int blk_st, char *buf); - int (*v0_wrblkdev)(int dev_desc, int num_blks, int blk_st, char *buf); - int (*v0_wrnetdev)(int dev_desc, int num_bytes, char *buf); - int (*v0_rdnetdev)(int dev_desc, int num_bytes, char *buf); - int (*v0_rdchardev)(int dev_desc, int num_bytes, int dummy, char *buf); - int (*v0_wrchardev)(int dev_desc, int num_bytes, int dummy, char *buf); - int (*v0_seekdev)(int dev_desc, long logical_offst, int from); -}; - -/* V2 and later prom device operations. */ -struct linux_dev_v2_funcs { - int (*v2_inst2pkg)(int d); /* Convert ihandle to phandle */ - char * (*v2_dumb_mem_alloc)(char *va, unsigned sz); - void (*v2_dumb_mem_free)(char *va, unsigned sz); - - /* To map devices into virtual I/O space. */ - char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned paddr, unsigned sz); - void (*v2_dumb_munmap)(char *virta, unsigned size); - - int (*v2_dev_open)(char *devpath); - void (*v2_dev_close)(int d); - int (*v2_dev_read)(int d, char *buf, int nbytes); - int (*v2_dev_write)(int d, char *buf, int nbytes); - int (*v2_dev_seek)(int d, int hi, int lo); - - /* Never issued (multistage load support) */ - void (*v2_wheee2)(void); - void (*v2_wheee3)(void); -}; - -struct linux_mlist_v0 { - struct linux_mlist_v0 *theres_more; - char *start_adr; - unsigned num_bytes; -}; - -struct linux_mem_v0 { - struct linux_mlist_v0 **v0_totphys; - struct linux_mlist_v0 **v0_prommap; - struct linux_mlist_v0 **v0_available; /* What we can use */ -}; - -/* Arguments sent to the kernel from the boot prompt. */ -struct linux_arguments_v0 { - char *argv[8]; - char args[100]; - char boot_dev[2]; - int boot_dev_ctrl; - int boot_dev_unit; - int dev_partition; - char *kernel_file_name; - void *aieee1; /* XXX */ -}; - -/* V2 and up boot things. */ -struct linux_bootargs_v2 { - char **bootpath; - char **bootargs; - int *fd_stdin; - int *fd_stdout; -}; - -/* The top level PROM vector. */ -struct linux_romvec { - /* Version numbers. */ - unsigned int pv_magic_cookie; - unsigned int pv_romvers; - unsigned int pv_plugin_revision; - unsigned int pv_printrev; - - /* Version 0 memory descriptors. */ - struct linux_mem_v0 pv_v0mem; - - /* Node operations. */ - struct linux_nodeops *pv_nodeops; - - char **pv_bootstr; - struct linux_dev_v0_funcs pv_v0devops; - - char *pv_stdin; - char *pv_stdout; -#define PROMDEV_KBD 0 /* input from keyboard */ -#define PROMDEV_SCREEN 0 /* output to screen */ -#define PROMDEV_TTYA 1 /* in/out to ttya */ -#define PROMDEV_TTYB 2 /* in/out to ttyb */ - - /* Blocking getchar/putchar. NOT REENTRANT! (grr) */ - int (*pv_getchar)(void); - void (*pv_putchar)(int ch); - - /* Non-blocking variants. */ - int (*pv_nbgetchar)(void); - int (*pv_nbputchar)(int ch); - - void (*pv_putstr)(char *str, int len); - - /* Miscellany. */ - void (*pv_reboot)(char *bootstr); - void (*pv_printf)(__const__ char *fmt, ...); - void (*pv_abort)(void); - __volatile__ int *pv_ticks; - void (*pv_halt)(void); - void (**pv_synchook)(void); - - /* Evaluate a forth string, not different proto for V0 and V2->up. */ - union { - void (*v0_eval)(int len, char *str); - void (*v2_eval)(char *str); - } pv_fortheval; - - struct linux_arguments_v0 **pv_v0bootargs; - - /* Get ether address. */ - unsigned int (*pv_enaddr)(int d, char *enaddr); - - struct linux_bootargs_v2 pv_v2bootargs; - struct linux_dev_v2_funcs pv_v2devops; - - int filler[15]; - - /* This one is sun4c/sun4 only. */ - void (*pv_setctxt)(int ctxt, char *va, int pmeg); - - /* Prom version 3 Multiprocessor routines. This stuff is crazy. - * No joke. Calling these when there is only one cpu probably - * crashes the machine, have to test this. :-) - */ - - /* v3_cpustart() will start the cpu 'whichcpu' in mmu-context - * 'thiscontext' executing at address 'prog_counter' - */ - int (*v3_cpustart)(unsigned int whichcpu, int ctxtbl_ptr, - int thiscontext, char *prog_counter); - - /* v3_cpustop() will cause cpu 'whichcpu' to stop executing - * until a resume cpu call is made. - */ - int (*v3_cpustop)(unsigned int whichcpu); - - /* v3_cpuidle() will idle cpu 'whichcpu' until a stop or - * resume cpu call is made. - */ - int (*v3_cpuidle)(unsigned int whichcpu); - - /* v3_cpuresume() will resume processor 'whichcpu' executing - * starting with whatever 'pc' and 'npc' were left at the - * last 'idle' or 'stop' call. - */ - int (*v3_cpuresume)(unsigned int whichcpu); -}; - -/* Routines for traversing the prom device tree. */ -struct linux_nodeops { - int (*no_nextnode)(int node); - int (*no_child)(int node); - int (*no_proplen)(int node, const char *name); - int (*no_getprop)(int node, const char *name, char *val); - int (*no_setprop)(int node, const char *name, char *val, int len); - char * (*no_nextprop)(int node, char *name); -}; - -/* More fun PROM structures for device probing. */ -#define PROMREG_MAX 16 -#define PROMVADDR_MAX 16 -#define PROMINTR_MAX 15 - -struct linux_prom_registers { - unsigned int which_io; /* is this in OBIO space? */ - unsigned int phys_addr; /* The physical address of this register */ - unsigned int reg_size; /* How many bytes does this register take up? */ -}; - -struct linux_prom_irqs { - int pri; /* IRQ priority */ - int vector; /* This is foobar, what does it do? */ -}; - -/* Element of the "ranges" vector */ -struct linux_prom_ranges { - unsigned int ot_child_space; - unsigned int ot_child_base; /* Bus feels this */ - unsigned int ot_parent_space; - unsigned int ot_parent_base; /* CPU looks from here */ - unsigned int or_size; -}; - -/* Ranges and reg properties are a bit different for PCI. */ -struct linux_prom_pci_registers { - /* - * We don't know what information this field contain. - * We guess, PCI device function is in bits 15:8 - * So, ... - */ - unsigned int which_io; /* Let it be which_io */ - - unsigned int phys_hi; - unsigned int phys_lo; - - unsigned int size_hi; - unsigned int size_lo; -}; - -struct linux_prom_pci_ranges { - unsigned int child_phys_hi; /* Only certain bits are encoded here. */ - unsigned int child_phys_mid; - unsigned int child_phys_lo; - - unsigned int parent_phys_hi; - unsigned int parent_phys_lo; - - unsigned int size_hi; - unsigned int size_lo; -}; - -struct linux_prom_pci_assigned_addresses { - unsigned int which_io; - - unsigned int phys_hi; - unsigned int phys_lo; - - unsigned int size_hi; - unsigned int size_lo; -}; - -struct linux_prom_ebus_ranges { - unsigned int child_phys_hi; - unsigned int child_phys_lo; - - unsigned int parent_phys_hi; - unsigned int parent_phys_mid; - unsigned int parent_phys_lo; - - unsigned int size; -}; - -#endif /* !(__ASSEMBLY__) */ - -#endif /* !(__SPARC_OPENPROM_H) */ diff --git a/arch/sparc/include/asm/openprom_64.h b/arch/sparc/include/asm/openprom_64.h deleted file mode 100644 index b69e4a8c917..00000000000 --- a/arch/sparc/include/asm/openprom_64.h +++ /dev/null @@ -1,280 +0,0 @@ -#ifndef __SPARC64_OPENPROM_H -#define __SPARC64_OPENPROM_H - -/* openprom.h: Prom structures and defines for access to the OPENBOOT - * prom routines and data areas. - * - * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) - */ - -#ifndef __ASSEMBLY__ -/* V0 prom device operations. */ -struct linux_dev_v0_funcs { - int (*v0_devopen)(char *device_str); - int (*v0_devclose)(int dev_desc); - int (*v0_rdblkdev)(int dev_desc, int num_blks, int blk_st, char *buf); - int (*v0_wrblkdev)(int dev_desc, int num_blks, int blk_st, char *buf); - int (*v0_wrnetdev)(int dev_desc, int num_bytes, char *buf); - int (*v0_rdnetdev)(int dev_desc, int num_bytes, char *buf); - int (*v0_rdchardev)(int dev_desc, int num_bytes, int dummy, char *buf); - int (*v0_wrchardev)(int dev_desc, int num_bytes, int dummy, char *buf); - int (*v0_seekdev)(int dev_desc, long logical_offst, int from); -}; - -/* V2 and later prom device operations. */ -struct linux_dev_v2_funcs { - int (*v2_inst2pkg)(int d); /* Convert ihandle to phandle */ - char * (*v2_dumb_mem_alloc)(char *va, unsigned sz); - void (*v2_dumb_mem_free)(char *va, unsigned sz); - - /* To map devices into virtual I/O space. */ - char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned paddr, unsigned sz); - void (*v2_dumb_munmap)(char *virta, unsigned size); - - int (*v2_dev_open)(char *devpath); - void (*v2_dev_close)(int d); - int (*v2_dev_read)(int d, char *buf, int nbytes); - int (*v2_dev_write)(int d, char *buf, int nbytes); - int (*v2_dev_seek)(int d, int hi, int lo); - - /* Never issued (multistage load support) */ - void (*v2_wheee2)(void); - void (*v2_wheee3)(void); -}; - -struct linux_mlist_v0 { - struct linux_mlist_v0 *theres_more; - unsigned start_adr; - unsigned num_bytes; -}; - -struct linux_mem_v0 { - struct linux_mlist_v0 **v0_totphys; - struct linux_mlist_v0 **v0_prommap; - struct linux_mlist_v0 **v0_available; /* What we can use */ -}; - -/* Arguments sent to the kernel from the boot prompt. */ -struct linux_arguments_v0 { - char *argv[8]; - char args[100]; - char boot_dev[2]; - int boot_dev_ctrl; - int boot_dev_unit; - int dev_partition; - char *kernel_file_name; - void *aieee1; /* XXX */ -}; - -/* V2 and up boot things. */ -struct linux_bootargs_v2 { - char **bootpath; - char **bootargs; - int *fd_stdin; - int *fd_stdout; -}; - -/* The top level PROM vector. */ -struct linux_romvec { - /* Version numbers. */ - unsigned int pv_magic_cookie; - unsigned int pv_romvers; - unsigned int pv_plugin_revision; - unsigned int pv_printrev; - - /* Version 0 memory descriptors. */ - struct linux_mem_v0 pv_v0mem; - - /* Node operations. */ - struct linux_nodeops *pv_nodeops; - - char **pv_bootstr; - struct linux_dev_v0_funcs pv_v0devops; - - char *pv_stdin; - char *pv_stdout; -#define PROMDEV_KBD 0 /* input from keyboard */ -#define PROMDEV_SCREEN 0 /* output to screen */ -#define PROMDEV_TTYA 1 /* in/out to ttya */ -#define PROMDEV_TTYB 2 /* in/out to ttyb */ - - /* Blocking getchar/putchar. NOT REENTRANT! (grr) */ - int (*pv_getchar)(void); - void (*pv_putchar)(int ch); - - /* Non-blocking variants. */ - int (*pv_nbgetchar)(void); - int (*pv_nbputchar)(int ch); - - void (*pv_putstr)(char *str, int len); - - /* Miscellany. */ - void (*pv_reboot)(char *bootstr); - void (*pv_printf)(__const__ char *fmt, ...); - void (*pv_abort)(void); - __volatile__ int *pv_ticks; - void (*pv_halt)(void); - void (**pv_synchook)(void); - - /* Evaluate a forth string, not different proto for V0 and V2->up. */ - union { - void (*v0_eval)(int len, char *str); - void (*v2_eval)(char *str); - } pv_fortheval; - - struct linux_arguments_v0 **pv_v0bootargs; - - /* Get ether address. */ - unsigned int (*pv_enaddr)(int d, char *enaddr); - - struct linux_bootargs_v2 pv_v2bootargs; - struct linux_dev_v2_funcs pv_v2devops; - - int filler[15]; - - /* This one is sun4c/sun4 only. */ - void (*pv_setctxt)(int ctxt, char *va, int pmeg); - - /* Prom version 3 Multiprocessor routines. This stuff is crazy. - * No joke. Calling these when there is only one cpu probably - * crashes the machine, have to test this. :-) - */ - - /* v3_cpustart() will start the cpu 'whichcpu' in mmu-context - * 'thiscontext' executing at address 'prog_counter' - */ - int (*v3_cpustart)(unsigned int whichcpu, int ctxtbl_ptr, - int thiscontext, char *prog_counter); - - /* v3_cpustop() will cause cpu 'whichcpu' to stop executing - * until a resume cpu call is made. - */ - int (*v3_cpustop)(unsigned int whichcpu); - - /* v3_cpuidle() will idle cpu 'whichcpu' until a stop or - * resume cpu call is made. - */ - int (*v3_cpuidle)(unsigned int whichcpu); - - /* v3_cpuresume() will resume processor 'whichcpu' executing - * starting with whatever 'pc' and 'npc' were left at the - * last 'idle' or 'stop' call. - */ - int (*v3_cpuresume)(unsigned int whichcpu); -}; - -/* Routines for traversing the prom device tree. */ -struct linux_nodeops { - int (*no_nextnode)(int node); - int (*no_child)(int node); - int (*no_proplen)(int node, char *name); - int (*no_getprop)(int node, char *name, char *val); - int (*no_setprop)(int node, char *name, char *val, int len); - char * (*no_nextprop)(int node, char *name); -}; - -/* More fun PROM structures for device probing. */ -#define PROMREG_MAX 24 -#define PROMVADDR_MAX 16 -#define PROMINTR_MAX 32 - -struct linux_prom_registers { - unsigned which_io; /* hi part of physical address */ - unsigned phys_addr; /* The physical address of this register */ - int reg_size; /* How many bytes does this register take up? */ -}; - -struct linux_prom64_registers { - unsigned long phys_addr; - unsigned long reg_size; -}; - -struct linux_prom_irqs { - int pri; /* IRQ priority */ - int vector; /* This is foobar, what does it do? */ -}; - -/* Element of the "ranges" vector */ -struct linux_prom_ranges { - unsigned int ot_child_space; - unsigned int ot_child_base; /* Bus feels this */ - unsigned int ot_parent_space; - unsigned int ot_parent_base; /* CPU looks from here */ - unsigned int or_size; -}; - -struct linux_prom64_ranges { - unsigned long ot_child_base; /* Bus feels this */ - unsigned long ot_parent_base; /* CPU looks from here */ - unsigned long or_size; -}; - -/* Ranges and reg properties are a bit different for PCI. */ -struct linux_prom_pci_registers { - unsigned int phys_hi; - unsigned int phys_mid; - unsigned int phys_lo; - - unsigned int size_hi; - unsigned int size_lo; -}; - -struct linux_prom_pci_ranges { - unsigned int child_phys_hi; /* Only certain bits are encoded here. */ - unsigned int child_phys_mid; - unsigned int child_phys_lo; - - unsigned int parent_phys_hi; - unsigned int parent_phys_lo; - - unsigned int size_hi; - unsigned int size_lo; -}; - -struct linux_prom_pci_intmap { - unsigned int phys_hi; - unsigned int phys_mid; - unsigned int phys_lo; - - unsigned int interrupt; - - int cnode; - unsigned int cinterrupt; -}; - -struct linux_prom_pci_intmask { - unsigned int phys_hi; - unsigned int phys_mid; - unsigned int phys_lo; - unsigned int interrupt; -}; - -struct linux_prom_ebus_ranges { - unsigned int child_phys_hi; - unsigned int child_phys_lo; - - unsigned int parent_phys_hi; - unsigned int parent_phys_mid; - unsigned int parent_phys_lo; - - unsigned int size; -}; - -struct linux_prom_ebus_intmap { - unsigned int phys_hi; - unsigned int phys_lo; - - unsigned int interrupt; - - int cnode; - unsigned int cinterrupt; -}; - -struct linux_prom_ebus_intmask { - unsigned int phys_hi; - unsigned int phys_lo; - unsigned int interrupt; -}; -#endif /* !(__ASSEMBLY__) */ - -#endif /* !(__SPARC64_OPENPROM_H) */ diff --git a/arch/sparc/include/asm/posix_types.h b/arch/sparc/include/asm/posix_types.h index 03a0e091a88..98d6ebb922f 100644 --- a/arch/sparc/include/asm/posix_types.h +++ b/arch/sparc/include/asm/posix_types.h @@ -1,8 +1,155 @@ -#ifndef ___ASM_SPARC_POSIX_TYPES_H -#define ___ASM_SPARC_POSIX_TYPES_H +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +#ifndef __SPARC_POSIX_TYPES_H +#define __SPARC_POSIX_TYPES_H + #if defined(__sparc__) && defined(__arch64__) -#include <asm/posix_types_64.h> +/* sparc 64 bit */ +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_clock_t; +typedef int __kernel_pid_t; +typedef int __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef unsigned long __kernel_ino_t; +typedef unsigned int __kernel_mode_t; +typedef unsigned short __kernel_umode_t; +typedef unsigned int __kernel_nlink_t; +typedef int __kernel_daddr_t; +typedef long __kernel_off_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; +typedef int __kernel_clockid_t; +typedef int __kernel_timer_t; + +typedef unsigned short __kernel_old_uid_t; +typedef unsigned short __kernel_old_gid_t; +typedef __kernel_uid_t __kernel_uid32_t; +typedef __kernel_gid_t __kernel_gid32_t; + +typedef unsigned int __kernel_old_dev_t; + +/* Note this piece of asymmetry from the v9 ABI. */ +typedef int __kernel_suseconds_t; + #else -#include <asm/posix_types_32.h> -#endif +/* sparc 32 bit */ + +typedef unsigned int __kernel_size_t; +typedef int __kernel_ssize_t; +typedef long int __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_pid_t; +typedef unsigned short __kernel_ipc_pid_t; +typedef unsigned short __kernel_uid_t; +typedef unsigned short __kernel_gid_t; +typedef unsigned long __kernel_ino_t; +typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_umode_t; +typedef short __kernel_nlink_t; +typedef long __kernel_daddr_t; +typedef long __kernel_off_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; +typedef unsigned int __kernel_uid32_t; +typedef unsigned int __kernel_gid32_t; +typedef unsigned short __kernel_old_uid_t; +typedef unsigned short __kernel_old_gid_t; +typedef unsigned short __kernel_old_dev_t; +typedef int __kernel_clockid_t; +typedef int __kernel_timer_t; + +#endif /* defined(__sparc__) && defined(__arch64__) */ + +#ifdef __GNUC__ +typedef long long __kernel_loff_t; #endif + +typedef struct { + int val[2]; +} __kernel_fsid_t; + +#ifdef __KERNEL__ + +#undef __FD_SET +static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + fdsetp->fds_bits[_tmp] |= (1UL<<_rem); +} + +#undef __FD_CLR +static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem); +} + +#undef __FD_ISSET +static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p) +{ + unsigned long _tmp = fd / __NFDBITS; + unsigned long _rem = fd % __NFDBITS; + return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0; +} + +/* + * This will unroll the loop for the normal constant cases (8 or 32 longs, + * for 256 and 1024-bit fd_sets respectively) + */ +#undef __FD_ZERO +static inline void __FD_ZERO(__kernel_fd_set *p) +{ + unsigned long *tmp = p->fds_bits; + int i; + + if (__builtin_constant_p(__FDSET_LONGS)) { + switch (__FDSET_LONGS) { + case 32: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; + tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; + tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; + tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0; + tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0; + tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0; + tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0; + return; + case 16: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; + tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; + tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; + return; + case 8: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; + return; + case 4: + tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; + return; + } + } + i = __FDSET_LONGS; + while (i) { + i--; + *tmp = 0; + tmp++; + } +} + +#endif /* __KERNEL__ */ +#endif /* __SPARC_POSIX_TYPES_H */ diff --git a/arch/sparc/include/asm/posix_types_32.h b/arch/sparc/include/asm/posix_types_32.h deleted file mode 100644 index 6bb6eb1ca0f..00000000000 --- a/arch/sparc/include/asm/posix_types_32.h +++ /dev/null @@ -1,118 +0,0 @@ -#ifndef __ARCH_SPARC_POSIX_TYPES_H -#define __ARCH_SPARC_POSIX_TYPES_H - -/* - * This file is generally used by user-level software, so you need to - * be a little careful about namespace pollution etc. Also, we cannot - * assume GCC is being used. - */ - -typedef unsigned int __kernel_size_t; -typedef int __kernel_ssize_t; -typedef long int __kernel_ptrdiff_t; -typedef long __kernel_time_t; -typedef long __kernel_suseconds_t; -typedef long __kernel_clock_t; -typedef int __kernel_pid_t; -typedef unsigned short __kernel_ipc_pid_t; -typedef unsigned short __kernel_uid_t; -typedef unsigned short __kernel_gid_t; -typedef unsigned long __kernel_ino_t; -typedef unsigned short __kernel_mode_t; -typedef unsigned short __kernel_umode_t; -typedef short __kernel_nlink_t; -typedef long __kernel_daddr_t; -typedef long __kernel_off_t; -typedef char * __kernel_caddr_t; -typedef unsigned short __kernel_uid16_t; -typedef unsigned short __kernel_gid16_t; -typedef unsigned int __kernel_uid32_t; -typedef unsigned int __kernel_gid32_t; -typedef unsigned short __kernel_old_uid_t; -typedef unsigned short __kernel_old_gid_t; -typedef unsigned short __kernel_old_dev_t; -typedef int __kernel_clockid_t; -typedef int __kernel_timer_t; - -#ifdef __GNUC__ -typedef long long __kernel_loff_t; -#endif - -typedef struct { - int val[2]; -} __kernel_fsid_t; - -#if defined(__KERNEL__) - -#undef __FD_SET -static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] |= (1UL<<_rem); -} - -#undef __FD_CLR -static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem); -} - -#undef __FD_ISSET -static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0; -} - -/* - * This will unroll the loop for the normal constant cases (8 or 32 longs, - * for 256 and 1024-bit fd_sets respectively) - */ -#undef __FD_ZERO -static inline void __FD_ZERO(__kernel_fd_set *p) -{ - unsigned long *tmp = p->fds_bits; - int i; - - if (__builtin_constant_p(__FDSET_LONGS)) { - switch (__FDSET_LONGS) { - case 32: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; - tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; - tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0; - tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0; - tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0; - tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0; - return; - case 16: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; - tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; - return; - case 8: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - return; - case 4: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - return; - } - } - i = __FDSET_LONGS; - while (i) { - i--; - *tmp = 0; - tmp++; - } -} - -#endif /* defined(__KERNEL__) */ - -#endif /* !(__ARCH_SPARC_POSIX_TYPES_H) */ diff --git a/arch/sparc/include/asm/posix_types_64.h b/arch/sparc/include/asm/posix_types_64.h deleted file mode 100644 index ba8f9329576..00000000000 --- a/arch/sparc/include/asm/posix_types_64.h +++ /dev/null @@ -1,122 +0,0 @@ -#ifndef __ARCH_SPARC64_POSIX_TYPES_H -#define __ARCH_SPARC64_POSIX_TYPES_H - -/* - * This file is generally used by user-level software, so you need to - * be a little careful about namespace pollution etc. Also, we cannot - * assume GCC is being used. - */ - -typedef unsigned long __kernel_size_t; -typedef long __kernel_ssize_t; -typedef long __kernel_ptrdiff_t; -typedef long __kernel_time_t; -typedef long __kernel_clock_t; -typedef int __kernel_pid_t; -typedef int __kernel_ipc_pid_t; -typedef unsigned int __kernel_uid_t; -typedef unsigned int __kernel_gid_t; -typedef unsigned long __kernel_ino_t; -typedef unsigned int __kernel_mode_t; -typedef unsigned short __kernel_umode_t; -typedef unsigned int __kernel_nlink_t; -typedef int __kernel_daddr_t; -typedef long __kernel_off_t; -typedef char * __kernel_caddr_t; -typedef unsigned short __kernel_uid16_t; -typedef unsigned short __kernel_gid16_t; -typedef int __kernel_clockid_t; -typedef int __kernel_timer_t; - -typedef unsigned short __kernel_old_uid_t; -typedef unsigned short __kernel_old_gid_t; -typedef __kernel_uid_t __kernel_uid32_t; -typedef __kernel_gid_t __kernel_gid32_t; - -typedef unsigned int __kernel_old_dev_t; - -/* Note this piece of asymmetry from the v9 ABI. */ -typedef int __kernel_suseconds_t; - -#ifdef __GNUC__ -typedef long long __kernel_loff_t; -#endif - -typedef struct { - int val[2]; -} __kernel_fsid_t; - -#if defined(__KERNEL__) - -#undef __FD_SET -static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] |= (1UL<<_rem); -} - -#undef __FD_CLR -static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem); -} - -#undef __FD_ISSET -static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0; -} - -/* - * This will unroll the loop for the normal constant cases (8 or 32 longs, - * for 256 and 1024-bit fd_sets respectively) - */ -#undef __FD_ZERO -static inline void __FD_ZERO(__kernel_fd_set *p) -{ - unsigned long *tmp = p->fds_bits; - int i; - - if (__builtin_constant_p(__FDSET_LONGS)) { - switch (__FDSET_LONGS) { - case 32: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; - tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; - tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0; - tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0; - tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0; - tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0; - return; - case 16: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; - tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; - return; - case 8: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - return; - case 4: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - return; - } - } - i = __FDSET_LONGS; - while (i) { - i--; - *tmp = 0; - tmp++; - } -} - -#endif /* defined(__KERNEL__) */ - -#endif /* !(__ARCH_SPARC64_POSIX_TYPES_H) */ diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index 2ae67a2e7f3..09521c6a5ed 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -99,7 +99,7 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc, "st\t%%g0, [%0 + %3 + 0x3c]" : /* no outputs */ : "r" (regs), - "r" (sp - sizeof(struct reg_window)), + "r" (sp - sizeof(struct reg_window32)), "r" (zero), "i" ((const unsigned long)(&((struct pt_regs *)0)->u_regs[0])) : "memory"); diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h index 6dcbe2eed2e..30b0b797dc0 100644 --- a/arch/sparc/include/asm/ptrace.h +++ b/arch/sparc/include/asm/ptrace.h @@ -1,8 +1,448 @@ -#ifndef ___ASM_SPARC_PTRACE_H -#define ___ASM_SPARC_PTRACE_H +#ifndef __SPARC_PTRACE_H +#define __SPARC_PTRACE_H + #if defined(__sparc__) && defined(__arch64__) -#include <asm/ptrace_64.h> +/* 64 bit sparc */ +#include <asm/pstate.h> + +/* This struct defines the way the registers are stored on the + * stack during a system call and basically all traps. + */ + +/* This magic value must have the low 9 bits clear, + * as that is where we encode the %tt value, see below. + */ +#define PT_REGS_MAGIC 0x57ac6c00 + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +struct pt_regs { + unsigned long u_regs[16]; /* globals and ins */ + unsigned long tstate; + unsigned long tpc; + unsigned long tnpc; + unsigned int y; + + /* We encode a magic number, PT_REGS_MAGIC, along + * with the %tt (trap type) register value at trap + * entry time. The magic number allows us to identify + * accurately a trap stack frame in the stack + * unwinder, and the %tt value allows us to test + * things like "in a system call" etc. for an arbitray + * process. + * + * The PT_REGS_MAGIC is choosen such that it can be + * loaded completely using just a sethi instruction. + */ + unsigned int magic; +}; + +struct pt_regs32 { + unsigned int psr; + unsigned int pc; + unsigned int npc; + unsigned int y; + unsigned int u_regs[16]; /* globals and ins */ +}; + +/* A V9 register window */ +struct reg_window { + unsigned long locals[8]; + unsigned long ins[8]; +}; + +/* A 32-bit register window. */ +struct reg_window32 { + unsigned int locals[8]; + unsigned int ins[8]; +}; + +/* A V9 Sparc stack frame */ +struct sparc_stackf { + unsigned long locals[8]; + unsigned long ins[6]; + struct sparc_stackf *fp; + unsigned long callers_pc; + char *structptr; + unsigned long xargs[6]; + unsigned long xxargs[1]; +}; + +/* A 32-bit Sparc stack frame */ +struct sparc_stackf32 { + unsigned int locals[8]; + unsigned int ins[6]; + unsigned int fp; + unsigned int callers_pc; + unsigned int structptr; + unsigned int xargs[6]; + unsigned int xxargs[1]; +}; + +struct sparc_trapf { + unsigned long locals[8]; + unsigned long ins[8]; + unsigned long _unused; + struct pt_regs *regs; +}; +#endif /* (!__ASSEMBLY__) */ #else -#include <asm/ptrace_32.h> +/* 32 bit sparc */ + +#include <asm/psr.h> + +/* This struct defines the way the registers are stored on the + * stack during a system call and basically all traps. + */ +#ifndef __ASSEMBLY__ + +struct pt_regs { + unsigned long psr; + unsigned long pc; + unsigned long npc; + unsigned long y; + unsigned long u_regs[16]; /* globals and ins */ +}; + +/* A 32-bit register window. */ +struct reg_window32 { + unsigned long locals[8]; + unsigned long ins[8]; +}; + +/* A Sparc stack frame */ +struct sparc_stackf { + unsigned long locals[8]; + unsigned long ins[6]; + struct sparc_stackf *fp; + unsigned long callers_pc; + char *structptr; + unsigned long xargs[6]; + unsigned long xxargs[1]; +}; +#endif /* (!__ASSEMBLY__) */ + +#endif /* (defined(__sparc__) && defined(__arch64__))*/ + +#ifndef __ASSEMBLY__ + +#define TRACEREG_SZ sizeof(struct pt_regs) +#define STACKFRAME_SZ sizeof(struct sparc_stackf) + +#define TRACEREG32_SZ sizeof(struct pt_regs32) +#define STACKFRAME32_SZ sizeof(struct sparc_stackf32) + +#endif /* (!__ASSEMBLY__) */ + +#define UREG_G0 0 +#define UREG_G1 1 +#define UREG_G2 2 +#define UREG_G3 3 +#define UREG_G4 4 +#define UREG_G5 5 +#define UREG_G6 6 +#define UREG_G7 7 +#define UREG_I0 8 +#define UREG_I1 9 +#define UREG_I2 10 +#define UREG_I3 11 +#define UREG_I4 12 +#define UREG_I5 13 +#define UREG_I6 14 +#define UREG_I7 15 +#define UREG_FP UREG_I6 +#define UREG_RETPC UREG_I7 + +#if defined(__sparc__) && defined(__arch64__) +/* 64 bit sparc */ + +#ifndef __ASSEMBLY__ + +#ifdef __KERNEL__ + +#include <linux/threads.h> +#include <asm/system.h> + +static inline int pt_regs_trap_type(struct pt_regs *regs) +{ + return regs->magic & 0x1ff; +} + +static inline bool pt_regs_is_syscall(struct pt_regs *regs) +{ + return (regs->tstate & TSTATE_SYSCALL); +} + +static inline bool pt_regs_clear_syscall(struct pt_regs *regs) +{ + return (regs->tstate &= ~TSTATE_SYSCALL); +} + +#define arch_ptrace_stop_needed(exit_code, info) \ +({ flush_user_windows(); \ + get_thread_wsaved() != 0; \ +}) + +#define arch_ptrace_stop(exit_code, info) \ + synchronize_user_stack() + +struct global_reg_snapshot { + unsigned long tstate; + unsigned long tpc; + unsigned long tnpc; + unsigned long o7; + unsigned long i7; + unsigned long rpc; + struct thread_info *thread; + unsigned long pad1; +}; +extern struct global_reg_snapshot global_reg_snapshot[NR_CPUS]; + +#define force_successful_syscall_return() \ +do { current_thread_info()->syscall_noerror = 1; \ +} while (0) +#define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV)) +#define instruction_pointer(regs) ((regs)->tpc) +#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP]) +#define regs_return_value(regs) ((regs)->u_regs[UREG_I0]) +#ifdef CONFIG_SMP +extern unsigned long profile_pc(struct pt_regs *); +#else +#define profile_pc(regs) instruction_pointer(regs) #endif +extern void show_regs(struct pt_regs *); +#endif /* (__KERNEL__) */ + +#else /* __ASSEMBLY__ */ +/* For assembly code. */ +#define TRACEREG_SZ 0xa0 +#define STACKFRAME_SZ 0xc0 + +#define TRACEREG32_SZ 0x50 +#define STACKFRAME32_SZ 0x60 +#endif /* __ASSEMBLY__ */ + +#else /* (defined(__sparc__) && defined(__arch64__)) */ + +/* 32 bit sparc */ + +#ifndef __ASSEMBLY__ + +#ifdef __KERNEL__ + +#include <asm/system.h> + +static inline bool pt_regs_is_syscall(struct pt_regs *regs) +{ + return (regs->psr & PSR_SYSCALL); +} + +static inline bool pt_regs_clear_syscall(struct pt_regs *regs) +{ + return (regs->psr &= ~PSR_SYSCALL); +} + +#define arch_ptrace_stop_needed(exit_code, info) \ +({ flush_user_windows(); \ + current_thread_info()->w_saved != 0; \ +}) + +#define arch_ptrace_stop(exit_code, info) \ + synchronize_user_stack() + +#define user_mode(regs) (!((regs)->psr & PSR_PS)) +#define instruction_pointer(regs) ((regs)->pc) +#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP]) +unsigned long profile_pc(struct pt_regs *); +extern void show_regs(struct pt_regs *); +#endif /* (__KERNEL__) */ + +#else /* (!__ASSEMBLY__) */ +/* For assembly code. */ +#define TRACEREG_SZ 0x50 +#define STACKFRAME_SZ 0x60 +#endif /* (!__ASSEMBLY__) */ + +#endif /* (defined(__sparc__) && defined(__arch64__)) */ + +#ifdef __KERNEL__ +#define STACK_BIAS 2047 #endif + +/* These are for pt_regs. */ +#define PT_V9_G0 0x00 +#define PT_V9_G1 0x08 +#define PT_V9_G2 0x10 +#define PT_V9_G3 0x18 +#define PT_V9_G4 0x20 +#define PT_V9_G5 0x28 +#define PT_V9_G6 0x30 +#define PT_V9_G7 0x38 +#define PT_V9_I0 0x40 +#define PT_V9_I1 0x48 +#define PT_V9_I2 0x50 +#define PT_V9_I3 0x58 +#define PT_V9_I4 0x60 +#define PT_V9_I5 0x68 +#define PT_V9_I6 0x70 +#define PT_V9_FP PT_V9_I6 +#define PT_V9_I7 0x78 +#define PT_V9_TSTATE 0x80 +#define PT_V9_TPC 0x88 +#define PT_V9_TNPC 0x90 +#define PT_V9_Y 0x98 +#define PT_V9_MAGIC 0x9c +#define PT_TSTATE PT_V9_TSTATE +#define PT_TPC PT_V9_TPC +#define PT_TNPC PT_V9_TNPC + +/* These for pt_regs32. */ +#define PT_PSR 0x0 +#define PT_PC 0x4 +#define PT_NPC 0x8 +#define PT_Y 0xc +#define PT_G0 0x10 +#define PT_WIM PT_G0 +#define PT_G1 0x14 +#define PT_G2 0x18 +#define PT_G3 0x1c +#define PT_G4 0x20 +#define PT_G5 0x24 +#define PT_G6 0x28 +#define PT_G7 0x2c +#define PT_I0 0x30 +#define PT_I1 0x34 +#define PT_I2 0x38 +#define PT_I3 0x3c +#define PT_I4 0x40 +#define PT_I5 0x44 +#define PT_I6 0x48 +#define PT_FP PT_I6 +#define PT_I7 0x4c + +/* Reg_window offsets */ +#define RW_V9_L0 0x00 +#define RW_V9_L1 0x08 +#define RW_V9_L2 0x10 +#define RW_V9_L3 0x18 +#define RW_V9_L4 0x20 +#define RW_V9_L5 0x28 +#define RW_V9_L6 0x30 +#define RW_V9_L7 0x38 +#define RW_V9_I0 0x40 +#define RW_V9_I1 0x48 +#define RW_V9_I2 0x50 +#define RW_V9_I3 0x58 +#define RW_V9_I4 0x60 +#define RW_V9_I5 0x68 +#define RW_V9_I6 0x70 +#define RW_V9_I7 0x78 + +#define RW_L0 0x00 +#define RW_L1 0x04 +#define RW_L2 0x08 +#define RW_L3 0x0c +#define RW_L4 0x10 +#define RW_L5 0x14 +#define RW_L6 0x18 +#define RW_L7 0x1c +#define RW_I0 0x20 +#define RW_I1 0x24 +#define RW_I2 0x28 +#define RW_I3 0x2c +#define RW_I4 0x30 +#define RW_I5 0x34 +#define RW_I6 0x38 +#define RW_I7 0x3c + +/* Stack_frame offsets */ +#define SF_V9_L0 0x00 +#define SF_V9_L1 0x08 +#define SF_V9_L2 0x10 +#define SF_V9_L3 0x18 +#define SF_V9_L4 0x20 +#define SF_V9_L5 0x28 +#define SF_V9_L6 0x30 +#define SF_V9_L7 0x38 +#define SF_V9_I0 0x40 +#define SF_V9_I1 0x48 +#define SF_V9_I2 0x50 +#define SF_V9_I3 0x58 +#define SF_V9_I4 0x60 +#define SF_V9_I5 0x68 +#define SF_V9_FP 0x70 +#define SF_V9_PC 0x78 +#define SF_V9_RETP 0x80 +#define SF_V9_XARG0 0x88 +#define SF_V9_XARG1 0x90 +#define SF_V9_XARG2 0x98 +#define SF_V9_XARG3 0xa0 +#define SF_V9_XARG4 0xa8 +#define SF_V9_XARG5 0xb0 +#define SF_V9_XXARG 0xb8 + +#define SF_L0 0x00 +#define SF_L1 0x04 +#define SF_L2 0x08 +#define SF_L3 0x0c +#define SF_L4 0x10 +#define SF_L5 0x14 +#define SF_L6 0x18 +#define SF_L7 0x1c +#define SF_I0 0x20 +#define SF_I1 0x24 +#define SF_I2 0x28 +#define SF_I3 0x2c +#define SF_I4 0x30 +#define SF_I5 0x34 +#define SF_FP 0x38 +#define SF_PC 0x3c +#define SF_RETP 0x40 +#define SF_XARG0 0x44 +#define SF_XARG1 0x48 +#define SF_XARG2 0x4c +#define SF_XARG3 0x50 +#define SF_XARG4 0x54 +#define SF_XARG5 0x58 +#define SF_XXARG 0x5c + +#ifdef __KERNEL__ + +/* global_reg_snapshot offsets */ +#define GR_SNAP_TSTATE 0x00 +#define GR_SNAP_TPC 0x08 +#define GR_SNAP_TNPC 0x10 +#define GR_SNAP_O7 0x18 +#define GR_SNAP_I7 0x20 +#define GR_SNAP_RPC 0x28 +#define GR_SNAP_THREAD 0x30 +#define GR_SNAP_PAD1 0x38 + +#endif /* __KERNEL__ */ + +/* Stuff for the ptrace system call */ +#define PTRACE_SPARC_DETACH 11 +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_READDATA 16 +#define PTRACE_WRITEDATA 17 +#define PTRACE_READTEXT 18 +#define PTRACE_WRITETEXT 19 +#define PTRACE_GETFPAREGS 20 +#define PTRACE_SETFPAREGS 21 + +/* There are for debugging 64-bit processes, either from a 32 or 64 bit + * parent. Thus their complements are for debugging 32-bit processes only. + */ + +#define PTRACE_GETREGS64 22 +#define PTRACE_SETREGS64 23 +/* PTRACE_SYSCALL is 24 */ +#define PTRACE_GETFPREGS64 25 +#define PTRACE_SETFPREGS64 26 + +#endif /* !(__SPARC_PTRACE_H) */ diff --git a/arch/sparc/include/asm/ptrace_32.h b/arch/sparc/include/asm/ptrace_32.h deleted file mode 100644 index 4cef450167d..00000000000 --- a/arch/sparc/include/asm/ptrace_32.h +++ /dev/null @@ -1,186 +0,0 @@ -#ifndef _SPARC_PTRACE_H -#define _SPARC_PTRACE_H - -#include <asm/psr.h> - -/* This struct defines the way the registers are stored on the - * stack during a system call and basically all traps. - */ - -#ifndef __ASSEMBLY__ - -#include <linux/types.h> - -struct pt_regs { - unsigned long psr; - unsigned long pc; - unsigned long npc; - unsigned long y; - unsigned long u_regs[16]; /* globals and ins */ -}; - -#define UREG_G0 0 -#define UREG_G1 1 -#define UREG_G2 2 -#define UREG_G3 3 -#define UREG_G4 4 -#define UREG_G5 5 -#define UREG_G6 6 -#define UREG_G7 7 -#define UREG_I0 8 -#define UREG_I1 9 -#define UREG_I2 10 -#define UREG_I3 11 -#define UREG_I4 12 -#define UREG_I5 13 -#define UREG_I6 14 -#define UREG_I7 15 -#define UREG_WIM UREG_G0 -#define UREG_FADDR UREG_G0 -#define UREG_FP UREG_I6 -#define UREG_RETPC UREG_I7 - -/* A register window */ -struct reg_window { - unsigned long locals[8]; - unsigned long ins[8]; -}; - -/* A Sparc stack frame */ -struct sparc_stackf { - unsigned long locals[8]; - unsigned long ins[6]; - struct sparc_stackf *fp; - unsigned long callers_pc; - char *structptr; - unsigned long xargs[6]; - unsigned long xxargs[1]; -}; - -#define TRACEREG_SZ sizeof(struct pt_regs) -#define STACKFRAME_SZ sizeof(struct sparc_stackf) - -#ifdef __KERNEL__ - -#include <asm/system.h> - -static inline bool pt_regs_is_syscall(struct pt_regs *regs) -{ - return (regs->psr & PSR_SYSCALL); -} - -static inline bool pt_regs_clear_syscall(struct pt_regs *regs) -{ - return (regs->psr &= ~PSR_SYSCALL); -} - -#define arch_ptrace_stop_needed(exit_code, info) \ -({ flush_user_windows(); \ - current_thread_info()->w_saved != 0; \ -}) - -#define arch_ptrace_stop(exit_code, info) \ - synchronize_user_stack() - -#define user_mode(regs) (!((regs)->psr & PSR_PS)) -#define instruction_pointer(regs) ((regs)->pc) -#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP]) -unsigned long profile_pc(struct pt_regs *); -extern void show_regs(struct pt_regs *); -#endif - -#else /* __ASSEMBLY__ */ -/* For assembly code. */ -#define TRACEREG_SZ 0x50 -#define STACKFRAME_SZ 0x60 -#endif - -/* - * The asm-offsets.h is a generated file, so we cannot include it. - * It may be OK for glibc headers, but it's utterly pointless for C code. - * The assembly code using those offsets has to include it explicitly. - */ -/* #include <asm/asm-offsets.h> */ - -/* These are for pt_regs. */ -#define PT_PSR 0x0 -#define PT_PC 0x4 -#define PT_NPC 0x8 -#define PT_Y 0xc -#define PT_G0 0x10 -#define PT_WIM PT_G0 -#define PT_G1 0x14 -#define PT_G2 0x18 -#define PT_G3 0x1c -#define PT_G4 0x20 -#define PT_G5 0x24 -#define PT_G6 0x28 -#define PT_G7 0x2c -#define PT_I0 0x30 -#define PT_I1 0x34 -#define PT_I2 0x38 -#define PT_I3 0x3c -#define PT_I4 0x40 -#define PT_I5 0x44 -#define PT_I6 0x48 -#define PT_FP PT_I6 -#define PT_I7 0x4c - -/* Reg_window offsets */ -#define RW_L0 0x00 -#define RW_L1 0x04 -#define RW_L2 0x08 -#define RW_L3 0x0c -#define RW_L4 0x10 -#define RW_L5 0x14 -#define RW_L6 0x18 -#define RW_L7 0x1c -#define RW_I0 0x20 -#define RW_I1 0x24 -#define RW_I2 0x28 -#define RW_I3 0x2c -#define RW_I4 0x30 -#define RW_I5 0x34 -#define RW_I6 0x38 -#define RW_I7 0x3c - -/* Stack_frame offsets */ -#define SF_L0 0x00 -#define SF_L1 0x04 -#define SF_L2 0x08 -#define SF_L3 0x0c -#define SF_L4 0x10 -#define SF_L5 0x14 -#define SF_L6 0x18 -#define SF_L7 0x1c -#define SF_I0 0x20 -#define SF_I1 0x24 -#define SF_I2 0x28 -#define SF_I3 0x2c -#define SF_I4 0x30 -#define SF_I5 0x34 -#define SF_FP 0x38 -#define SF_PC 0x3c -#define SF_RETP 0x40 -#define SF_XARG0 0x44 -#define SF_XARG1 0x48 -#define SF_XARG2 0x4c -#define SF_XARG3 0x50 -#define SF_XARG4 0x54 -#define SF_XARG5 0x58 -#define SF_XXARG 0x5c - -/* Stuff for the ptrace system call */ -#define PTRACE_SPARC_DETACH 11 -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 -#define PTRACE_READDATA 16 -#define PTRACE_WRITEDATA 17 -#define PTRACE_READTEXT 18 -#define PTRACE_WRITETEXT 19 -#define PTRACE_GETFPAREGS 20 -#define PTRACE_SETFPAREGS 21 - -#endif /* !(_SPARC_PTRACE_H) */ diff --git a/arch/sparc/include/asm/ptrace_64.h b/arch/sparc/include/asm/ptrace_64.h deleted file mode 100644 index cd6fbfc2043..00000000000 --- a/arch/sparc/include/asm/ptrace_64.h +++ /dev/null @@ -1,356 +0,0 @@ -#ifndef _SPARC64_PTRACE_H -#define _SPARC64_PTRACE_H - -#include <asm/pstate.h> - -/* This struct defines the way the registers are stored on the - * stack during a system call and basically all traps. - */ - -/* This magic value must have the low 9 bits clear, - * as that is where we encode the %tt value, see below. - */ -#define PT_REGS_MAGIC 0x57ac6c00 - -#ifndef __ASSEMBLY__ - -#include <linux/types.h> - -struct pt_regs { - unsigned long u_regs[16]; /* globals and ins */ - unsigned long tstate; - unsigned long tpc; - unsigned long tnpc; - unsigned int y; - - /* We encode a magic number, PT_REGS_MAGIC, along - * with the %tt (trap type) register value at trap - * entry time. The magic number allows us to identify - * accurately a trap stack frame in the stack - * unwinder, and the %tt value allows us to test - * things like "in a system call" etc. for an arbitray - * process. - * - * The PT_REGS_MAGIC is choosen such that it can be - * loaded completely using just a sethi instruction. - */ - unsigned int magic; -}; - -struct pt_regs32 { - unsigned int psr; - unsigned int pc; - unsigned int npc; - unsigned int y; - unsigned int u_regs[16]; /* globals and ins */ -}; - -#define UREG_G0 0 -#define UREG_G1 1 -#define UREG_G2 2 -#define UREG_G3 3 -#define UREG_G4 4 -#define UREG_G5 5 -#define UREG_G6 6 -#define UREG_G7 7 -#define UREG_I0 8 -#define UREG_I1 9 -#define UREG_I2 10 -#define UREG_I3 11 -#define UREG_I4 12 -#define UREG_I5 13 -#define UREG_I6 14 -#define UREG_I7 15 -#define UREG_FP UREG_I6 -#define UREG_RETPC UREG_I7 - -/* A V9 register window */ -struct reg_window { - unsigned long locals[8]; - unsigned long ins[8]; -}; - -/* A 32-bit register window. */ -struct reg_window32 { - unsigned int locals[8]; - unsigned int ins[8]; -}; - -/* A V9 Sparc stack frame */ -struct sparc_stackf { - unsigned long locals[8]; - unsigned long ins[6]; - struct sparc_stackf *fp; - unsigned long callers_pc; - char *structptr; - unsigned long xargs[6]; - unsigned long xxargs[1]; -}; - -/* A 32-bit Sparc stack frame */ -struct sparc_stackf32 { - unsigned int locals[8]; - unsigned int ins[6]; - unsigned int fp; - unsigned int callers_pc; - unsigned int structptr; - unsigned int xargs[6]; - unsigned int xxargs[1]; -}; - -struct sparc_trapf { - unsigned long locals[8]; - unsigned long ins[8]; - unsigned long _unused; - struct pt_regs *regs; -}; - -#define TRACEREG_SZ sizeof(struct pt_regs) -#define STACKFRAME_SZ sizeof(struct sparc_stackf) - -#define TRACEREG32_SZ sizeof(struct pt_regs32) -#define STACKFRAME32_SZ sizeof(struct sparc_stackf32) - -#ifdef __KERNEL__ - -#include <linux/threads.h> -#include <asm/system.h> - -static inline int pt_regs_trap_type(struct pt_regs *regs) -{ - return regs->magic & 0x1ff; -} - -static inline bool pt_regs_is_syscall(struct pt_regs *regs) -{ - return (regs->tstate & TSTATE_SYSCALL); -} - -static inline bool pt_regs_clear_syscall(struct pt_regs *regs) -{ - return (regs->tstate &= ~TSTATE_SYSCALL); -} - -#define arch_ptrace_stop_needed(exit_code, info) \ -({ flush_user_windows(); \ - get_thread_wsaved() != 0; \ -}) - -#define arch_ptrace_stop(exit_code, info) \ - synchronize_user_stack() - -struct global_reg_snapshot { - unsigned long tstate; - unsigned long tpc; - unsigned long tnpc; - unsigned long o7; - unsigned long i7; - unsigned long rpc; - struct thread_info *thread; - unsigned long pad1; -}; -extern struct global_reg_snapshot global_reg_snapshot[NR_CPUS]; - -#define force_successful_syscall_return() \ -do { current_thread_info()->syscall_noerror = 1; \ -} while (0) -#define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV)) -#define instruction_pointer(regs) ((regs)->tpc) -#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP]) -#define regs_return_value(regs) ((regs)->u_regs[UREG_I0]) -#ifdef CONFIG_SMP -extern unsigned long profile_pc(struct pt_regs *); -#else -#define profile_pc(regs) instruction_pointer(regs) -#endif -extern void show_regs(struct pt_regs *); -#endif - -#else /* __ASSEMBLY__ */ -/* For assembly code. */ -#define TRACEREG_SZ 0xa0 -#define STACKFRAME_SZ 0xc0 - -#define TRACEREG32_SZ 0x50 -#define STACKFRAME32_SZ 0x60 -#endif - -#ifdef __KERNEL__ -#define STACK_BIAS 2047 -#endif - -/* These are for pt_regs. */ -#define PT_V9_G0 0x00 -#define PT_V9_G1 0x08 -#define PT_V9_G2 0x10 -#define PT_V9_G3 0x18 -#define PT_V9_G4 0x20 -#define PT_V9_G5 0x28 -#define PT_V9_G6 0x30 -#define PT_V9_G7 0x38 -#define PT_V9_I0 0x40 -#define PT_V9_I1 0x48 -#define PT_V9_I2 0x50 -#define PT_V9_I3 0x58 -#define PT_V9_I4 0x60 -#define PT_V9_I5 0x68 -#define PT_V9_I6 0x70 -#define PT_V9_FP PT_V9_I6 -#define PT_V9_I7 0x78 -#define PT_V9_TSTATE 0x80 -#define PT_V9_TPC 0x88 -#define PT_V9_TNPC 0x90 -#define PT_V9_Y 0x98 -#define PT_V9_MAGIC 0x9c -#define PT_TSTATE PT_V9_TSTATE -#define PT_TPC PT_V9_TPC -#define PT_TNPC PT_V9_TNPC - -/* These for pt_regs32. */ -#define PT_PSR 0x0 -#define PT_PC 0x4 -#define PT_NPC 0x8 -#define PT_Y 0xc -#define PT_G0 0x10 -#define PT_WIM PT_G0 -#define PT_G1 0x14 -#define PT_G2 0x18 -#define PT_G3 0x1c -#define PT_G4 0x20 -#define PT_G5 0x24 -#define PT_G6 0x28 -#define PT_G7 0x2c -#define PT_I0 0x30 -#define PT_I1 0x34 -#define PT_I2 0x38 -#define PT_I3 0x3c -#define PT_I4 0x40 -#define PT_I5 0x44 -#define PT_I6 0x48 -#define PT_FP PT_I6 -#define PT_I7 0x4c - -/* Reg_window offsets */ -#define RW_V9_L0 0x00 -#define RW_V9_L1 0x08 -#define RW_V9_L2 0x10 -#define RW_V9_L3 0x18 -#define RW_V9_L4 0x20 -#define RW_V9_L5 0x28 -#define RW_V9_L6 0x30 -#define RW_V9_L7 0x38 -#define RW_V9_I0 0x40 -#define RW_V9_I1 0x48 -#define RW_V9_I2 0x50 -#define RW_V9_I3 0x58 -#define RW_V9_I4 0x60 -#define RW_V9_I5 0x68 -#define RW_V9_I6 0x70 -#define RW_V9_I7 0x78 - -#define RW_L0 0x00 -#define RW_L1 0x04 -#define RW_L2 0x08 -#define RW_L3 0x0c -#define RW_L4 0x10 -#define RW_L5 0x14 -#define RW_L6 0x18 -#define RW_L7 0x1c -#define RW_I0 0x20 -#define RW_I1 0x24 -#define RW_I2 0x28 -#define RW_I3 0x2c -#define RW_I4 0x30 -#define RW_I5 0x34 -#define RW_I6 0x38 -#define RW_I7 0x3c - -/* Stack_frame offsets */ -#define SF_V9_L0 0x00 -#define SF_V9_L1 0x08 -#define SF_V9_L2 0x10 -#define SF_V9_L3 0x18 -#define SF_V9_L4 0x20 -#define SF_V9_L5 0x28 -#define SF_V9_L6 0x30 -#define SF_V9_L7 0x38 -#define SF_V9_I0 0x40 -#define SF_V9_I1 0x48 -#define SF_V9_I2 0x50 -#define SF_V9_I3 0x58 -#define SF_V9_I4 0x60 -#define SF_V9_I5 0x68 -#define SF_V9_FP 0x70 -#define SF_V9_PC 0x78 -#define SF_V9_RETP 0x80 -#define SF_V9_XARG0 0x88 -#define SF_V9_XARG1 0x90 -#define SF_V9_XARG2 0x98 -#define SF_V9_XARG3 0xa0 -#define SF_V9_XARG4 0xa8 -#define SF_V9_XARG5 0xb0 -#define SF_V9_XXARG 0xb8 - -#define SF_L0 0x00 -#define SF_L1 0x04 -#define SF_L2 0x08 -#define SF_L3 0x0c -#define SF_L4 0x10 -#define SF_L5 0x14 -#define SF_L6 0x18 -#define SF_L7 0x1c -#define SF_I0 0x20 -#define SF_I1 0x24 -#define SF_I2 0x28 -#define SF_I3 0x2c -#define SF_I4 0x30 -#define SF_I5 0x34 -#define SF_FP 0x38 -#define SF_PC 0x3c -#define SF_RETP 0x40 -#define SF_XARG0 0x44 -#define SF_XARG1 0x48 -#define SF_XARG2 0x4c -#define SF_XARG3 0x50 -#define SF_XARG4 0x54 -#define SF_XARG5 0x58 -#define SF_XXARG 0x5c - -#ifdef __KERNEL__ - -/* global_reg_snapshot offsets */ -#define GR_SNAP_TSTATE 0x00 -#define GR_SNAP_TPC 0x08 -#define GR_SNAP_TNPC 0x10 -#define GR_SNAP_O7 0x18 -#define GR_SNAP_I7 0x20 -#define GR_SNAP_RPC 0x28 -#define GR_SNAP_THREAD 0x30 -#define GR_SNAP_PAD1 0x38 - -#endif /* __KERNEL__ */ - -/* Stuff for the ptrace system call */ -#define PTRACE_SPARC_DETACH 11 -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 -#define PTRACE_READDATA 16 -#define PTRACE_WRITEDATA 17 -#define PTRACE_READTEXT 18 -#define PTRACE_WRITETEXT 19 -#define PTRACE_GETFPAREGS 20 -#define PTRACE_SETFPAREGS 21 - -/* There are for debugging 64-bit processes, either from a 32 or 64 bit - * parent. Thus their complements are for debugging 32-bit processes only. - */ - -#define PTRACE_GETREGS64 22 -#define PTRACE_SETREGS64 23 -/* PTRACE_SYSCALL is 24 */ -#define PTRACE_GETFPREGS64 25 -#define PTRACE_SETFPREGS64 26 - -#endif /* !(_SPARC64_PTRACE_H) */ diff --git a/arch/sparc/include/asm/reg.h b/arch/sparc/include/asm/reg.h deleted file mode 100644 index 0c16e19cae4..00000000000 --- a/arch/sparc/include/asm/reg.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef ___ASM_SPARC_REG_H -#define ___ASM_SPARC_REG_H -#if defined(__sparc__) && defined(__arch64__) -#include <asm/reg_64.h> -#else -#include <asm/reg_32.h> -#endif -#endif diff --git a/arch/sparc/include/asm/reg_32.h b/arch/sparc/include/asm/reg_32.h deleted file mode 100644 index 1efb056fb3d..00000000000 --- a/arch/sparc/include/asm/reg_32.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * linux/include/asm/reg.h - * Layout of the registers as expected by gdb on the Sparc - * we should replace the user.h definitions with those in - * this file, we don't even use the other - * -miguel - * - * The names of the structures, constants and aliases in this file - * have the same names as the sunos ones, some programs rely on these - * names (gdb for example). - * - */ - -#ifndef __SPARC_REG_H -#define __SPARC_REG_H - -struct regs { - int r_psr; -#define r_ps r_psr - int r_pc; - int r_npc; - int r_y; - int r_g1; - int r_g2; - int r_g3; - int r_g4; - int r_g5; - int r_g6; - int r_g7; - int r_o0; - int r_o1; - int r_o2; - int r_o3; - int r_o4; - int r_o5; - int r_o6; - int r_o7; -}; - -struct fpq { - unsigned long *addr; - unsigned long instr; -}; - -struct fq { - union { - double whole; - struct fpq fpq; - } FQu; -}; - -#define FPU_REGS_TYPE unsigned int -#define FPU_FSR_TYPE unsigned - -struct fp_status { - union { - FPU_REGS_TYPE Fpu_regs[32]; - double Fpu_dregs[16]; - } fpu_fr; - FPU_FSR_TYPE Fpu_fsr; - unsigned Fpu_flags; - unsigned Fpu_extra; - unsigned Fpu_qcnt; - struct fq Fpu_q[16]; -}; - -#define fpu_regs f_fpstatus.fpu_fr.Fpu_regs -#define fpu_dregs f_fpstatus.fpu_fr.Fpu_dregs -#define fpu_fsr f_fpstatus.Fpu_fsr -#define fpu_flags f_fpstatus.Fpu_flags -#define fpu_extra f_fpstatus.Fpu_extra -#define fpu_q f_fpstatus.Fpu_q -#define fpu_qcnt f_fpstatus.Fpu_qcnt - -struct fpu { - struct fp_status f_fpstatus; -}; - -#endif /* __SPARC_REG_H */ diff --git a/arch/sparc/include/asm/reg_64.h b/arch/sparc/include/asm/reg_64.h deleted file mode 100644 index 6f277d7c7d8..00000000000 --- a/arch/sparc/include/asm/reg_64.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * linux/asm/reg.h - * Layout of the registers as expected by gdb on the Sparc - * we should replace the user.h definitions with those in - * this file, we don't even use the other - * -miguel - * - * The names of the structures, constants and aliases in this file - * have the same names as the sunos ones, some programs rely on these - * names (gdb for example). - * - */ - -#ifndef __SPARC64_REG_H -#define __SPARC64_REG_H - -struct regs { - unsigned long r_g1; - unsigned long r_g2; - unsigned long r_g3; - unsigned long r_g4; - unsigned long r_g5; - unsigned long r_g6; - unsigned long r_g7; - unsigned long r_o0; - unsigned long r_o1; - unsigned long r_o2; - unsigned long r_o3; - unsigned long r_o4; - unsigned long r_o5; - unsigned long r_o6; - unsigned long r_o7; - unsigned long __pad; - unsigned long r_tstate; - unsigned long r_tpc; - unsigned long r_tnpc; - unsigned int r_y; - unsigned int r_fprs; -}; - -#define FPU_REGS_TYPE unsigned int -#define FPU_FSR_TYPE unsigned long - -struct fp_status { - unsigned long fpu_fr[32]; - unsigned long Fpu_fsr; -}; - -struct fpu { - struct fp_status f_fpstatus; -}; - -#define fpu_regs f_fpstatus.fpu_fr -#define fpu_fsr f_fpstatus.Fpu_fsr - -#endif /* __SPARC64_REG_H */ diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h index e92de7e286b..a1607d18035 100644 --- a/arch/sparc/include/asm/sigcontext.h +++ b/arch/sparc/include/asm/sigcontext.h @@ -1,8 +1,96 @@ -#ifndef ___ASM_SPARC_SIGCONTEXT_H -#define ___ASM_SPARC_SIGCONTEXT_H -#if defined(__sparc__) && defined(__arch64__) -#include <asm/sigcontext_64.h> +#ifndef __SPARC_SIGCONTEXT_H +#define __SPARC_SIGCONTEXT_H + +#ifdef __KERNEL__ +#include <asm/ptrace.h> + +#ifndef __ASSEMBLY__ + +#define __SUNOS_MAXWIN 31 + +/* This is what SunOS does, so shall I unless we use new 32bit signals or rt signals. */ +struct sigcontext32 { + int sigc_onstack; /* state to restore */ + int sigc_mask; /* sigmask to restore */ + int sigc_sp; /* stack pointer */ + int sigc_pc; /* program counter */ + int sigc_npc; /* next program counter */ + int sigc_psr; /* for condition codes etc */ + int sigc_g1; /* User uses these two registers */ + int sigc_o0; /* within the trampoline code. */ + + /* Now comes information regarding the users window set + * at the time of the signal. + */ + int sigc_oswins; /* outstanding windows */ + + /* stack ptrs for each regwin buf */ + unsigned sigc_spbuf[__SUNOS_MAXWIN]; + + /* Windows to restore after signal */ + struct reg_window32 sigc_wbuf[__SUNOS_MAXWIN]; +}; + + +/* This is what we use for 32bit new non-rt signals. */ + +typedef struct { + struct { + unsigned int psr; + unsigned int pc; + unsigned int npc; + unsigned int y; + unsigned int u_regs[16]; /* globals and ins */ + } si_regs; + int si_mask; +} __siginfo32_t; + +#ifdef CONFIG_SPARC64 +typedef struct { + unsigned int si_float_regs [64]; + unsigned long si_fsr; + unsigned long si_gsr; + unsigned long si_fprs; +} __siginfo_fpu_t; + +/* This is what SunOS doesn't, so we have to write this alone + and do it properly. */ +struct sigcontext { + /* The size of this array has to match SI_MAX_SIZE from siginfo.h */ + char sigc_info[128]; + struct { + unsigned long u_regs[16]; /* globals and ins */ + unsigned long tstate; + unsigned long tpc; + unsigned long tnpc; + unsigned int y; + unsigned int fprs; + } sigc_regs; + __siginfo_fpu_t * sigc_fpu_save; + struct { + void * ss_sp; + int ss_flags; + unsigned long ss_size; + } sigc_stack; + unsigned long sigc_mask; +}; + #else -#include <asm/sigcontext_32.h> -#endif -#endif + +typedef struct { + unsigned long si_float_regs [32]; + unsigned long si_fsr; + unsigned long si_fpqdepth; + struct { + unsigned long *insn_addr; + unsigned long insn; + } si_fpqueue [16]; +} __siginfo_fpu_t; +#endif /* (CONFIG_SPARC64) */ + + +#endif /* !(__ASSEMBLY__) */ + +#endif /* (__KERNEL__) */ + +#endif /* !(__SPARC_SIGCONTEXT_H) */ diff --git a/arch/sparc/include/asm/sigcontext_32.h b/arch/sparc/include/asm/sigcontext_32.h deleted file mode 100644 index c5fb60dcbd7..00000000000 --- a/arch/sparc/include/asm/sigcontext_32.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef __SPARC_SIGCONTEXT_H -#define __SPARC_SIGCONTEXT_H - -#ifdef __KERNEL__ -#include <asm/ptrace.h> - -#ifndef __ASSEMBLY__ - -#define __SUNOS_MAXWIN 31 - -/* This is what SunOS does, so shall I. */ -struct sigcontext { - int sigc_onstack; /* state to restore */ - int sigc_mask; /* sigmask to restore */ - int sigc_sp; /* stack pointer */ - int sigc_pc; /* program counter */ - int sigc_npc; /* next program counter */ - int sigc_psr; /* for condition codes etc */ - int sigc_g1; /* User uses these two registers */ - int sigc_o0; /* within the trampoline code. */ - - /* Now comes information regarding the users window set - * at the time of the signal. - */ - int sigc_oswins; /* outstanding windows */ - - /* stack ptrs for each regwin buf */ - char *sigc_spbuf[__SUNOS_MAXWIN]; - - /* Windows to restore after signal */ - struct { - unsigned long locals[8]; - unsigned long ins[8]; - } sigc_wbuf[__SUNOS_MAXWIN]; -}; - -typedef struct { - struct { - unsigned long psr; - unsigned long pc; - unsigned long npc; - unsigned long y; - unsigned long u_regs[16]; /* globals and ins */ - } si_regs; - int si_mask; -} __siginfo_t; - -typedef struct { - unsigned long si_float_regs [32]; - unsigned long si_fsr; - unsigned long si_fpqdepth; - struct { - unsigned long *insn_addr; - unsigned long insn; - } si_fpqueue [16]; -} __siginfo_fpu_t; - -#endif /* !(__ASSEMBLY__) */ - -#endif /* (__KERNEL__) */ - -#endif /* !(__SPARC_SIGCONTEXT_H) */ diff --git a/arch/sparc/include/asm/sigcontext_64.h b/arch/sparc/include/asm/sigcontext_64.h deleted file mode 100644 index 1c868d680cf..00000000000 --- a/arch/sparc/include/asm/sigcontext_64.h +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef __SPARC64_SIGCONTEXT_H -#define __SPARC64_SIGCONTEXT_H - -#ifdef __KERNEL__ -#include <asm/ptrace.h> -#endif - -#ifndef __ASSEMBLY__ - -#ifdef __KERNEL__ - -#define __SUNOS_MAXWIN 31 - -/* This is what SunOS does, so shall I unless we use new 32bit signals or rt signals. */ -struct sigcontext32 { - int sigc_onstack; /* state to restore */ - int sigc_mask; /* sigmask to restore */ - int sigc_sp; /* stack pointer */ - int sigc_pc; /* program counter */ - int sigc_npc; /* next program counter */ - int sigc_psr; /* for condition codes etc */ - int sigc_g1; /* User uses these two registers */ - int sigc_o0; /* within the trampoline code. */ - - /* Now comes information regarding the users window set - * at the time of the signal. - */ - int sigc_oswins; /* outstanding windows */ - - /* stack ptrs for each regwin buf */ - unsigned sigc_spbuf[__SUNOS_MAXWIN]; - - /* Windows to restore after signal */ - struct reg_window32 sigc_wbuf[__SUNOS_MAXWIN]; -}; - -#endif - -#ifdef __KERNEL__ - -/* This is what we use for 32bit new non-rt signals. */ - -typedef struct { - struct { - unsigned int psr; - unsigned int pc; - unsigned int npc; - unsigned int y; - unsigned int u_regs[16]; /* globals and ins */ - } si_regs; - int si_mask; -} __siginfo32_t; - -#endif - -typedef struct { - unsigned int si_float_regs [64]; - unsigned long si_fsr; - unsigned long si_gsr; - unsigned long si_fprs; -} __siginfo_fpu_t; - -/* This is what SunOS doesn't, so we have to write this alone - and do it properly. */ -struct sigcontext { - /* The size of this array has to match SI_MAX_SIZE from siginfo.h */ - char sigc_info[128]; - struct { - unsigned long u_regs[16]; /* globals and ins */ - unsigned long tstate; - unsigned long tpc; - unsigned long tnpc; - unsigned int y; - unsigned int fprs; - } sigc_regs; - __siginfo_fpu_t * sigc_fpu_save; - struct { - void * ss_sp; - int ss_flags; - unsigned long ss_size; - } sigc_stack; - unsigned long sigc_mask; -}; - -#endif /* !(__ASSEMBLY__) */ - -#endif /* !(__SPARC64_SIGCONTEXT_H) */ diff --git a/arch/sparc/include/asm/siginfo.h b/arch/sparc/include/asm/siginfo.h index bd81f8d7f5c..988e5d8ed11 100644 --- a/arch/sparc/include/asm/siginfo.h +++ b/arch/sparc/include/asm/siginfo.h @@ -1,8 +1,37 @@ -#ifndef ___ASM_SPARC_SIGINFO_H -#define ___ASM_SPARC_SIGINFO_H +#ifndef __SPARC_SIGINFO_H +#define __SPARC_SIGINFO_H + #if defined(__sparc__) && defined(__arch64__) -#include <asm/siginfo_64.h> -#else -#include <asm/siginfo_32.h> -#endif -#endif + +#define SI_PAD_SIZE32 ((SI_MAX_SIZE/sizeof(int)) - 3) +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +#define __ARCH_SI_BAND_T int + +#endif /* defined(__sparc__) && defined(__arch64__) */ + + +#define __ARCH_SI_TRAPNO + +#include <asm-generic/siginfo.h> + +#ifdef __KERNEL__ + +#include <linux/compat.h> + +#ifdef CONFIG_COMPAT + +struct compat_siginfo; + +#endif /* CONFIG_COMPAT */ + +#endif /* __KERNEL__ */ + +#define SI_NOINFO 32767 /* no information in siginfo_t */ + +/* + * SIGEMT si_codes + */ +#define EMT_TAGOVF (__SI_FAULT|1) /* tag overflow */ +#define NSIGEMT 1 + +#endif /* !(__SPARC_SIGINFO_H) */ diff --git a/arch/sparc/include/asm/siginfo_32.h b/arch/sparc/include/asm/siginfo_32.h deleted file mode 100644 index 3c71af135c5..00000000000 --- a/arch/sparc/include/asm/siginfo_32.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _SPARC_SIGINFO_H -#define _SPARC_SIGINFO_H - -#define __ARCH_SI_UID_T unsigned int -#define __ARCH_SI_TRAPNO - -#include <asm-generic/siginfo.h> - -#define SI_NOINFO 32767 /* no information in siginfo_t */ - -/* - * SIGEMT si_codes - */ -#define EMT_TAGOVF (__SI_FAULT|1) /* tag overflow */ -#define NSIGEMT 1 - -#endif /* !(_SPARC_SIGINFO_H) */ diff --git a/arch/sparc/include/asm/siginfo_64.h b/arch/sparc/include/asm/siginfo_64.h deleted file mode 100644 index c96e6c30f8b..00000000000 --- a/arch/sparc/include/asm/siginfo_64.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _SPARC64_SIGINFO_H -#define _SPARC64_SIGINFO_H - -#define SI_PAD_SIZE32 ((SI_MAX_SIZE/sizeof(int)) - 3) - -#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) -#define __ARCH_SI_TRAPNO -#define __ARCH_SI_BAND_T int - -#include <asm-generic/siginfo.h> - -#ifdef __KERNEL__ - -#include <linux/compat.h> - -#ifdef CONFIG_COMPAT - -struct compat_siginfo; - -#endif /* CONFIG_COMPAT */ - -#endif /* __KERNEL__ */ - -#define SI_NOINFO 32767 /* no information in siginfo_t */ - -/* - * SIGEMT si_codes - */ -#define EMT_TAGOVF (__SI_FAULT|1) /* tag overflow */ -#define NSIGEMT 1 - -#endif diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h index 27ab05dc203..41535e77b25 100644 --- a/arch/sparc/include/asm/signal.h +++ b/arch/sparc/include/asm/signal.h @@ -1,8 +1,210 @@ -#ifndef ___ASM_SPARC_SIGNAL_H -#define ___ASM_SPARC_SIGNAL_H -#if defined(__sparc__) && defined(__arch64__) -#include <asm/signal_64.h> +#ifndef __SPARC_SIGNAL_H +#define __SPARC_SIGNAL_H + +#include <asm/sigcontext.h> +#include <linux/compiler.h> + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +#include <linux/personality.h> +#include <linux/types.h> +#endif +#endif + +/* On the Sparc the signal handlers get passed a 'sub-signal' code + * for certain signal types, which we document here. + */ +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SUBSIG_STACK 0 +#define SUBSIG_ILLINST 2 +#define SUBSIG_PRIVINST 3 +#define SUBSIG_BADTRAP(t) (0x80 + (t)) + +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 + +#define SIGEMT 7 +#define SUBSIG_TAG 10 + +#define SIGFPE 8 +#define SUBSIG_FPDISABLED 0x400 +#define SUBSIG_FPERROR 0x404 +#define SUBSIG_FPINTOVFL 0x001 +#define SUBSIG_FPSTSIG 0x002 +#define SUBSIG_IDIVZERO 0x014 +#define SUBSIG_FPINEXACT 0x0c4 +#define SUBSIG_FPDIVZERO 0x0c8 +#define SUBSIG_FPUNFLOW 0x0cc +#define SUBSIG_FPOPERROR 0x0d0 +#define SUBSIG_FPOVFLOW 0x0d4 + +#define SIGKILL 9 +#define SIGBUS 10 +#define SUBSIG_BUSTIMEOUT 1 +#define SUBSIG_ALIGNMENT 2 +#define SUBSIG_MISCERROR 5 + +#define SIGSEGV 11 +#define SUBSIG_NOMAPPING 3 +#define SUBSIG_PROTECTION 4 +#define SUBSIG_SEGERROR 5 + +#define SIGSYS 12 + +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGURG 16 + +/* SunOS values which deviate from the Linux/i386 ones */ +#define SIGSTOP 17 +#define SIGTSTP 18 +#define SIGCONT 19 +#define SIGCHLD 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGIO 23 +#define SIGPOLL SIGIO /* SysV name for SIGIO */ +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGLOST 29 +#define SIGPWR SIGLOST +#define SIGUSR1 30 +#define SIGUSR2 31 + +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ + +#define __OLD_NSIG 32 +#define __NEW_NSIG 64 +#define _NSIG_BPW 64 +#define _NSIG_WORDS (__NEW_NSIG / _NSIG_BPW) + +#define SIGRTMIN 32 +#define SIGRTMAX __NEW_NSIG + +#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__) +#define _NSIG __NEW_NSIG +#define __new_sigset_t sigset_t +#define __new_sigaction sigaction +#define __new_sigaction32 sigaction32 +#define __old_sigset_t old_sigset_t +#define __old_sigaction old_sigaction +#define __old_sigaction32 old_sigaction32 #else -#include <asm/signal_32.h> +#define _NSIG __OLD_NSIG +#define NSIG _NSIG +#define __old_sigset_t sigset_t +#define __old_sigaction sigaction +#define __old_sigaction32 sigaction32 #endif + +#ifndef __ASSEMBLY__ + +typedef unsigned long __old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} __new_sigset_t; + +/* A SunOS sigstack */ +struct sigstack { + /* XXX 32-bit pointers pinhead XXX */ + char *the_stack; + int cur_status; +}; + +/* Sigvec flags */ +#define _SV_SSTACK 1u /* This signal handler should use sig-stack */ +#define _SV_INTR 2u /* Sig return should not restart system call */ +#define _SV_RESET 4u /* Set handler to SIG_DFL upon taken signal */ +#define _SV_IGNCHILD 8u /* Do not send SIGCHLD */ + +/* + * sa_flags values: SA_STACK is not currently supported, but will allow the + * usage of signal stacks by using the (now obsolete) sa_restorer field in + * the sigaction structure as a stack pointer. This is now possible due to + * the changes in signal handling. LBT 010493. + * SA_RESTART flag to get restarting signals (which were the default long ago) + */ +#define SA_NOCLDSTOP _SV_IGNCHILD +#define SA_STACK _SV_SSTACK +#define SA_ONSTACK _SV_SSTACK +#define SA_RESTART _SV_INTR +#define SA_ONESHOT _SV_RESET +#define SA_NOMASK 0x20u +#define SA_NOCLDWAIT 0x100u +#define SA_SIGINFO 0x200u + + +#define SIG_BLOCK 0x01 /* for blocking signals */ +#define SIG_UNBLOCK 0x02 /* for unblocking signals */ +#define SIG_SETMASK 0x04 /* for setting the signal mask */ + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 4096 +#define SIGSTKSZ 16384 + +#ifdef __KERNEL__ +/* + * DJHR + * SA_STATIC_ALLOC is used for the sparc32 system to indicate that this + * interrupt handler's irq structure should be statically allocated + * by the request_irq routine. + * The alternative is that arch/sparc/kernel/irq.c has carnal knowledge + * of interrupt usage and that sucks. Also without a flag like this + * it may be possible for the free_irq routine to attempt to free + * statically allocated data.. which is NOT GOOD. + * + */ +#define SA_STATIC_ALLOC 0x8000 #endif + +#include <asm-generic/signal.h> + +struct __new_sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; + __sigrestore_t sa_restorer; /* not used by Linux/SPARC yet */ + __new_sigset_t sa_mask; +}; + +struct __old_sigaction { + __sighandler_t sa_handler; + __old_sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); /* not used by Linux/SPARC yet */ +}; + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#ifdef __KERNEL__ + +struct k_sigaction { + struct __new_sigaction sa; + void __user *ka_restorer; +}; + +#define ptrace_signal_deliver(regs, cookie) do { } while (0) + +#endif /* !(__KERNEL__) */ + +#endif /* !(__ASSEMBLY__) */ + +#endif /* !(__SPARC_SIGNAL_H) */ diff --git a/arch/sparc/include/asm/signal_32.h b/arch/sparc/include/asm/signal_32.h deleted file mode 100644 index 96a60ab03ca..00000000000 --- a/arch/sparc/include/asm/signal_32.h +++ /dev/null @@ -1,207 +0,0 @@ -#ifndef _ASMSPARC_SIGNAL_H -#define _ASMSPARC_SIGNAL_H - -#include <asm/sigcontext.h> -#include <linux/compiler.h> - -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ -#include <linux/personality.h> -#include <linux/types.h> -#endif -#endif - -/* On the Sparc the signal handlers get passed a 'sub-signal' code - * for certain signal types, which we document here. - */ -#define SIGHUP 1 -#define SIGINT 2 -#define SIGQUIT 3 -#define SIGILL 4 -#define SUBSIG_STACK 0 -#define SUBSIG_ILLINST 2 -#define SUBSIG_PRIVINST 3 -#define SUBSIG_BADTRAP(t) (0x80 + (t)) - -#define SIGTRAP 5 -#define SIGABRT 6 -#define SIGIOT 6 - -#define SIGEMT 7 -#define SUBSIG_TAG 10 - -#define SIGFPE 8 -#define SUBSIG_FPDISABLED 0x400 -#define SUBSIG_FPERROR 0x404 -#define SUBSIG_FPINTOVFL 0x001 -#define SUBSIG_FPSTSIG 0x002 -#define SUBSIG_IDIVZERO 0x014 -#define SUBSIG_FPINEXACT 0x0c4 -#define SUBSIG_FPDIVZERO 0x0c8 -#define SUBSIG_FPUNFLOW 0x0cc -#define SUBSIG_FPOPERROR 0x0d0 -#define SUBSIG_FPOVFLOW 0x0d4 - -#define SIGKILL 9 -#define SIGBUS 10 -#define SUBSIG_BUSTIMEOUT 1 -#define SUBSIG_ALIGNMENT 2 -#define SUBSIG_MISCERROR 5 - -#define SIGSEGV 11 -#define SUBSIG_NOMAPPING 3 -#define SUBSIG_PROTECTION 4 -#define SUBSIG_SEGERROR 5 - -#define SIGSYS 12 - -#define SIGPIPE 13 -#define SIGALRM 14 -#define SIGTERM 15 -#define SIGURG 16 - -/* SunOS values which deviate from the Linux/i386 ones */ -#define SIGSTOP 17 -#define SIGTSTP 18 -#define SIGCONT 19 -#define SIGCHLD 20 -#define SIGTTIN 21 -#define SIGTTOU 22 -#define SIGIO 23 -#define SIGPOLL SIGIO /* SysV name for SIGIO */ -#define SIGXCPU 24 -#define SIGXFSZ 25 -#define SIGVTALRM 26 -#define SIGPROF 27 -#define SIGWINCH 28 -#define SIGLOST 29 -#define SIGPWR SIGLOST -#define SIGUSR1 30 -#define SIGUSR2 31 - -/* Most things should be clean enough to redefine this at will, if care - * is taken to make libc match. - */ - -#define __OLD_NSIG 32 -#define __NEW_NSIG 64 -#define _NSIG_BPW 32 -#define _NSIG_WORDS (__NEW_NSIG / _NSIG_BPW) - -#define SIGRTMIN 32 -#define SIGRTMAX __NEW_NSIG - -#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__) -#define _NSIG __NEW_NSIG -#define __new_sigset_t sigset_t -#define __new_sigaction sigaction -#define __old_sigset_t old_sigset_t -#define __old_sigaction old_sigaction -#else -#define _NSIG __OLD_NSIG -#define __old_sigset_t sigset_t -#define __old_sigaction sigaction -#endif - -#ifndef __ASSEMBLY__ - -typedef unsigned long __old_sigset_t; - -typedef struct { - unsigned long sig[_NSIG_WORDS]; -} __new_sigset_t; - - -#ifdef __KERNEL__ -/* A SunOS sigstack */ -struct sigstack { - char *the_stack; - int cur_status; -}; -#endif - -/* Sigvec flags */ -#define _SV_SSTACK 1u /* This signal handler should use sig-stack */ -#define _SV_INTR 2u /* Sig return should not restart system call */ -#define _SV_RESET 4u /* Set handler to SIG_DFL upon taken signal */ -#define _SV_IGNCHILD 8u /* Do not send SIGCHLD */ - -/* - * sa_flags values: SA_STACK is not currently supported, but will allow the - * usage of signal stacks by using the (now obsolete) sa_restorer field in - * the sigaction structure as a stack pointer. This is now possible due to - * the changes in signal handling. LBT 010493. - * SA_RESTART flag to get restarting signals (which were the default long ago) - */ -#define SA_NOCLDSTOP _SV_IGNCHILD -#define SA_STACK _SV_SSTACK -#define SA_ONSTACK _SV_SSTACK -#define SA_RESTART _SV_INTR -#define SA_ONESHOT _SV_RESET -#define SA_NOMASK 0x20u -#define SA_NOCLDWAIT 0x100u -#define SA_SIGINFO 0x200u - -#define SIG_BLOCK 0x01 /* for blocking signals */ -#define SIG_UNBLOCK 0x02 /* for unblocking signals */ -#define SIG_SETMASK 0x04 /* for setting the signal mask */ - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - -#define MINSIGSTKSZ 4096 -#define SIGSTKSZ 16384 - -#ifdef __KERNEL__ -/* - * DJHR - * SA_STATIC_ALLOC is used for the SPARC system to indicate that this - * interrupt handler's irq structure should be statically allocated - * by the request_irq routine. - * The alternative is that arch/sparc/kernel/irq.c has carnal knowledge - * of interrupt usage and that sucks. Also without a flag like this - * it may be possible for the free_irq routine to attempt to free - * statically allocated data.. which is NOT GOOD. - * - */ -#define SA_STATIC_ALLOC 0x8000 -#endif - -#include <asm-generic/signal.h> - -#ifdef __KERNEL__ -struct __new_sigaction { - __sighandler_t sa_handler; - unsigned long sa_flags; - void (*sa_restorer)(void); /* Not used by Linux/SPARC */ - __new_sigset_t sa_mask; -}; - -struct k_sigaction { - struct __new_sigaction sa; - void __user *ka_restorer; -}; - -struct __old_sigaction { - __sighandler_t sa_handler; - __old_sigset_t sa_mask; - unsigned long sa_flags; - void (*sa_restorer) (void); /* not used by Linux/SPARC */ -}; - -typedef struct sigaltstack { - void __user *ss_sp; - int ss_flags; - size_t ss_size; -} stack_t; - -#define ptrace_signal_deliver(regs, cookie) do { } while (0) - -#endif /* !(__KERNEL__) */ - -#endif /* !(__ASSEMBLY__) */ - -#endif /* !(_ASMSPARC_SIGNAL_H) */ diff --git a/arch/sparc/include/asm/signal_64.h b/arch/sparc/include/asm/signal_64.h deleted file mode 100644 index ab1509a101c..00000000000 --- a/arch/sparc/include/asm/signal_64.h +++ /dev/null @@ -1,194 +0,0 @@ -#ifndef _ASMSPARC64_SIGNAL_H -#define _ASMSPARC64_SIGNAL_H - -#include <asm/sigcontext.h> - -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ -#include <linux/personality.h> -#include <linux/types.h> -#endif -#endif - -/* On the Sparc the signal handlers get passed a 'sub-signal' code - * for certain signal types, which we document here. - */ -#define SIGHUP 1 -#define SIGINT 2 -#define SIGQUIT 3 -#define SIGILL 4 -#define SUBSIG_STACK 0 -#define SUBSIG_ILLINST 2 -#define SUBSIG_PRIVINST 3 -#define SUBSIG_BADTRAP(t) (0x80 + (t)) - -#define SIGTRAP 5 -#define SIGABRT 6 -#define SIGIOT 6 - -#define SIGEMT 7 -#define SUBSIG_TAG 10 - -#define SIGFPE 8 -#define SUBSIG_FPDISABLED 0x400 -#define SUBSIG_FPERROR 0x404 -#define SUBSIG_FPINTOVFL 0x001 -#define SUBSIG_FPSTSIG 0x002 -#define SUBSIG_IDIVZERO 0x014 -#define SUBSIG_FPINEXACT 0x0c4 -#define SUBSIG_FPDIVZERO 0x0c8 -#define SUBSIG_FPUNFLOW 0x0cc -#define SUBSIG_FPOPERROR 0x0d0 -#define SUBSIG_FPOVFLOW 0x0d4 - -#define SIGKILL 9 -#define SIGBUS 10 -#define SUBSIG_BUSTIMEOUT 1 -#define SUBSIG_ALIGNMENT 2 -#define SUBSIG_MISCERROR 5 - -#define SIGSEGV 11 -#define SUBSIG_NOMAPPING 3 -#define SUBSIG_PROTECTION 4 -#define SUBSIG_SEGERROR 5 - -#define SIGSYS 12 - -#define SIGPIPE 13 -#define SIGALRM 14 -#define SIGTERM 15 -#define SIGURG 16 - -/* SunOS values which deviate from the Linux/i386 ones */ -#define SIGSTOP 17 -#define SIGTSTP 18 -#define SIGCONT 19 -#define SIGCHLD 20 -#define SIGTTIN 21 -#define SIGTTOU 22 -#define SIGIO 23 -#define SIGPOLL SIGIO /* SysV name for SIGIO */ -#define SIGXCPU 24 -#define SIGXFSZ 25 -#define SIGVTALRM 26 -#define SIGPROF 27 -#define SIGWINCH 28 -#define SIGLOST 29 -#define SIGPWR SIGLOST -#define SIGUSR1 30 -#define SIGUSR2 31 - -/* Most things should be clean enough to redefine this at will, if care - is taken to make libc match. */ - -#define __OLD_NSIG 32 -#define __NEW_NSIG 64 -#define _NSIG_BPW 64 -#define _NSIG_WORDS (__NEW_NSIG / _NSIG_BPW) - -#define SIGRTMIN 32 -#define SIGRTMAX __NEW_NSIG - -#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__) -#define _NSIG __NEW_NSIG -#define __new_sigset_t sigset_t -#define __new_sigaction sigaction -#define __new_sigaction32 sigaction32 -#define __old_sigset_t old_sigset_t -#define __old_sigaction old_sigaction -#define __old_sigaction32 old_sigaction32 -#else -#define _NSIG __OLD_NSIG -#define NSIG _NSIG -#define __old_sigset_t sigset_t -#define __old_sigaction sigaction -#define __old_sigaction32 sigaction32 -#endif - -#ifndef __ASSEMBLY__ - -typedef unsigned long __old_sigset_t; /* at least 32 bits */ - -typedef struct { - unsigned long sig[_NSIG_WORDS]; -} __new_sigset_t; - -/* A SunOS sigstack */ -struct sigstack { - /* XXX 32-bit pointers pinhead XXX */ - char *the_stack; - int cur_status; -}; - -/* Sigvec flags */ -#define _SV_SSTACK 1u /* This signal handler should use sig-stack */ -#define _SV_INTR 2u /* Sig return should not restart system call */ -#define _SV_RESET 4u /* Set handler to SIG_DFL upon taken signal */ -#define _SV_IGNCHILD 8u /* Do not send SIGCHLD */ - -/* - * sa_flags values: SA_STACK is not currently supported, but will allow the - * usage of signal stacks by using the (now obsolete) sa_restorer field in - * the sigaction structure as a stack pointer. This is now possible due to - * the changes in signal handling. LBT 010493. - * SA_RESTART flag to get restarting signals (which were the default long ago) - */ -#define SA_NOCLDSTOP _SV_IGNCHILD -#define SA_STACK _SV_SSTACK -#define SA_ONSTACK _SV_SSTACK -#define SA_RESTART _SV_INTR -#define SA_ONESHOT _SV_RESET -#define SA_NOMASK 0x20u -#define SA_NOCLDWAIT 0x100u -#define SA_SIGINFO 0x200u - - -#define SIG_BLOCK 0x01 /* for blocking signals */ -#define SIG_UNBLOCK 0x02 /* for unblocking signals */ -#define SIG_SETMASK 0x04 /* for setting the signal mask */ - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - -#define MINSIGSTKSZ 4096 -#define SIGSTKSZ 16384 - -#include <asm-generic/signal.h> - -struct __new_sigaction { - __sighandler_t sa_handler; - unsigned long sa_flags; - __sigrestore_t sa_restorer; /* not used by Linux/SPARC yet */ - __new_sigset_t sa_mask; -}; - -struct __old_sigaction { - __sighandler_t sa_handler; - __old_sigset_t sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); /* not used by Linux/SPARC yet */ -}; - -typedef struct sigaltstack { - void __user *ss_sp; - int ss_flags; - size_t ss_size; -} stack_t; - -#ifdef __KERNEL__ - -struct k_sigaction { - struct __new_sigaction sa; - void __user *ka_restorer; -}; - -#define ptrace_signal_deliver(regs, cookie) do { } while (0) - -#endif /* !(__KERNEL__) */ - -#endif /* !(__ASSEMBLY__) */ - -#endif /* !(_ASMSPARC64_SIGNAL_H) */ diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index a8180e546a4..58101dc7049 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -29,8 +29,6 @@ */ extern unsigned char boot_cpu_id; -extern cpumask_t phys_cpu_present_map; -#define cpu_possible_map phys_cpu_present_map typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); @@ -172,7 +170,4 @@ void smp_setup_cpu_possible_map(void); #define smp_setup_cpu_possible_map() do { } while (0) #endif /* !(SMP) */ - -#define NO_PROC_ID 0xFF - #endif /* !(_SPARC_SMP_H) */ diff --git a/arch/sparc/include/asm/stat.h b/arch/sparc/include/asm/stat.h index d8153013df7..55db5eca08e 100644 --- a/arch/sparc/include/asm/stat.h +++ b/arch/sparc/include/asm/stat.h @@ -1,8 +1,107 @@ -#ifndef ___ASM_SPARC_STAT_H -#define ___ASM_SPARC_STAT_H +#ifndef __SPARC_STAT_H +#define __SPARC_STAT_H + +#include <linux/types.h> + #if defined(__sparc__) && defined(__arch64__) -#include <asm/stat_64.h> +/* 64 bit sparc */ +struct stat { + unsigned st_dev; + ino_t st_ino; + mode_t st_mode; + short st_nlink; + uid_t st_uid; + gid_t st_gid; + unsigned st_rdev; + off_t st_size; + time_t st_atime; + time_t st_mtime; + time_t st_ctime; + off_t st_blksize; + off_t st_blocks; + unsigned long __unused4[2]; +}; + +struct stat64 { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int __pad0; + + unsigned long st_rdev; + long st_size; + long st_blksize; + long st_blocks; + + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + long __unused[3]; +}; + #else -#include <asm/stat_32.h> -#endif -#endif +/* 32 bit sparc */ +struct stat { + unsigned short st_dev; + ino_t st_ino; + mode_t st_mode; + short st_nlink; + uid_t st_uid; + gid_t st_gid; + unsigned short st_rdev; + off_t st_size; + time_t st_atime; + unsigned long st_atime_nsec; + time_t st_mtime; + unsigned long st_mtime_nsec; + time_t st_ctime; + unsigned long st_ctime_nsec; + off_t st_blksize; + off_t st_blocks; + unsigned long __unused4[2]; +}; + +#define STAT_HAVE_NSEC 1 + +struct stat64 { + unsigned long long st_dev; + + unsigned long long st_ino; + + unsigned int st_mode; + unsigned int st_nlink; + + unsigned int st_uid; + unsigned int st_gid; + + unsigned long long st_rdev; + + unsigned char __pad3[8]; + + long long st_size; + unsigned int st_blksize; + + unsigned char __pad4[8]; + unsigned int st_blocks; + + unsigned int st_atime; + unsigned int st_atime_nsec; + + unsigned int st_mtime; + unsigned int st_mtime_nsec; + + unsigned int st_ctime; + unsigned int st_ctime_nsec; + + unsigned int __unused4; + unsigned int __unused5; +}; +#endif /* defined(__sparc__) && defined(__arch64__) */ +#endif /* __SPARC_STAT_H */ diff --git a/arch/sparc/include/asm/stat_32.h b/arch/sparc/include/asm/stat_32.h deleted file mode 100644 index 2299e1d5d94..00000000000 --- a/arch/sparc/include/asm/stat_32.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _SPARC_STAT_H -#define _SPARC_STAT_H - -#include <linux/types.h> - -struct __old_kernel_stat { - unsigned short st_dev; - unsigned short st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - unsigned long st_size; - unsigned long st_atime; - unsigned long st_mtime; - unsigned long st_ctime; -}; - -struct stat { - unsigned short st_dev; - unsigned long st_ino; - unsigned short st_mode; - short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - long st_size; - long st_atime; - unsigned long st_atime_nsec; - long st_mtime; - unsigned long st_mtime_nsec; - long st_ctime; - unsigned long st_ctime_nsec; - long st_blksize; - long st_blocks; - unsigned long __unused4[2]; -}; - -#define STAT_HAVE_NSEC 1 - -struct stat64 { - unsigned long long st_dev; - - unsigned long long st_ino; - - unsigned int st_mode; - unsigned int st_nlink; - - unsigned int st_uid; - unsigned int st_gid; - - unsigned long long st_rdev; - - unsigned char __pad3[8]; - - long long st_size; - unsigned int st_blksize; - - unsigned char __pad4[8]; - unsigned int st_blocks; - - unsigned int st_atime; - unsigned int st_atime_nsec; - - unsigned int st_mtime; - unsigned int st_mtime_nsec; - - unsigned int st_ctime; - unsigned int st_ctime_nsec; - - unsigned int __unused4; - unsigned int __unused5; -}; - -#endif diff --git a/arch/sparc/include/asm/stat_64.h b/arch/sparc/include/asm/stat_64.h deleted file mode 100644 index 9650fdea847..00000000000 --- a/arch/sparc/include/asm/stat_64.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _SPARC64_STAT_H -#define _SPARC64_STAT_H - -#include <linux/types.h> - -struct stat { - unsigned st_dev; - ino_t st_ino; - mode_t st_mode; - short st_nlink; - uid_t st_uid; - gid_t st_gid; - unsigned st_rdev; - off_t st_size; - time_t st_atime; - time_t st_mtime; - time_t st_ctime; - off_t st_blksize; - off_t st_blocks; - unsigned long __unused4[2]; -}; - -struct stat64 { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - - unsigned int st_mode; - unsigned int st_uid; - unsigned int st_gid; - unsigned int __pad0; - - unsigned long st_rdev; - long st_size; - long st_blksize; - long st_blocks; - - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - unsigned long st_mtime_nsec; - unsigned long st_ctime; - unsigned long st_ctime_nsec; - long __unused[3]; -}; - -#endif diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 80fe547c3f4..0f7b0e5fb1c 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -45,7 +45,7 @@ struct thread_info { /* A place to store user windows and stack pointers * when the stack needs inspection. */ - struct reg_window reg_window[NSWINS]; /* align for ldd! */ + struct reg_window32 reg_window[NSWINS]; /* align for ldd! */ unsigned long rwbuf_stkptrs[NSWINS]; unsigned long w_saved; diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 001c04027c8..b8a65b64e1d 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -16,8 +16,12 @@ static inline cpumask_t node_to_cpumask(int node) { return numa_cpumask_lookup_table[node]; } +#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node]) -/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ +/* + * Returns a pointer to the cpumask of CPUs on Node 'node'. + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #define node_to_cpumask_ptr(v, node) \ cpumask_t *v = &(numa_cpumask_lookup_table[node]) @@ -26,9 +30,7 @@ static inline cpumask_t node_to_cpumask(int node) static inline int node_to_first_cpu(int node) { - cpumask_t tmp; - tmp = node_to_cpumask(node); - return first_cpu(tmp); + return cpumask_first(cpumask_of_node(node)); } struct pci_bus; @@ -77,10 +79,13 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #define topology_core_id(cpu) (cpu_data(cpu).core_id) #define topology_core_siblings(cpu) (cpu_core_map[cpu]) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #define mc_capable() (sparc64_multi_core) #define smt_capable() (sparc64_multi_core) #endif /* CONFIG_SMP */ #define cpu_coregroup_map(cpu) (cpu_core_map[cpu]) +#define cpu_coregroup_mask(cpu) (&cpu_core_map[cpu]) #endif /* _ASM_SPARC64_TOPOLOGY_H */ diff --git a/arch/sparc/include/asm/traps.h b/arch/sparc/include/asm/traps.h index bebdbf8f43a..3aa62dde343 100644 --- a/arch/sparc/include/asm/traps.h +++ b/arch/sparc/include/asm/traps.h @@ -10,7 +10,7 @@ #define NUM_SPARC_TRAPS 255 #ifndef __ASSEMBLY__ - +#ifdef __KERNEL__ /* This is for V8 compliant Sparc CPUS */ struct tt_entry { unsigned long inst_one; @@ -22,14 +22,7 @@ struct tt_entry { /* We set this to _start in system setup. */ extern struct tt_entry *sparc_ttable; -static inline unsigned long get_tbr(void) -{ - unsigned long tbr; - - __asm__ __volatile__("rd %%tbr, %0\n\t" : "=r" (tbr)); - return tbr; -} - +#endif /* (__KERNEL__) */ #endif /* !(__ASSEMBLY__) */ /* For patching the trap table at boot time, we need to know how to diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c index 62126e4cec5..f28cb8278e9 100644 --- a/arch/sparc/kernel/init_task.c +++ b/arch/sparc/kernel/init_task.c @@ -8,7 +8,6 @@ #include <asm/pgtable.h> #include <asm/uaccess.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c index f3488c45d57..1eff942fe22 100644 --- a/arch/sparc/kernel/irq_32.c +++ b/arch/sparc/kernel/irq_32.c @@ -669,7 +669,9 @@ void __init init_IRQ(void) btfixup(); } +#ifdef CONFIG_PROC_FS void init_irq_proc(void) { /* For now, nothing... */ } +#endif /* CONFIG_PROC_FS */ diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index a3ea2bcb95d..cab8e028687 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -312,7 +312,8 @@ static void sun4u_irq_enable(unsigned int virt_irq) } } -static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4u_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { sun4u_irq_enable(virt_irq); } @@ -362,7 +363,8 @@ static void sun4v_irq_enable(unsigned int virt_irq) ino, err); } -static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4v_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { unsigned int ino = virt_irq_table[virt_irq].dev_ino; unsigned long cpuid = irq_choose_cpu(virt_irq); @@ -429,7 +431,8 @@ static void sun4v_virq_enable(unsigned int virt_irq) dev_handle, dev_ino, err); } -static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4v_virt_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { unsigned long cpuid, dev_handle, dev_ino; int err; @@ -851,7 +854,7 @@ void fixup_irqs(void) !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, - irq_desc[irq].affinity); + &irq_desc[irq].affinity); } spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c index 757805ce02e..04df4edc007 100644 --- a/arch/sparc/kernel/kgdb_32.c +++ b/arch/sparc/kernel/kgdb_32.c @@ -14,14 +14,14 @@ extern unsigned long trapbase; void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) { - struct reg_window *win; + struct reg_window32 *win; int i; gdb_regs[GDB_G0] = 0; for (i = 0; i < 15; i++) gdb_regs[GDB_G1 + i] = regs->u_regs[UREG_G1 + i]; - win = (struct reg_window *) regs->u_regs[UREG_FP]; + win = (struct reg_window32 *) regs->u_regs[UREG_FP]; for (i = 0; i < 8; i++) gdb_regs[GDB_L0 + i] = win->locals[i]; for (i = 0; i < 8; i++) @@ -43,7 +43,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) { struct thread_info *t = task_thread_info(p); - struct reg_window *win; + struct reg_window32 *win; int i; for (i = GDB_G0; i < GDB_G6; i++) @@ -55,7 +55,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_SP] = t->ksp; gdb_regs[GDB_O7] = 0; - win = (struct reg_window *) t->ksp; + win = (struct reg_window32 *) t->ksp; for (i = 0; i < 8; i++) gdb_regs[GDB_L0 + i] = win->locals[i]; for (i = 0; i < 8; i++) @@ -77,7 +77,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) { - struct reg_window *win; + struct reg_window32 *win; int i; for (i = 0; i < 15; i++) @@ -96,7 +96,7 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) regs->npc = gdb_regs[GDB_NPC]; regs->y = gdb_regs[GDB_Y]; - win = (struct reg_window *) regs->u_regs[UREG_FP]; + win = (struct reg_window32 *) regs->u_regs[UREG_FP]; for (i = 0; i < 8; i++) win->locals[i] = gdb_regs[GDB_L0 + i]; for (i = 0; i < 8; i++) diff --git a/arch/sparc/kernel/muldiv.c b/arch/sparc/kernel/muldiv.c index ba960c02bb5..6ce1021d487 100644 --- a/arch/sparc/kernel/muldiv.c +++ b/arch/sparc/kernel/muldiv.c @@ -60,7 +60,7 @@ static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2, } #define fetch_reg(reg, regs) ({ \ - struct reg_window __user *win; \ + struct reg_window32 __user *win; \ register unsigned long ret; \ \ if (!(reg)) ret = 0; \ @@ -68,7 +68,7 @@ static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2, ret = regs->u_regs[(reg)]; \ } else { \ /* Ho hum, the slightly complicated case. */ \ - win = (struct reg_window __user *)regs->u_regs[UREG_FP];\ + win = (struct reg_window32 __user *)regs->u_regs[UREG_FP];\ if (get_user (ret, &win->locals[(reg) - 16])) return -1;\ } \ ret; \ @@ -77,7 +77,7 @@ static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2, static inline int store_reg(unsigned int result, unsigned int reg, struct pt_regs *regs) { - struct reg_window __user *win; + struct reg_window32 __user *win; if (!reg) return 0; @@ -86,7 +86,7 @@ store_reg(unsigned int result, unsigned int reg, struct pt_regs *regs) return 0; } else { /* need to use put_user() in this case: */ - win = (struct reg_window __user *) regs->u_regs[UREG_FP]; + win = (struct reg_window32 __user *) regs->u_regs[UREG_FP]; return (put_user(result, &win->locals[reg - 16])); } } diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 46e231f7c5c..4873f28905b 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -778,9 +778,9 @@ static unsigned int __init build_one_device_irq(struct of_device *op, out: nid = of_node_to_nid(dp); if (nid != -1) { - cpumask_t numa_mask = node_to_cpumask(nid); + cpumask_t numa_mask = *cpumask_of_node(nid); - irq_set_affinity(irq, numa_mask); + irq_set_affinity(irq, &numa_mask); } return irq; diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index 2e680f34f72..4ef282e8191 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@ -286,9 +286,9 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, nid = pbm->numa_node; if (nid != -1) { - cpumask_t numa_mask = node_to_cpumask(nid); + cpumask_t numa_mask = *cpumask_of_node(nid); - irq_set_affinity(irq, numa_mask); + irq_set_affinity(irq, &numa_mask); } err = request_irq(irq, sparc64_msiq_interrupt, 0, "MSIQ", diff --git a/arch/sparc/kernel/pci_psycho.c b/arch/sparc/kernel/pci_psycho.c index dfb3ec89298..3b34344082e 100644 --- a/arch/sparc/kernel/pci_psycho.c +++ b/arch/sparc/kernel/pci_psycho.c @@ -307,10 +307,7 @@ static void psycho_register_error_handlers(struct pci_pbm_info *pbm) /* We really mean to ignore the return result here. Two * PCI controller share the same interrupt numbers and - * drive the same front-end hardware. Whichever of the - * two get in here first will register the IRQ handler - * the second will just error out since we do not pass in - * IRQF_SHARED. + * drive the same front-end hardware. */ err = request_irq(op->irqs[1], psycho_ue_intr, IRQF_SHARED, "PSYCHO_UE", pbm); diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 69d9315f4a9..5a8d8ced33d 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -180,13 +180,13 @@ static DEFINE_SPINLOCK(sparc_backtrace_lock); void __show_backtrace(unsigned long fp) { - struct reg_window *rw; + struct reg_window32 *rw; unsigned long flags; int cpu = smp_processor_id(); spin_lock_irqsave(&sparc_backtrace_lock, flags); - rw = (struct reg_window *)fp; + rw = (struct reg_window32 *)fp; while(rw && (((unsigned long) rw) >= PAGE_OFFSET) && !(((unsigned long) rw) & 0x7)) { printk("CPU[%d]: ARGS[%08lx,%08lx,%08lx,%08lx,%08lx,%08lx] " @@ -196,7 +196,7 @@ void __show_backtrace(unsigned long fp) rw->ins[6], rw->ins[7]); printk("%pS\n", (void *) rw->ins[7]); - rw = (struct reg_window *) rw->ins[6]; + rw = (struct reg_window32 *) rw->ins[6]; } spin_unlock_irqrestore(&sparc_backtrace_lock, flags); } @@ -258,7 +258,7 @@ void show_stackframe(struct sparc_stackf *sf) void show_regs(struct pt_regs *r) { - struct reg_window *rw = (struct reg_window *) r->u_regs[14]; + struct reg_window32 *rw = (struct reg_window32 *) r->u_regs[14]; printk("PSR: %08lx PC: %08lx NPC: %08lx Y: %08lx %s\n", r->psr, r->pc, r->npc, r->y, print_tainted()); @@ -287,7 +287,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) { unsigned long pc, fp; unsigned long task_base; - struct reg_window *rw; + struct reg_window32 *rw; int count = 0; if (tsk != NULL) @@ -301,7 +301,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) if (fp < (task_base + sizeof(struct thread_info)) || fp >= (task_base + (PAGE_SIZE << 1))) break; - rw = (struct reg_window *) fp; + rw = (struct reg_window32 *) fp; pc = rw->ins[7]; printk("[%08lx : ", pc); printk("%pS ] ", (void *) pc); @@ -679,7 +679,7 @@ unsigned long get_wchan(struct task_struct *task) unsigned long pc, fp, bias = 0; unsigned long task_base = (unsigned long) task; unsigned long ret = 0; - struct reg_window *rw; + struct reg_window32 *rw; int count = 0; if (!task || task == current || @@ -692,7 +692,7 @@ unsigned long get_wchan(struct task_struct *task) if (fp < (task_base + sizeof(struct thread_info)) || fp >= (task_base + (2 * PAGE_SIZE))) break; - rw = (struct reg_window *) fp; + rw = (struct reg_window32 *) fp; pc = rw->ins[7]; if (!in_sched_functions(pc)) { ret = pc; diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c index 4e9af593db4..ff7b591c894 100644 --- a/arch/sparc/kernel/prom_common.c +++ b/arch/sparc/kernel/prom_common.c @@ -155,20 +155,12 @@ static struct property * __init build_one_prop(phandle node, char *prev, p->value = prom_early_alloc(special_len); memcpy(p->value, special_val, special_len); } else { -#ifdef CONFIG_SPARC32 - if (prev == NULL) { - name = prom_firstprop(node, NULL); - } else { - name = prom_nextprop(node, prev, NULL); - } -#else if (prev == NULL) { - prom_firstprop(node, p->name); + name = prom_firstprop(node, p->name); } else { - prom_nextprop(node, prev, p->name); + name = prom_nextprop(node, prev, p->name); } - name = p->name; -#endif + if (strlen(name) == 0) { tmp = p; return NULL; diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index c94f91c8b6e..181d069a2d4 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -34,7 +34,7 @@ extern void fpload(unsigned long *fpregs, unsigned long *fsr); struct signal_frame { struct sparc_stackf ss; - __siginfo_t info; + __siginfo32_t info; __siginfo_fpu_t __user *fpu_save; unsigned long insns[2] __attribute__ ((aligned (8))); unsigned int extramask[_NSIG_WORDS - 1]; @@ -351,7 +351,7 @@ static void setup_frame(struct k_sigaction *ka, struct pt_regs *regs, err |= __copy_to_user(sf->extramask, &oldset->sig[1], (_NSIG_WORDS - 1) * sizeof(unsigned int)); err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP], - sizeof(struct reg_window)); + sizeof(struct reg_window32)); if (err) goto sigsegv; @@ -433,7 +433,7 @@ static void setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, err |= __put_user(current->sas_ss_size, &sf->stack.ss_size); err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP], - sizeof(struct reg_window)); + sizeof(struct reg_window32)); err |= copy_siginfo_to_user(&sf->info, info); diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index e396c1f17a9..1e5ac4e282e 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -39,8 +39,6 @@ volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,}; unsigned char boot_cpu_id = 0; unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -cpumask_t phys_cpu_present_map = CPU_MASK_NONE; cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* The only guaranteed locking primitive available on all Sparc @@ -334,7 +332,7 @@ void __init smp_setup_cpu_possible_map(void) instance = 0; while (!cpu_find_by_instance(instance, NULL, &mid)) { if (mid < NR_CPUS) { - cpu_set(mid, phys_cpu_present_map); + cpu_set(mid, cpu_possible_map); cpu_set(mid, cpu_present_map); } instance++; @@ -354,7 +352,7 @@ void __init smp_prepare_boot_cpu(void) current_thread_info()->cpu = cpuid; cpu_set(cpuid, cpu_online_map); - cpu_set(cpuid, phys_cpu_present_map); + cpu_set(cpuid, cpu_possible_map); } int __cpuinit __cpu_up(unsigned int cpu) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index bfe99d82d45..46329799f34 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -49,14 +49,10 @@ int sparc64_multi_core __read_mostly; -cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE; -cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; -EXPORT_SYMBOL(cpu_possible_map); -EXPORT_SYMBOL(cpu_online_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); diff --git a/arch/sparc/kernel/sparc_ksyms_32.c b/arch/sparc/kernel/sparc_ksyms_32.c index a4d45fc29b2..e1e97639231 100644 --- a/arch/sparc/kernel/sparc_ksyms_32.c +++ b/arch/sparc/kernel/sparc_ksyms_32.c @@ -112,10 +112,6 @@ EXPORT_PER_CPU_SYMBOL(__cpu_data); #ifdef CONFIG_SMP /* IRQ implementation. */ EXPORT_SYMBOL(synchronize_irq); - -/* CPU online map and active count. */ -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(phys_cpu_present_map); #endif EXPORT_SYMBOL(__udelay); diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 141da375909..9df8f095a8b 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -763,7 +763,7 @@ void __devinit setup_sparc64_timer(void) sevt = &__get_cpu_var(sparc64_events); memcpy(sevt, &sparc64_clockevent, sizeof(*sevt)); - sevt->cpumask = cpumask_of_cpu(smp_processor_id()); + sevt->cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(sevt); } diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 716f3946c49..213645be6e9 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -67,7 +67,7 @@ void die_if_kernel(char *str, struct pt_regs *regs) __RESTORE; __RESTORE; __RESTORE; __RESTORE; { - struct reg_window *rw = (struct reg_window *)regs->u_regs[UREG_FP]; + struct reg_window32 *rw = (struct reg_window32 *)regs->u_regs[UREG_FP]; /* Stop the back trace when we hit userland or we * find some badly aligned kernel stack. Set an upper @@ -79,7 +79,7 @@ void die_if_kernel(char *str, struct pt_regs *regs) !(((unsigned long) rw) & 0x7)) { printk("Caller[%08lx]: %pS\n", rw->ins[7], (void *) rw->ins[7]); - rw = (struct reg_window *)rw->ins[6]; + rw = (struct reg_window32 *)rw->ins[6]; } } printk("Instruction DUMP:"); diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index c2a28c5ad65..6b1e6cde6ff 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -97,26 +97,26 @@ static inline int sign_extend_imm13(int imm) static inline unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) { - struct reg_window *win; + struct reg_window32 *win; if(reg < 16) return (!reg ? 0 : regs->u_regs[reg]); /* Ho hum, the slightly complicated case. */ - win = (struct reg_window *) regs->u_regs[UREG_FP]; + win = (struct reg_window32 *) regs->u_regs[UREG_FP]; return win->locals[reg - 16]; /* yes, I know what this does... */ } static inline unsigned long safe_fetch_reg(unsigned int reg, struct pt_regs *regs) { - struct reg_window __user *win; + struct reg_window32 __user *win; unsigned long ret; if (reg < 16) return (!reg ? 0 : regs->u_regs[reg]); /* Ho hum, the slightly complicated case. */ - win = (struct reg_window __user *) regs->u_regs[UREG_FP]; + win = (struct reg_window32 __user *) regs->u_regs[UREG_FP]; if ((unsigned long)win & 3) return -1; @@ -129,11 +129,11 @@ static inline unsigned long safe_fetch_reg(unsigned int reg, struct pt_regs *reg static inline unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs) { - struct reg_window *win; + struct reg_window32 *win; if(reg < 16) return ®s->u_regs[reg]; - win = (struct reg_window *) regs->u_regs[UREG_FP]; + win = (struct reg_window32 *) regs->u_regs[UREG_FP]; return &win->locals[reg - 16]; } diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c index 9cc93eaa4ab..f24d298bda2 100644 --- a/arch/sparc/kernel/windows.c +++ b/arch/sparc/kernel/windows.c @@ -42,7 +42,7 @@ static inline void shift_window_buffer(int first_win, int last_win, struct threa for(i = first_win; i < last_win; i++) { tp->rwbuf_stkptrs[i] = tp->rwbuf_stkptrs[i+1]; - memcpy(&tp->reg_window[i], &tp->reg_window[i+1], sizeof(struct reg_window)); + memcpy(&tp->reg_window[i], &tp->reg_window[i+1], sizeof(struct reg_window32)); } } @@ -70,7 +70,7 @@ void synchronize_user_stack(void) /* Ok, let it rip. */ if (copy_to_user((char __user *) sp, &tp->reg_window[window], - sizeof(struct reg_window))) + sizeof(struct reg_window32))) continue; shift_window_buffer(window, tp->w_saved - 1, tp); @@ -119,7 +119,7 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who) if ((sp & 7) || copy_to_user((char __user *) sp, &tp->reg_window[window], - sizeof(struct reg_window))) + sizeof(struct reg_window32))) do_exit(SIGILL); } tp->w_saved = 0; diff --git a/arch/um/Makefile b/arch/um/Makefile index d944c343acd..0728def3223 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -22,10 +22,11 @@ MODE_INCLUDE += -I$(srctree)/$(ARCH_DIR)/include/shared/skas include $(srctree)/$(ARCH_DIR)/Makefile-skas -ARCH_INCLUDE := -I$(srctree)/$(ARCH_DIR)/include/shared +SHARED_HEADERS := $(ARCH_DIR)/include/shared +ARCH_INCLUDE := -I$(srctree)/$(SHARED_HEADERS) ARCH_INCLUDE += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)/shared ifneq ($(KBUILD_SRC),) -ARCH_INCLUDE += -I$(ARCH_DIR)/include/shared # for two generated files +ARCH_INCLUDE += -I$(SHARED_HEADERS) endif KBUILD_CPPFLAGS += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH) @@ -85,8 +86,8 @@ endef KBUILD_KCONFIG := arch/um/Kconfig.$(HEADER_ARCH) -archprepare: $(ARCH_DIR)/include/shared/user_constants.h -prepare: $(ARCH_DIR)/include/shared/kern_constants.h +archprepare: $(SHARED_HEADERS)/user_constants.h +archprepare: $(SHARED_HEADERS)/kern_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib @@ -119,17 +120,13 @@ endef # When cleaning we don't include .config, so we don't include # TT or skas makefiles and don't clean skas_ptregs.h. CLEAN_FILES += linux x.i gmon.out \ - $(ARCH_DIR)/include/shared/user_constants.h \ - $(ARCH_DIR)/include/shared/kern_constants.h + $(SHARED_HEADERS)/user_constants.h \ + $(SHARED_HEADERS)/kern_constants.h archclean: @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -o -name '*.gcov' \) -type f -print | xargs rm -f -$(objtree)/$(ARCH_DIR)/include/shared: - @echo ' MKDIR $@' - $(Q)mkdir -p $@ - # Generated files $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s: FORCE @@ -148,11 +145,11 @@ define filechk_gen-asm-offsets echo ""; ) endef -$(ARCH_DIR)/include/shared/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s +$(SHARED_HEADERS)/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s $(call filechk,gen-asm-offsets) -$(ARCH_DIR)/include/shared/kern_constants.h: $(objtree)/$(ARCH_DIR)/include/shared - @echo ' SYMLINK $@' - $(Q)ln -sf ../../../../include/asm/asm-offsets.h $@ +$(SHARED_HEADERS)/kern_constants.h: + $(Q)mkdir -p $(dir $@) + $(Q)echo '#include "../../../../include/asm/asm-offsets.h"' >$@ export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS HEADER_ARCH DEV_NULL_PATH diff --git a/arch/um/include/asm/system.h b/arch/um/include/asm/system.h index ae5f94d6317..753346e2cdf 100644 --- a/arch/um/include/asm/system.h +++ b/arch/um/include/asm/system.h @@ -11,21 +11,21 @@ extern int get_signals(void); extern void block_signals(void); extern void unblock_signals(void); -#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \ +#define local_save_flags(flags) do { typecheck(unsigned long, flags); \ (flags) = get_signals(); } while(0) -#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \ +#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \ set_signals(flags); } while(0) -#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \ - raw_local_irq_disable(); } while(0) +#define local_irq_save(flags) do { local_save_flags(flags); \ + local_irq_disable(); } while(0) -#define raw_local_irq_enable() unblock_signals() -#define raw_local_irq_disable() block_signals() +#define local_irq_enable() unblock_signals() +#define local_irq_disable() block_signals() #define irqs_disabled() \ ({ \ unsigned long flags; \ - raw_local_save_flags(flags); \ + local_save_flags(flags); \ (flags == 0); \ }) diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c index 910eda8fca1..806d381947b 100644 --- a/arch/um/kernel/init_task.c +++ b/arch/um/kernel/init_task.c @@ -10,7 +10,6 @@ #include "linux/mqueue.h" #include "asm/uaccess.h" -static struct fs_struct init_fs = INIT_FS; struct mm_struct init_mm = INIT_MM(init_mm); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index 04577214284..98351c78bc8 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -25,13 +25,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); #include "irq_user.h" #include "os.h" -/* CPU online map, set by smp_boot_cpus */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -cpumask_t cpu_possible_map = CPU_MASK_NONE; - -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_possible_map); - /* Per CPU bogomips and other parameters * The only piece used here is the ipi pipe, which is set before SMP is * started and never changed. diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 47f04f4a346..b13a87a3ec9 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -50,7 +50,7 @@ static int itimer_next_event(unsigned long delta, static struct clock_event_device itimer_clockevent = { .name = "itimer", .rating = 250, - .cpumask = CPU_MASK_ALL, + .cpumask = cpu_all_mask, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = itimer_set_mode, .set_next_event = itimer_next_event, diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0f44add3e0b..862adb9bf0d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -586,6 +586,16 @@ config AMD_IOMMU your BIOS for an option to enable it or if you have an IVRS ACPI table. +config AMD_IOMMU_STATS + bool "Export AMD IOMMU statistics to debugfs" + depends on AMD_IOMMU + select DEBUG_FS + help + This option enables code in the AMD IOMMU driver to collect various + statistics about whats happening in the driver and exports that + information to userspace via debugfs. + If unsure, say N. + # need this always selected by IOMMU for the VIA workaround config SWIOTLB def_bool y if X86_64 @@ -599,21 +609,25 @@ config SWIOTLB config IOMMU_HELPER def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) +config IOMMU_API + def_bool (AMD_IOMMU || DMAR) + config MAXSMP bool "Configure Maximum number of SMP Processors and NUMA Nodes" - depends on X86_64 && SMP && BROKEN + depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL + select CPUMASK_OFFSTACK default n help Configure maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. config NR_CPUS - int "Maximum number of CPUs (2-512)" if !MAXSMP - range 2 512 - depends on SMP + int "Maximum number of CPUs" if SMP && !MAXSMP + range 2 512 if SMP && !MAXSMP + default "1" if !SMP default "4096" if MAXSMP - default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 - default "8" + default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) + default "8" if SMP help This allows you to specify the maximum number of CPUs which this kernel will support. The maximum supported value is 512 and the diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 85a78575956..8078955845a 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -408,7 +408,7 @@ config X86_MINIMUM_CPU_FAMILY config X86_DEBUGCTLMSR def_bool y - depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) + depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML menuconfig PROCESSOR_SELECT bool "Supported processor vendors" if EMBEDDED diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index b195f85526e..9dabd00e980 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -24,15 +24,14 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/i387.h> -#include <asm/ia32.h> #include <asm/ptrace.h> #include <asm/ia32_unistd.h> #include <asm/user32.h> #include <asm/sigcontext32.h> #include <asm/proto.h> #include <asm/vdso.h> - #include <asm/sigframe.h> +#include <asm/sys_ia32.h> #define DEBUG_SIG 0 diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c index d21991ce606..29cdcd02ead 100644 --- a/arch/x86/ia32/ipc32.c +++ b/arch/x86/ia32/ipc32.c @@ -8,6 +8,7 @@ #include <linux/shm.h> #include <linux/ipc.h> #include <linux/compat.h> +#include <asm/sys_ia32.h> asmlinkage long sys32_ipc(u32 call, int first, int second, int third, compat_uptr_t ptr, u32 fifth) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 2e09dcd3c0a..6c0d7f6231a 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -44,8 +44,8 @@ #include <asm/types.h> #include <asm/uaccess.h> #include <asm/atomic.h> -#include <asm/ia32.h> #include <asm/vgtod.h> +#include <asm/sys_ia32.h> #define AA(__x) ((unsigned long)(__x)) diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ac302a2fa33..95c8cd9d22b 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -190,16 +190,23 @@ /* FIXME: move this macro to <linux/pci.h> */ #define PCI_BUS(x) (((x) >> 8) & 0xff) +/* Protection domain flags */ +#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ +#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops + domain for an IOMMU */ + /* * This structure contains generic data for IOMMU protection domains * independent of their use. */ struct protection_domain { - spinlock_t lock; /* mostly used to lock the page table*/ - u16 id; /* the domain id written to the device table */ - int mode; /* paging mode (0-6 levels) */ - u64 *pt_root; /* page table root pointer */ - void *priv; /* private data */ + spinlock_t lock; /* mostly used to lock the page table*/ + u16 id; /* the domain id written to the device table */ + int mode; /* paging mode (0-6 levels) */ + u64 *pt_root; /* page table root pointer */ + unsigned long flags; /* flags to find out type of domain */ + unsigned dev_cnt; /* devices assigned to this domain */ + void *priv; /* private data */ }; /* @@ -295,7 +302,7 @@ struct amd_iommu { bool int_enabled; /* if one, we need to send a completion wait command */ - int need_sync; + bool need_sync; /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; @@ -374,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table; extern unsigned long *amd_iommu_pd_alloc_bitmap; /* will be 1 if device isolation is enabled */ -extern int amd_iommu_isolate; +extern bool amd_iommu_isolate; /* * If true, the addresses will be flushed on unmap time, not when @@ -382,18 +389,6 @@ extern int amd_iommu_isolate; */ extern bool amd_iommu_unmap_flush; -/* takes a PCI device id and prints it out in a readable form */ -static inline void print_devid(u16 devid, int nl) -{ - int bus = devid >> 8; - int dev = devid >> 3 & 0x1f; - int fn = devid & 0x07; - - printk("%02x:%02x.%x", bus, dev, fn); - if (nl) - printk("\n"); -} - /* takes bus and device/function and returns the device id * FIXME: should that be in generic PCI code? */ static inline u16 calc_devid(u8 bus, u8 devfn) @@ -401,4 +396,32 @@ static inline u16 calc_devid(u8 bus, u8 devfn) return (((u16)bus) << 8) | devfn; } +#ifdef CONFIG_AMD_IOMMU_STATS + +struct __iommu_counter { + char *name; + struct dentry *dent; + u64 value; +}; + +#define DECLARE_STATS_COUNTER(nm) \ + static struct __iommu_counter nm = { \ + .name = #nm, \ + } + +#define INC_STATS_COUNTER(name) name.value += 1 +#define ADD_STATS_COUNTER(name, x) name.value += (x) +#define SUB_STATS_COUNTER(name, x) name.value -= (x) + +#else /* CONFIG_AMD_IOMMU_STATS */ + +#define DECLARE_STATS_COUNTER(name) +#define INC_STATS_COUNTER(name) +#define ADD_STATS_COUNTER(name, x) +#define SUB_STATS_COUNTER(name, x) + +static inline void amd_iommu_stats_init(void) { } + +#endif /* CONFIG_AMD_IOMMU_STATS */ + #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 25caa0738af..ab1d51a8855 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -54,7 +54,6 @@ extern int disable_apic; extern int is_vsmp_box(void); extern void xapic_wait_icr_idle(void); extern u32 safe_xapic_wait_icr_idle(void); -extern u64 xapic_icr_read(void); extern void xapic_icr_write(u32, u32); extern int setup_profiling_timer(unsigned int); @@ -93,7 +92,7 @@ static inline u32 native_apic_msr_read(u32 reg) } #ifndef CONFIG_X86_32 -extern int x2apic, x2apic_preenabled; +extern int x2apic; extern void check_x2apic(void); extern void enable_x2apic(void); extern void enable_IR_x2apic(void); diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index ce547f24a1c..d8dd9f53791 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -9,12 +9,12 @@ static inline int apic_id_registered(void) return (1); } -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { #ifdef CONFIG_SMP - return cpu_online_map; + return &cpu_online_map; #else - return cpumask_of_cpu(0); + return &cpumask_of_cpu(0); #endif } @@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < NR_CPUS) + if (mps_cpu < nr_cpu_ids) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); return BAD_APICID; @@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[]; /* Mapping from cpu number to logical apicid */ static inline int cpu_to_logical_apicid(int cpu) { - if (cpu >= NR_CPUS) + if (cpu >= nr_cpu_ids) return BAD_APICID; return cpu_physical_id(cpu); } @@ -119,16 +119,34 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) } /* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { int cpu; int apicid; - cpu = first_cpu(cpumask); + cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); return apicid; } +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + if (cpu < nr_cpu_ids) + return cpu_to_logical_apicid(cpu); + + return BAD_APICID; +} + static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h index 9404c535b7e..27fcd01b3ae 100644 --- a/arch/x86/include/asm/bigsmp/ipi.h +++ b/arch/x86/include/asm/bigsmp/ipi.h @@ -1,25 +1,22 @@ #ifndef __ASM_MACH_IPI_H #define __ASM_MACH_IPI_H -void send_IPI_mask_sequence(cpumask_t mask, int vector); +void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask_allbutself(cpu_online_mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_MACH_IPI_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index e6b82b17b07..dc27705f544 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -320,16 +320,14 @@ static inline void set_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); } -#define SYS_VECTOR_FREE 0 -#define SYS_VECTOR_ALLOCED 1 - extern int first_system_vector; -extern char system_vectors[]; +/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ +extern unsigned long used_vectors[]; static inline void alloc_system_vector(int vector) { - if (system_vectors[vector] == SYS_VECTOR_FREE) { - system_vectors[vector] = SYS_VECTOR_ALLOCED; + if (!test_bit(vector, used_vectors)) { + set_bit(vector, used_vectors); if (first_system_vector > vector) first_system_vector = vector; } else diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index a2e545c91c3..ca5ffb2856b 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); #endif /* CONFIG_X86_32 */ +extern int add_efi_memmap; extern void efi_reserve_early(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index e24ef876915..bc53d5ef138 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -9,14 +9,14 @@ static inline int apic_id_registered(void) return (1); } -static inline cpumask_t target_cpus_cluster(void) +static inline const cpumask_t *target_cpus_cluster(void) { - return CPU_MASK_ALL; + return &CPU_MASK_ALL; } -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { - return cpumask_of_cpu(smp_processor_id()); + return &cpumask_of_cpu(smp_processor_id()); } #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) @@ -80,9 +80,10 @@ extern int apic_version [MAX_APICS]; static inline void setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); + "Physical Cluster" : "Logical Cluster", + nr_ioapics, cpus_addr(*target_cpus())[0]); } static inline int multi_timer_check(int apic, int irq) @@ -100,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu) { if (!mps_cpu) return boot_cpu_physical_apicid; - else if (mps_cpu < NR_CPUS) + else if (mps_cpu < nr_cpu_ids) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; @@ -120,9 +121,9 @@ extern u8 cpu_2_logical_apicid[]; static inline int cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP - if (cpu >= NR_CPUS) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; #else return logical_smp_processor_id(); #endif @@ -146,25 +147,26 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid) return (1); } -static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) +static inline unsigned int +cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(cpumask); + num_bits_set = cpumask_weight(cpumask); /* Return id to all */ - if (num_bits_set == NR_CPUS) + if (num_bits_set == nr_cpu_ids) return 0xFF; /* * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(cpumask); + cpu = cpumask_first(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { + if (cpumask_test_cpu(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -179,25 +181,25 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(cpumask); + num_bits_set = cpus_weight(*cpumask); /* Return id to all */ - if (num_bits_set == NR_CPUS) + if (num_bits_set == nr_cpu_ids) return cpu_to_logical_apicid(0); /* * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(cpumask); + cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { + if (cpu_isset(cpu, *cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -212,6 +214,24 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) return apicid; } + +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int apicid = cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = cpu_mask_to_apicid(cpumask); + + free_cpumask_var(cpumask); + return apicid; +} + static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h index 632a955fcc0..7e8ed24d4b8 100644 --- a/arch/x86/include/asm/es7000/ipi.h +++ b/arch/x86/include/asm/es7000/ipi.h @@ -1,24 +1,22 @@ #ifndef __ASM_ES7000_IPI_H #define __ASM_ES7000_IPI_H -void send_IPI_mask_sequence(cpumask_t mask, int vector); +void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask_allbutself(cpu_online_mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_ES7000_IPI_H */ diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 0ac17d33a8c..746f37a7963 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -24,7 +24,7 @@ struct genapic { int (*probe)(void); int (*apic_id_registered)(void); - cpumask_t (*target_cpus)(void); + const struct cpumask *(*target_cpus)(void); int int_delivery_mode; int int_dest_mode; int ESR_DISABLE; @@ -57,12 +57,16 @@ struct genapic { unsigned (*get_apic_id)(unsigned long x); unsigned long apic_id_mask; - unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); - cpumask_t (*vector_allocation_domain)(int cpu); + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); #ifdef CONFIG_SMP /* ipi */ - void (*send_IPI_mask)(cpumask_t mask, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); #endif @@ -114,6 +118,7 @@ struct genapic { APICFUNC(get_apic_id) \ .apic_id_mask = APIC_ID_MASK, \ APICFUNC(cpu_mask_to_apicid) \ + APICFUNC(cpu_mask_to_apicid_and) \ APICFUNC(vector_allocation_domain) \ APICFUNC(acpi_madt_oem_check) \ IPIFUNC(send_IPI_mask) \ diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 2cae011668b..adf32fb56aa 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_GENAPIC_64_H #define _ASM_X86_GENAPIC_64_H +#include <linux/cpumask.h> + /* * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 @@ -18,16 +20,20 @@ struct genapic { u32 int_delivery_mode; u32 int_dest_mode; int (*apic_id_registered)(void); - cpumask_t (*target_cpus)(void); - cpumask_t (*vector_allocation_domain)(int cpu); + const struct cpumask *(*target_cpus)(void); + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); /* ipi */ - void (*send_IPI_mask)(cpumask_t mask, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); /* */ - unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); unsigned int (*phys_pkg_id)(int index_msb); unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index f89dffb28aa..c745a306f7d 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, native_apic_mem_write(APIC_ICR, cfg); } -static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) +static inline void send_IPI_mask_sequence(const struct cpumask *mask, + int vector) { unsigned long flags; unsigned long query_cpu; @@ -128,11 +129,29 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) * - mbligh */ local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, mask) { + for_each_cpu(query_cpu, mask) { __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } +static inline void send_IPI_mask_allbutself(const struct cpumask *mask, + int vector) +{ + unsigned long flags; + unsigned int query_cpu; + unsigned int this_cpu = smp_processor_id(); + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) + if (query_cpu != this_cpu) + __send_IPI_dest_field( + per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + local_irq_restore(flags); +} + #endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 28e409fc73f..592688ed04d 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -33,7 +33,7 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_HOTPLUG_CPU #include <linux/cpumask.h> -extern void fixup_irqs(cpumask_t map); +extern void fixup_irqs(void); #endif extern unsigned int do_IRQ(struct pt_regs *regs); @@ -42,5 +42,6 @@ extern void native_init_IRQ(void); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); +extern int vector_used_by_percpu_irq(unsigned int vector); #endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8346be87cfa..730843d1d2f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -21,6 +21,7 @@ #include <asm/pvclock-abi.h> #include <asm/desc.h> +#include <asm/mtrr.h> #define KVM_MAX_VCPUS 16 #define KVM_MEMORY_SLOTS 32 @@ -86,6 +87,7 @@ #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 40 +#define KVM_NR_FIXED_MTRR_REGION 88 #define KVM_NR_VAR_MTRR 8 extern spinlock_t kvm_lock; @@ -180,6 +182,8 @@ struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + struct list_head oos_link; + /* * The following two entries are used to key the shadow page in the * hash table. @@ -190,13 +194,16 @@ struct kvm_mmu_page { u64 *spt; /* hold the gfn of each spte inside spt */ gfn_t *gfns; - unsigned long slot_bitmap; /* One bit set per slot which has memory - * in this shadow page. - */ + /* + * One bit set per slot which has memory + * in this shadow page. + */ + DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); int multimapped; /* More than one parent_pte? */ int root_count; /* Currently serving as active root */ bool unsync; - bool unsync_children; + bool global; + unsigned int unsync_children; union { u64 *parent_pte; /* !multimapped */ struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ @@ -327,8 +334,10 @@ struct kvm_vcpu_arch { bool nmi_pending; bool nmi_injected; + bool nmi_window_open; - u64 mtrr[0x100]; + struct mtrr_state_type mtrr_state; + u32 pat; }; struct kvm_mem_alias { @@ -350,11 +359,13 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head assigned_dev_head; - struct dmar_domain *intel_iommu_domain; + struct list_head oos_global_pages; + struct iommu_domain *iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; struct hlist_head irq_ack_notifier_list; + int vapics_in_nmi_mode; int round_robin_prev_vcpu; unsigned int tss_addr; @@ -378,6 +389,7 @@ struct kvm_vm_stat { u32 mmu_recycled; u32 mmu_cache_miss; u32 mmu_unsync; + u32 mmu_unsync_global; u32 remote_tlb_flush; u32 lpages; }; @@ -397,6 +409,7 @@ struct kvm_vcpu_stat { u32 halt_exits; u32 halt_wakeup; u32 request_irq_exits; + u32 request_nmi_exits; u32 irq_exits; u32 host_state_reload; u32 efer_reload; @@ -405,6 +418,7 @@ struct kvm_vcpu_stat { u32 insn_emulation_fail; u32 hypercalls; u32 irq_injections; + u32 nmi_injections; }; struct descriptor_table { @@ -477,6 +491,7 @@ struct kvm_x86_ops { int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); + int (*get_mt_mask_shift)(void); }; extern struct kvm_x86_ops *kvm_x86_ops; @@ -490,7 +505,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); void kvm_mmu_set_base_ptes(u64 base_pte); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask); + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); @@ -587,12 +602,14 @@ unsigned long segment_base(u16 selector); void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes); + const u8 *new, int bytes, + bool guest_initiated); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); +void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); @@ -607,6 +624,8 @@ void kvm_disable_tdp(void); int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int complete_pio(struct kvm_vcpu *vcpu); +struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); + static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) { struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); @@ -702,18 +721,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } -#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" -#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" -#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" -#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" -#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" -#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" -#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" -#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" -#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" - #define MSR_IA32_TIME_STAMP_COUNTER 0x010 #define TSS_IOPB_BASE_OFFSET 0x66 diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index 25179a29f20..6a159732881 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h @@ -123,6 +123,7 @@ struct decode_cache { u8 ad_bytes; u8 rex_prefix; struct operand src; + struct operand src2; struct operand dst; bool has_seg_override; u8 seg_override; @@ -146,22 +147,18 @@ struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; - /* Linear faulting address (if emulating a page-faulting instruction) */ unsigned long eflags; - /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; - u32 cs_base; /* decode cache */ - struct decode_cache decode; }; /* Repeat String Operation Prefix */ -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 +#define REPE_PREFIX 1 +#define REPNE_PREFIX 2 /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ @@ -170,7 +167,7 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ /* Host execution mode. */ -#if defined(__i386__) +#if defined(CONFIG_X86_32) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 #elif defined(CONFIG_X86_64) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index d28a507cef3..1caf57628b9 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -15,7 +15,7 @@ #define SHARED_SWITCHER_PAGES \ DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE) /* Pages for switcher itself, then two pages per cpu */ -#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS) +#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) /* We map at -4M for ease of mapping into the guest (one PTE page). */ #define SWITCHER_ADDR 0xFFC00000 diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 6cb3a467e06..cc09cbbee27 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -8,12 +8,12 @@ #define APIC_DFR_VALUE (APIC_DFR_FLAT) -static inline cpumask_t target_cpus(void) +static inline const struct cpumask *target_cpus(void) { #ifdef CONFIG_SMP - return cpu_online_map; + return cpu_online_mask; #else - return cpumask_of_cpu(0); + return cpumask_of(0); #endif } @@ -28,6 +28,7 @@ static inline cpumask_t target_cpus(void) #define apic_id_registered (genapic->apic_id_registered) #define init_apic_ldr (genapic->init_apic_ldr) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) +#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) @@ -61,9 +62,19 @@ static inline int apic_id_registered(void) return physid_isset(read_apic_id(), phys_cpu_present_map); } -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask) { - return cpus_addr(cpumask)[0]; + return cpumask_bits(cpumask)[0]; +} + +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + unsigned long mask1 = cpumask_bits(cpumask)[0]; + unsigned long mask2 = cpumask_bits(andmask)[0]; + unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; + + return (unsigned int)(mask1 & mask2 & mask3); } static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) @@ -88,7 +99,7 @@ static inline int apicid_to_node(int logical_apicid) #endif } -static inline cpumask_t vector_allocation_domain(int cpu) +static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -98,8 +109,7 @@ static inline cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; } #endif @@ -131,7 +141,7 @@ static inline int cpu_to_logical_apicid(int cpu) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h index fabca01ebac..191312d155d 100644 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/arch/x86/include/asm/mach-default/mach_ipi.h @@ -4,7 +4,8 @@ /* Avoid include hell */ #define NMI_VECTOR 0x02 -void send_IPI_mask_bitmask(cpumask_t mask, int vector); +void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); void __send_IPI_shortcut(unsigned int shortcut, int vector); extern int no_broadcast; @@ -12,28 +13,27 @@ extern int no_broadcast; #ifdef CONFIG_X86_64 #include <asm/genapic.h> #define send_IPI_mask (genapic->send_IPI_mask) +#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) #else -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_bitmask(mask, vector); } +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); #endif static inline void __local_send_IPI_allbutself(int vector) { - if (no_broadcast || vector == NMI_VECTOR) { - cpumask_t mask = cpu_online_map; - - cpu_clear(smp_processor_id(), mask); - send_IPI_mask(mask, vector); - } else + if (no_broadcast || vector == NMI_VECTOR) + send_IPI_mask_allbutself(cpu_online_mask, vector); + else __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } static inline void __local_send_IPI_all(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(cpu_online_mask, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector); } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index e430f47df66..48553e958ad 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -24,6 +24,7 @@ #define check_phys_apicid_present (genapic->check_phys_apicid_present) #define check_apicid_used (genapic->check_apicid_used) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) +#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define vector_allocation_domain (genapic->vector_allocation_domain) #define enable_apic_mode (genapic->enable_apic_mode) #define phys_pkg_id (genapic->phys_pkg_id) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 91885c28f66..62d14ce3cd0 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -6,13 +6,13 @@ #include <asm/mpspec_def.h> extern int apic_version[MAX_APICS]; +extern int pic_mode; #ifdef CONFIG_X86_32 #include <mach_mpspec.h> extern unsigned int def_to_bigsmp; extern u8 apicid_2_node[]; -extern int pic_mode; #ifdef CONFIG_X86_NUMAQ extern int mp_bus_id_to_node[MAX_MP_BUSSES]; diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 7c1e4258b31..cb988aab716 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -57,6 +57,31 @@ struct mtrr_gentry { }; #endif /* !__i386__ */ +struct mtrr_var_range { + u32 base_lo; + u32 base_hi; + u32 mask_lo; + u32 mask_hi; +}; + +/* In the Intel processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +#define MTRR_NUM_FIXED_RANGES 88 +#define MTRR_MAX_VAR_RANGES 256 + +struct mtrr_state_type { + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; + unsigned char enabled; + unsigned char have_fixed; + mtrr_type def_type; +}; + +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + /* These are the various ioctls */ #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 0bf2a06b7a4..bf37bc49bd8 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -7,9 +7,9 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { - return CPU_MASK_ALL; + return &CPU_MASK_ALL; } #define NO_BALANCE_IRQ (1) @@ -63,8 +63,8 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) extern u8 cpu_2_logical_apicid[]; static inline int cpu_to_logical_apicid(int cpu) { - if (cpu >= NR_CPUS) - return BAD_APICID; + if (cpu >= nr_cpu_ids) + return BAD_APICID; return (int)cpu_2_logical_apicid[cpu]; } @@ -122,7 +122,13 @@ static inline void enable_apic_mode(void) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + return (int) 0xF; +} + +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { return (int) 0xF; } diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h index 935588d286c..a8374c65277 100644 --- a/arch/x86/include/asm/numaq/ipi.h +++ b/arch/x86/include/asm/numaq/ipi.h @@ -1,25 +1,22 @@ #ifndef __ASM_NUMAQ_IPI_H #define __ASM_NUMAQ_IPI_H -void send_IPI_mask_sequence(cpumask_t, int vector); +void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask_allbutself(cpu_online_mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_NUMAQ_IPI_H */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 66834c41c04..a977de23cb4 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -102,9 +102,9 @@ extern void pci_iommu_alloc(void); #ifdef CONFIG_NUMA /* Returns the node based on pci bus */ -static inline int __pcibus_to_node(struct pci_bus *bus) +static inline int __pcibus_to_node(const struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; + const struct pci_sysdata *sd = bus->sysdata; return sd->node; } @@ -113,6 +113,12 @@ static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) { return node_to_cpumask(__pcibus_to_node(bus)); } + +static inline const struct cpumask * +cpumask_of_pcibus(const struct pci_bus *bus) +{ + return cpumask_of_node(__pcibus_to_node(bus)); +} #endif #endif /* _ASM_X86_PCI_H */ diff --git a/arch/x86/pci/pci.h b/arch/x86/include/asm/pci_x86.h index 1959018aac0..e60fd3e14bd 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/include/asm/pci_x86.h @@ -57,7 +57,8 @@ extern struct pci_ops pci_root_ops; struct irq_info { u8 bus, devfn; /* Bus, device and function */ struct { - u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ + u8 link; /* IRQ line ID, chipset dependent, + 0 = not routed */ u16 bitmap; /* Available IRQs */ } __attribute__((packed)) irq[4]; u8 slot; /* Slot number, 0=onboard */ @@ -69,11 +70,13 @@ struct irq_routing_table { u16 version; /* PIRQ_VERSION */ u16 size; /* Table size in bytes */ u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ - u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ - u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ + u16 exclusive_irqs; /* IRQs devoted exclusively to + PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of + interrupt router */ u32 miniport_data; /* Crap */ u8 rfu[11]; - u8 checksum; /* Modulo 256 checksum must give zero */ + u8 checksum; /* Modulo 256 checksum must give 0 */ struct irq_info slots[0]; } __attribute__((packed)); @@ -148,15 +151,15 @@ static inline unsigned int mmio_config_readl(void __iomem *pos) static inline void mmio_config_writeb(void __iomem *pos, u8 val) { - asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory"); + asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory"); } static inline void mmio_config_writew(void __iomem *pos, u16 val) { - asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory"); + asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory"); } static inline void mmio_config_writel(void __iomem *pos, u32 val) { - asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory"); + asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); } diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index d12811ce51d..830b9fcb642 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -60,7 +60,7 @@ struct smp_ops { void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); - void (*send_call_func_ipi)(cpumask_t mask); + void (*send_call_func_ipi)(const struct cpumask *mask); void (*send_call_func_single_ipi)(int cpu); }; @@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) static inline void arch_send_call_function_ipi(cpumask_t mask) { - smp_ops.send_call_func_ipi(mask); + smp_ops.send_call_func_ipi(&mask); } void cpu_disable_common(void); @@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); -void native_send_call_func_ipi(cpumask_t mask); +void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); extern void prefill_possible_map(void); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 9b3070f1c2a..4bb5fb34f03 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -14,13 +14,13 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing * Just start on cpu 0. IRQ balancing will spread load */ - return cpumask_of_cpu(0); + return &cpumask_of_cpu(0); } #define INT_DELIVERY_MODE (dest_LowestPrio) @@ -52,7 +52,7 @@ static inline void init_apic_ldr(void) int i; /* Create logical APIC IDs by counting CPUs already in cluster. */ - for (count = 0, i = NR_CPUS; --i >= 0; ) { + for (count = 0, i = nr_cpu_ids; --i >= 0; ) { lid = cpu_2_logical_apicid[i]; if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) ++count; @@ -97,8 +97,8 @@ static inline int apicid_to_node(int logical_apicid) static inline int cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP - if (cpu >= NR_CPUS) - return BAD_APICID; + if (cpu >= nr_cpu_ids) + return BAD_APICID; return (int)cpu_2_logical_apicid[cpu]; #else return logical_smp_processor_id(); @@ -107,7 +107,7 @@ static inline int cpu_to_logical_apicid(int cpu) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < NR_CPUS) + if (mps_cpu < nr_cpu_ids) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; @@ -137,25 +137,25 @@ static inline void enable_apic_mode(void) { } -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(cpumask); + num_bits_set = cpus_weight(*cpumask); /* Return id to all */ - if (num_bits_set == NR_CPUS) + if (num_bits_set >= nr_cpu_ids) return (int) 0xFF; /* * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(cpumask); + cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { + if (cpu_isset(cpu, *cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -170,6 +170,23 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) return apicid; } +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int apicid = cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = cpu_mask_to_apicid(cpumask); + + free_cpumask_var(cpumask); + return apicid; +} + /* cpuid returns the value latched in the HW at reset, not the APIC ID * register's value. For any box whose BIOS changes APIC IDs, like * clustered APIC systems, we must use hard_smp_processor_id. diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h index 53bd1e7bd7b..a8a2c24f50c 100644 --- a/arch/x86/include/asm/summit/ipi.h +++ b/arch/x86/include/asm/summit/ipi.h @@ -1,9 +1,10 @@ #ifndef __ASM_SUMMIT_IPI_H #define __ASM_SUMMIT_IPI_H -void send_IPI_mask_sequence(cpumask_t mask, int vector); +void send_IPI_mask_sequence(const cpumask_t *mask, int vector); +void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const cpumask_t *mask, int vector) { send_IPI_mask_sequence(mask, vector); } @@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector) cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask(&mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(&cpu_online_map, vector); } #endif /* __ASM_SUMMIT_IPI_H */ diff --git a/arch/x86/kvm/svm.h b/arch/x86/include/asm/svm.h index 1b8afa78e86..1b8afa78e86 100644 --- a/arch/x86/kvm/svm.h +++ b/arch/x86/include/asm/svm.h diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h new file mode 100644 index 00000000000..ffb08be2a53 --- /dev/null +++ b/arch/x86/include/asm/sys_ia32.h @@ -0,0 +1,101 @@ +/* + * sys_ia32.h - Linux ia32 syscall interfaces + * + * Copyright (c) 2008 Jaswinder Singh Rajput + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#ifndef _ASM_X86_SYS_IA32_H +#define _ASM_X86_SYS_IA32_H + +#include <linux/compiler.h> +#include <linux/linkage.h> +#include <linux/types.h> +#include <linux/signal.h> +#include <asm/compat.h> +#include <asm/ia32.h> + +/* ia32/sys_ia32.c */ +asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long); +asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); + +asmlinkage long sys32_stat64(char __user *, struct stat64 __user *); +asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *); +asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); +asmlinkage long sys32_fstatat(unsigned int, char __user *, + struct stat64 __user *, int); +struct mmap_arg_struct; +asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); +asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); + +asmlinkage long sys32_pipe(int __user *); +struct sigaction32; +struct old_sigaction32; +asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, + struct sigaction32 __user *, unsigned int); +asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, + struct old_sigaction32 __user *); +asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, + compat_sigset_t __user *, unsigned int); +asmlinkage long sys32_alarm(unsigned int); + +struct sel_arg_struct; +asmlinkage long sys32_old_select(struct sel_arg_struct __user *); +asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); +asmlinkage long sys32_sysfs(int, u32, u32); + +asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, + struct compat_timespec __user *); +asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); +asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); + +#ifdef CONFIG_SYSCTL_SYSCALL +struct sysctl_ia32; +asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); +#endif + +asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); +asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); + +asmlinkage long sys32_personality(unsigned long); +asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); + +asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); + +struct oldold_utsname; +struct old_utsname; +asmlinkage long sys32_olduname(struct oldold_utsname __user *); +long sys32_uname(struct old_utsname __user *); + +long sys32_ustat(unsigned, struct ustat32 __user *); + +asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, + compat_uptr_t __user *, struct pt_regs *); +asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); + +long sys32_lseek(unsigned int, int, unsigned int); +long sys32_kill(int, int); +long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); +long sys32_vm86_warning(void); +long sys32_lookup_dcookie(u32, u32, char __user *, size_t); + +asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); +asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, + unsigned, unsigned, int); +asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); +asmlinkage long sys32_fallocate(int, int, unsigned, + unsigned, unsigned, unsigned); + +/* ia32/ia32_signal.c */ +asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); +asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *, + stack_ia32_t __user *, struct pt_regs *); +asmlinkage long sys32_sigreturn(struct pt_regs *); +asmlinkage long sys32_rt_sigreturn(struct pt_regs *); + +/* ia32/ipc32.c */ +asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); +#endif /* _ASM_X86_SYS_IA32_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index ff386ff50ed..4e2f2e0aab2 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -61,13 +61,19 @@ static inline int cpu_to_node(int cpu) * * Side note: this function creates the returned cpumask on the stack * so with a high NR_CPUS count, excessive stack space is used. The - * node_to_cpumask_ptr function should be used whenever possible. + * cpumask_of_node function should be used whenever possible. */ static inline cpumask_t node_to_cpumask(int node) { return node_to_cpumask_map[node]; } +/* Returns a bitmask of CPUs on Node 'node'. */ +static inline const struct cpumask *cpumask_of_node(int node) +{ + return &node_to_cpumask_map[node]; +} + #else /* CONFIG_X86_64 */ /* Mappings between node number and cpus on that node. */ @@ -82,7 +88,7 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); #ifdef CONFIG_DEBUG_PER_CPU_MAPS extern int cpu_to_node(int cpu); extern int early_cpu_to_node(int cpu); -extern const cpumask_t *_node_to_cpumask_ptr(int node); +extern const cpumask_t *cpumask_of_node(int node); extern cpumask_t node_to_cpumask(int node); #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ @@ -103,7 +109,7 @@ static inline int early_cpu_to_node(int cpu) } /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ -static inline const cpumask_t *_node_to_cpumask_ptr(int node) +static inline const cpumask_t *cpumask_of_node(int node) { return &node_to_cpumask_map[node]; } @@ -116,12 +122,15 @@ static inline cpumask_t node_to_cpumask(int node) #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ -/* Replace default node_to_cpumask_ptr with optimized version */ +/* + * Replace default node_to_cpumask_ptr with optimized version + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #define node_to_cpumask_ptr(v, node) \ - const cpumask_t *v = _node_to_cpumask_ptr(node) + const cpumask_t *v = cpumask_of_node(node) #define node_to_cpumask_ptr_next(v, node) \ - v = _node_to_cpumask_ptr(node) + v = cpumask_of_node(node) #endif /* CONFIG_X86_64 */ @@ -187,7 +196,7 @@ extern int __node_distance(int, int); #define cpu_to_node(cpu) 0 #define early_cpu_to_node(cpu) 0 -static inline const cpumask_t *_node_to_cpumask_ptr(int node) +static inline const cpumask_t *cpumask_of_node(int node) { return &cpu_online_map; } @@ -200,12 +209,15 @@ static inline int node_to_first_cpu(int node) return first_cpu(cpu_online_map); } -/* Replace default node_to_cpumask_ptr with optimized version */ +/* + * Replace default node_to_cpumask_ptr with optimized version + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #define node_to_cpumask_ptr(v, node) \ - const cpumask_t *v = _node_to_cpumask_ptr(node) + const cpumask_t *v = cpumask_of_node(node) #define node_to_cpumask_ptr_next(v, node) \ - v = _node_to_cpumask_ptr(node) + v = cpumask_of_node(node) #endif #include <asm-generic/topology.h> @@ -214,18 +226,20 @@ static inline int node_to_first_cpu(int node) /* Returns the number of the first CPU on Node 'node'. */ static inline int node_to_first_cpu(int node) { - node_to_cpumask_ptr(mask, node); - return first_cpu(*mask); + return cpumask_first(cpumask_of_node(node)); } #endif extern cpumask_t cpu_coregroup_map(int cpu); +extern const struct cpumask *cpu_coregroup_mask(int cpu); #ifdef ENABLE_TOPO_DEFINES #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) /* indicates that pointers to the topology cpumask_t maps are valid */ #define arch_provides_topology_pointers yes diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index e2363253bbb..50423c7b56b 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -133,61 +133,61 @@ struct bau_msg_payload { * see table 4.2.3.0.1 in broacast_assist spec. */ struct bau_msg_header { - int dest_subnodeid:6; /* must be zero */ + unsigned int dest_subnodeid:6; /* must be zero */ /* bits 5:0 */ - int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */ - /* bits 20:6 */ - int command:8; /* message type */ + unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ + /* bits 20:6 */ /* first bit in node_map */ + unsigned int command:8; /* message type */ /* bits 28:21 */ /* 0x38: SN3net EndPoint Message */ - int rsvd_1:3; /* must be zero */ + unsigned int rsvd_1:3; /* must be zero */ /* bits 31:29 */ /* int will align on 32 bits */ - int rsvd_2:9; /* must be zero */ + unsigned int rsvd_2:9; /* must be zero */ /* bits 40:32 */ /* Suppl_A is 56-41 */ - int payload_2a:8; /* becomes byte 16 of msg */ + unsigned int payload_2a:8;/* becomes byte 16 of msg */ /* bits 48:41 */ /* not currently using */ - int payload_2b:8; /* becomes byte 17 of msg */ + unsigned int payload_2b:8;/* becomes byte 17 of msg */ /* bits 56:49 */ /* not currently using */ /* Address field (96:57) is never used as an address (these are address bits 42:3) */ - int rsvd_3:1; /* must be zero */ + unsigned int rsvd_3:1; /* must be zero */ /* bit 57 */ /* address bits 27:4 are payload */ /* these 24 bits become bytes 12-14 of msg */ - int replied_to:1; /* sent as 0 by the source to byte 12 */ + unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ /* bit 58 */ - int payload_1a:5; /* not currently used */ + unsigned int payload_1a:5;/* not currently used */ /* bits 63:59 */ - int payload_1b:8; /* not currently used */ + unsigned int payload_1b:8;/* not currently used */ /* bits 71:64 */ - int payload_1c:8; /* not currently used */ + unsigned int payload_1c:8;/* not currently used */ /* bits 79:72 */ - int payload_1d:2; /* not currently used */ + unsigned int payload_1d:2;/* not currently used */ /* bits 81:80 */ - int rsvd_4:7; /* must be zero */ + unsigned int rsvd_4:7; /* must be zero */ /* bits 88:82 */ - int sw_ack_flag:1; /* software acknowledge flag */ + unsigned int sw_ack_flag:1;/* software acknowledge flag */ /* bit 89 */ /* INTD trasactions at destination are to wait for software acknowledge */ - int rsvd_5:6; /* must be zero */ + unsigned int rsvd_5:6; /* must be zero */ /* bits 95:90 */ - int rsvd_6:5; /* must be zero */ + unsigned int rsvd_6:5; /* must be zero */ /* bits 100:96 */ - int int_both:1; /* if 1, interrupt both sockets on the blade */ + unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ /* bit 101*/ - int fairness:3; /* usually zero */ + unsigned int fairness:3;/* usually zero */ /* bits 104:102 */ - int multilevel:1; /* multi-level multicast format */ + unsigned int multilevel:1; /* multi-level multicast format */ /* bit 105 */ /* 0 for TLB: endpoint multi-unicast messages */ - int chaining:1; /* next descriptor is part of this activation*/ + unsigned int chaining:1;/* next descriptor is part of this activation*/ /* bit 106 */ - int rsvd_7:21; /* must be zero */ + unsigned int rsvd_7:21; /* must be zero */ /* bits 127:107 */ }; diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h new file mode 100644 index 00000000000..59363627523 --- /dev/null +++ b/arch/x86/include/asm/virtext.h @@ -0,0 +1,132 @@ +/* CPU virtualization extensions handling + * + * This should carry the code for handling CPU virtualization extensions + * that needs to live in the kernel core. + * + * Author: Eduardo Habkost <ehabkost@redhat.com> + * + * Copyright (C) 2008, Red Hat Inc. + * + * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ +#ifndef _ASM_X86_VIRTEX_H +#define _ASM_X86_VIRTEX_H + +#include <asm/processor.h> +#include <asm/system.h> + +#include <asm/vmx.h> +#include <asm/svm.h> + +/* + * VMX functions: + */ + +static inline int cpu_has_vmx(void) +{ + unsigned long ecx = cpuid_ecx(1); + return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ +} + + +/** Disable VMX on the current CPU + * + * vmxoff causes a undefined-opcode exception if vmxon was not run + * on the CPU previously. Only call this function if you know VMX + * is enabled. + */ +static inline void cpu_vmxoff(void) +{ + asm volatile (ASM_VMX_VMXOFF : : : "cc"); + write_cr4(read_cr4() & ~X86_CR4_VMXE); +} + +static inline int cpu_vmx_enabled(void) +{ + return read_cr4() & X86_CR4_VMXE; +} + +/** Disable VMX if it is enabled on the current CPU + * + * You shouldn't call this if cpu_has_vmx() returns 0. + */ +static inline void __cpu_emergency_vmxoff(void) +{ + if (cpu_vmx_enabled()) + cpu_vmxoff(); +} + +/** Disable VMX if it is supported and enabled on the current CPU + */ +static inline void cpu_emergency_vmxoff(void) +{ + if (cpu_has_vmx()) + __cpu_emergency_vmxoff(); +} + + + + +/* + * SVM functions: + */ + +/** Check if the CPU has SVM support + * + * You can use the 'msg' arg to get a message describing the problem, + * if the function returns zero. Simply pass NULL if you are not interested + * on the messages; gcc should take care of not generating code for + * the messages on this case. + */ +static inline int cpu_has_svm(const char **msg) +{ + uint32_t eax, ebx, ecx, edx; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + if (msg) + *msg = "not amd"; + return 0; + } + + cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + if (eax < SVM_CPUID_FUNC) { + if (msg) + *msg = "can't execute cpuid_8000000a"; + return 0; + } + + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { + if (msg) + *msg = "svm not available"; + return 0; + } + return 1; +} + + +/** Disable SVM on the current CPU + * + * You should call this only if cpu_has_svm() returned true. + */ +static inline void cpu_svm_disable(void) +{ + uint64_t efer; + + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); +} + +/** Makes sure SVM is disabled, if it is supported on the CPU + */ +static inline void cpu_emergency_svm_disable(void) +{ + if (cpu_has_svm(NULL)) + cpu_svm_disable(); +} + +#endif /* _ASM_X86_VIRTEX_H */ diff --git a/arch/x86/kvm/vmx.h b/arch/x86/include/asm/vmx.h index ec5edc339da..d0238e6151d 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -63,10 +63,13 @@ #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 /* VMCS Encodings */ enum vmcs_field { @@ -112,6 +115,8 @@ enum vmcs_field { VMCS_LINK_POINTER_HIGH = 0x00002801, GUEST_IA32_DEBUGCTL = 0x00002802, GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, GUEST_PDPTR0 = 0x0000280a, GUEST_PDPTR0_HIGH = 0x0000280b, GUEST_PDPTR1 = 0x0000280c, @@ -120,6 +125,8 @@ enum vmcs_field { GUEST_PDPTR2_HIGH = 0x0000280f, GUEST_PDPTR3 = 0x00002810, GUEST_PDPTR3_HIGH = 0x00002811, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, PIN_BASED_VM_EXEC_CONTROL = 0x00004000, CPU_BASED_VM_EXEC_CONTROL = 0x00004002, EXCEPTION_BITMAP = 0x00004004, @@ -331,8 +338,9 @@ enum vmcs_field { #define AR_RESERVD_MASK 0xfffe0f00 -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 +#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) #define VMX_NR_VPIDS (1 << 16) #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 @@ -356,4 +364,19 @@ enum vmcs_field { #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul + +#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" +#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" +#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" +#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" +#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" +#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" +#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" +#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" +#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" + + + #endif diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 65d0b72777e..29dc0c89d4a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -538,9 +538,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; struct acpi_madt_local_apic *lapic; - cpumask_t tmp_map, new_map; + cpumask_var_t tmp_map, new_map; u8 physid; int cpu; + int retval = -ENOMEM; if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) return -EINVAL; @@ -569,23 +570,37 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) buffer.length = ACPI_ALLOCATE_BUFFER; buffer.pointer = NULL; - tmp_map = cpu_present_map; + if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL)) + goto out; + + if (!alloc_cpumask_var(&new_map, GFP_KERNEL)) + goto free_tmp_map; + + cpumask_copy(tmp_map, cpu_present_mask); acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); /* * If mp_register_lapic successfully generates a new logical cpu * number, then the following will get us exactly what was mapped */ - cpus_andnot(new_map, cpu_present_map, tmp_map); - if (cpus_empty(new_map)) { + cpumask_andnot(new_map, cpu_present_mask, tmp_map); + if (cpumask_empty(new_map)) { printk ("Unable to map lapic to logical cpu number\n"); - return -EINVAL; + retval = -EINVAL; + goto free_new_map; } - cpu = first_cpu(new_map); + cpu = cpumask_first(new_map); *pcpu = cpu; - return 0; + retval = 0; + +free_new_map: + free_cpumask_var(new_map); +free_tmp_map: + free_cpumask_var(tmp_map); +out: + return retval; } /* wrapper to silence section mismatch warning */ @@ -598,7 +613,7 @@ EXPORT_SYMBOL(acpi_map_lsapic); int acpi_unmap_lsapic(int cpu) { per_cpu(x86_cpu_to_apicid, cpu) = -1; - cpu_clear(cpu, cpu_present_map); + set_cpu_present(cpu, false); num_processors--; return (0); diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2e2da717b35..5113c080f0c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -20,8 +20,12 @@ #include <linux/pci.h> #include <linux/gfp.h> #include <linux/bitops.h> +#include <linux/debugfs.h> #include <linux/scatterlist.h> #include <linux/iommu-helper.h> +#ifdef CONFIG_IOMMU_API +#include <linux/iommu.h> +#endif #include <asm/proto.h> #include <asm/iommu.h> #include <asm/gart.h> @@ -38,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); static LIST_HEAD(iommu_pd_list); static DEFINE_SPINLOCK(iommu_pd_list_lock); +#ifdef CONFIG_IOMMU_API +static struct iommu_ops amd_iommu_ops; +#endif + /* * general struct to manage commands send to an IOMMU */ @@ -47,6 +55,68 @@ struct iommu_cmd { static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); +static struct dma_ops_domain *find_protection_domain(u16 devid); + + +#ifdef CONFIG_AMD_IOMMU_STATS + +/* + * Initialization code for statistics collection + */ + +DECLARE_STATS_COUNTER(compl_wait); +DECLARE_STATS_COUNTER(cnt_map_single); +DECLARE_STATS_COUNTER(cnt_unmap_single); +DECLARE_STATS_COUNTER(cnt_map_sg); +DECLARE_STATS_COUNTER(cnt_unmap_sg); +DECLARE_STATS_COUNTER(cnt_alloc_coherent); +DECLARE_STATS_COUNTER(cnt_free_coherent); +DECLARE_STATS_COUNTER(cross_page); +DECLARE_STATS_COUNTER(domain_flush_single); +DECLARE_STATS_COUNTER(domain_flush_all); +DECLARE_STATS_COUNTER(alloced_io_mem); +DECLARE_STATS_COUNTER(total_map_requests); + +static struct dentry *stats_dir; +static struct dentry *de_isolate; +static struct dentry *de_fflush; + +static void amd_iommu_stats_add(struct __iommu_counter *cnt) +{ + if (stats_dir == NULL) + return; + + cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, + &cnt->value); +} + +static void amd_iommu_stats_init(void) +{ + stats_dir = debugfs_create_dir("amd-iommu", NULL); + if (stats_dir == NULL) + return; + + de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, + (u32 *)&amd_iommu_isolate); + + de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, + (u32 *)&amd_iommu_unmap_flush); + + amd_iommu_stats_add(&compl_wait); + amd_iommu_stats_add(&cnt_map_single); + amd_iommu_stats_add(&cnt_unmap_single); + amd_iommu_stats_add(&cnt_map_sg); + amd_iommu_stats_add(&cnt_unmap_sg); + amd_iommu_stats_add(&cnt_alloc_coherent); + amd_iommu_stats_add(&cnt_free_coherent); + amd_iommu_stats_add(&cross_page); + amd_iommu_stats_add(&domain_flush_single); + amd_iommu_stats_add(&domain_flush_all); + amd_iommu_stats_add(&alloced_io_mem); + amd_iommu_stats_add(&total_map_requests); +} + +#endif /* returns !0 if the IOMMU is caching non-present entries in its TLB */ static int iommu_has_npcache(struct amd_iommu *iommu) @@ -189,13 +259,55 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) spin_lock_irqsave(&iommu->lock, flags); ret = __iommu_queue_command(iommu, cmd); if (!ret) - iommu->need_sync = 1; + iommu->need_sync = true; spin_unlock_irqrestore(&iommu->lock, flags); return ret; } /* + * This function waits until an IOMMU has completed a completion + * wait command + */ +static void __iommu_wait_for_completion(struct amd_iommu *iommu) +{ + int ready = 0; + unsigned status = 0; + unsigned long i = 0; + + INC_STATS_COUNTER(compl_wait); + + while (!ready && (i < EXIT_LOOP_COUNT)) { + ++i; + /* wait for the bit to become one */ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; + } + + /* set bit back to zero */ + status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; + writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); + + if (unlikely(i == EXIT_LOOP_COUNT)) + panic("AMD IOMMU: Completion wait loop failed\n"); +} + +/* + * This function queues a completion wait command into the command + * buffer of an IOMMU + */ +static int __iommu_completion_wait(struct amd_iommu *iommu) +{ + struct iommu_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; + CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); + + return __iommu_queue_command(iommu, &cmd); +} + +/* * This function is called whenever we need to ensure that the IOMMU has * completed execution of all commands we sent. It sends a * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs @@ -204,40 +316,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) */ static int iommu_completion_wait(struct amd_iommu *iommu) { - int ret = 0, ready = 0; - unsigned status = 0; - struct iommu_cmd cmd; - unsigned long flags, i = 0; - - memset(&cmd, 0, sizeof(cmd)); - cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; - CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); + int ret = 0; + unsigned long flags; spin_lock_irqsave(&iommu->lock, flags); if (!iommu->need_sync) goto out; - iommu->need_sync = 0; + ret = __iommu_completion_wait(iommu); - ret = __iommu_queue_command(iommu, &cmd); + iommu->need_sync = false; if (ret) goto out; - while (!ready && (i < EXIT_LOOP_COUNT)) { - ++i; - /* wait for the bit to become one */ - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; - } - - /* set bit back to zero */ - status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; - writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); - - if (unlikely(i == EXIT_LOOP_COUNT)) - panic("AMD IOMMU: Completion wait loop failed\n"); + __iommu_wait_for_completion(iommu); out: spin_unlock_irqrestore(&iommu->lock, flags); @@ -264,6 +358,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) return ret; } +static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, + u16 domid, int pde, int s) +{ + memset(cmd, 0, sizeof(*cmd)); + address &= PAGE_MASK; + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); + cmd->data[1] |= domid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + if (s) /* size bit - we flush more than one 4kb page */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; +} + /* * Generic command send function for invalidaing TLB entries */ @@ -273,16 +382,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, struct iommu_cmd cmd; int ret; - memset(&cmd, 0, sizeof(cmd)); - address &= PAGE_MASK; - CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); - cmd.data[1] |= domid; - cmd.data[2] = lower_32_bits(address); - cmd.data[3] = upper_32_bits(address); - if (s) /* size bit - we flush more than one 4kb page */ - cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ - cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; + __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s); ret = iommu_queue_command(iommu, &cmd); @@ -321,9 +421,35 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) { u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + INC_STATS_COUNTER(domain_flush_single); + iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); } +/* + * This function is used to flush the IO/TLB for a given protection domain + * on every IOMMU in the system + */ +static void iommu_flush_domain(u16 domid) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct iommu_cmd cmd; + + INC_STATS_COUNTER(domain_flush_all); + + __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, + domid, 1, 1); + + list_for_each_entry(iommu, &amd_iommu_list, list) { + spin_lock_irqsave(&iommu->lock, flags); + __iommu_queue_command(iommu, &cmd); + __iommu_completion_wait(iommu); + __iommu_wait_for_completion(iommu); + spin_unlock_irqrestore(&iommu->lock, flags); + } +} + /**************************************************************************** * * The functions below are used the create the page table mappings for @@ -338,10 +464,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) * supporting all features of AMD IOMMU page tables like level skipping * and full 64 bit address spaces. */ -static int iommu_map(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - int prot) +static int iommu_map_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + int prot) { u64 __pte, *pte, *page; @@ -388,6 +514,28 @@ static int iommu_map(struct protection_domain *dom, return 0; } +static void iommu_unmap_page(struct protection_domain *dom, + unsigned long bus_addr) +{ + u64 *pte; + + pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; + + *pte = 0; +} + /* * This function checks if a specific unity mapping entry is needed for * this specific IOMMU. @@ -440,7 +588,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, for (addr = e->address_start; addr < e->address_end; addr += PAGE_SIZE) { - ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); + ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); if (ret) return ret; /* @@ -571,6 +719,16 @@ static u16 domain_id_alloc(void) return id; } +static void domain_id_free(int id) +{ + unsigned long flags; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + if (id > 0 && id < MAX_DOMAIN_ID) + __clear_bit(id, amd_iommu_pd_alloc_bitmap); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} + /* * Used to reserve address ranges in the aperture (e.g. for exclusion * ranges. @@ -587,12 +745,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, iommu_area_reserve(dom->bitmap, start_page, pages); } -static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) +static void free_pagetable(struct protection_domain *domain) { int i, j; u64 *p1, *p2, *p3; - p1 = dma_dom->domain.pt_root; + p1 = domain->pt_root; if (!p1) return; @@ -613,6 +771,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) } free_page((unsigned long)p1); + + domain->pt_root = NULL; } /* @@ -624,7 +784,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) if (!dom) return; - dma_ops_free_pagetable(dom); + free_pagetable(&dom->domain); kfree(dom->pte_pages); @@ -663,6 +823,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, goto free_dma_dom; dma_dom->domain.mode = PAGE_MODE_3_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); + dma_dom->domain.flags = PD_DMA_OPS_MASK; dma_dom->domain.priv = dma_dom; if (!dma_dom->domain.pt_root) goto free_dma_dom; @@ -725,6 +886,15 @@ free_dma_dom: } /* + * little helper function to check whether a given protection domain is a + * dma_ops domain + */ +static bool dma_ops_domain(struct protection_domain *domain) +{ + return domain->flags & PD_DMA_OPS_MASK; +} + +/* * Find out the protection domain structure for a given PCI device. This * will give us the pointer to the page table root for example. */ @@ -744,14 +914,15 @@ static struct protection_domain *domain_for_device(u16 devid) * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void set_device_domain(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static void attach_device(struct amd_iommu *iommu, + struct protection_domain *domain, + u16 devid) { unsigned long flags; - u64 pte_root = virt_to_phys(domain->pt_root); + domain->dev_cnt += 1; + pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; @@ -767,6 +938,116 @@ static void set_device_domain(struct amd_iommu *iommu, iommu_queue_inv_dev_entry(iommu, devid); } +/* + * Removes a device from a protection domain (unlocked) + */ +static void __detach_device(struct protection_domain *domain, u16 devid) +{ + + /* lock domain */ + spin_lock(&domain->lock); + + /* remove domain from the lookup table */ + amd_iommu_pd_table[devid] = NULL; + + /* remove entry from the device table seen by the hardware */ + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[1] = 0; + amd_iommu_dev_table[devid].data[2] = 0; + + /* decrease reference counter */ + domain->dev_cnt -= 1; + + /* ready */ + spin_unlock(&domain->lock); +} + +/* + * Removes a device from a protection domain (with devtable_lock held) + */ +static void detach_device(struct protection_domain *domain, u16 devid) +{ + unsigned long flags; + + /* lock device table */ + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + __detach_device(domain, devid); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} + +static int device_change_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct pci_dev *pdev = to_pci_dev(dev); + u16 devid = calc_devid(pdev->bus->number, pdev->devfn); + struct protection_domain *domain; + struct dma_ops_domain *dma_domain; + struct amd_iommu *iommu; + int order = amd_iommu_aperture_order; + unsigned long flags; + + if (devid > amd_iommu_last_bdf) + goto out; + + devid = amd_iommu_alias_table[devid]; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + goto out; + + domain = domain_for_device(devid); + + if (domain && !dma_ops_domain(domain)) + WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " + "to a non-dma-ops domain\n", dev_name(dev)); + + switch (action) { + case BUS_NOTIFY_BOUND_DRIVER: + if (domain) + goto out; + dma_domain = find_protection_domain(devid); + if (!dma_domain) + dma_domain = iommu->default_dom; + attach_device(iommu, &dma_domain->domain, devid); + printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " + "device %s\n", dma_domain->domain.id, dev_name(dev)); + break; + case BUS_NOTIFY_UNBIND_DRIVER: + if (!domain) + goto out; + detach_device(domain, devid); + break; + case BUS_NOTIFY_ADD_DEVICE: + /* allocate a protection domain if a device is added */ + dma_domain = find_protection_domain(devid); + if (dma_domain) + goto out; + dma_domain = dma_ops_domain_alloc(iommu, order); + if (!dma_domain) + goto out; + dma_domain->target_dev = devid; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + list_add_tail(&dma_domain->list, &iommu_pd_list); + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + + break; + default: + goto out; + } + + iommu_queue_inv_dev_entry(iommu, devid); + iommu_completion_wait(iommu); + +out: + return 0; +} + +struct notifier_block device_nb = { + .notifier_call = device_change_notifier, +}; + /***************************************************************************** * * The next functions belong to the dma_ops mapping/unmapping code. @@ -802,7 +1083,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) list_for_each_entry(entry, &iommu_pd_list, list) { if (entry->target_dev == devid) { ret = entry; - list_del(&ret->list); break; } } @@ -853,14 +1133,13 @@ static int get_device_resources(struct device *dev, if (!dma_dom) dma_dom = (*iommu)->default_dom; *domain = &dma_dom->domain; - set_device_domain(*iommu, *domain, *bdf); + attach_device(*iommu, *domain, *bdf); printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " - "device ", (*domain)->id); - print_devid(_bdf, 1); + "device %s\n", (*domain)->id, dev_name(dev)); } if (domain_for_device(_bdf) == NULL) - set_device_domain(*iommu, *domain, _bdf); + attach_device(*iommu, *domain, _bdf); return 1; } @@ -946,6 +1225,11 @@ static dma_addr_t __map_single(struct device *dev, pages = iommu_num_pages(paddr, size, PAGE_SIZE); paddr &= PAGE_MASK; + INC_STATS_COUNTER(total_map_requests); + + if (pages > 1) + INC_STATS_COUNTER(cross_page); + if (align) align_mask = (1UL << get_order(size)) - 1; @@ -962,6 +1246,8 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; + ADD_STATS_COUNTER(alloced_io_mem, size); + if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { iommu_flush_tlb(iommu, dma_dom->domain.id); dma_dom->need_flush = false; @@ -998,6 +1284,8 @@ static void __unmap_single(struct amd_iommu *iommu, start += PAGE_SIZE; } + SUB_STATS_COUNTER(alloced_io_mem, size); + dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { @@ -1019,6 +1307,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, dma_addr_t addr; u64 dma_mask; + INC_STATS_COUNTER(cnt_map_single); + if (!check_device(dev)) return bad_dma_address; @@ -1030,6 +1320,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, /* device not handled by any AMD IOMMU */ return (dma_addr_t)paddr; + if (!dma_ops_domain(domain)) + return bad_dma_address; + spin_lock_irqsave(&domain->lock, flags); addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, dma_mask); @@ -1055,11 +1348,16 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, struct protection_domain *domain; u16 devid; + INC_STATS_COUNTER(cnt_unmap_single); + if (!check_device(dev) || !get_device_resources(dev, &iommu, &domain, &devid)) /* device not handled by any AMD IOMMU */ return; + if (!dma_ops_domain(domain)) + return; + spin_lock_irqsave(&domain->lock, flags); __unmap_single(iommu, domain->priv, dma_addr, size, dir); @@ -1104,6 +1402,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, int mapped_elems = 0; u64 dma_mask; + INC_STATS_COUNTER(cnt_map_sg); + if (!check_device(dev)) return 0; @@ -1114,6 +1414,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, if (!iommu || !domain) return map_sg_no_iommu(dev, sglist, nelems, dir); + if (!dma_ops_domain(domain)) + return 0; + spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { @@ -1163,10 +1466,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, u16 devid; int i; + INC_STATS_COUNTER(cnt_unmap_sg); + if (!check_device(dev) || !get_device_resources(dev, &iommu, &domain, &devid)) return; + if (!dma_ops_domain(domain)) + return; + spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { @@ -1194,6 +1502,8 @@ static void *alloc_coherent(struct device *dev, size_t size, phys_addr_t paddr; u64 dma_mask = dev->coherent_dma_mask; + INC_STATS_COUNTER(cnt_alloc_coherent); + if (!check_device(dev)) return NULL; @@ -1212,6 +1522,9 @@ static void *alloc_coherent(struct device *dev, size_t size, return virt_addr; } + if (!dma_ops_domain(domain)) + goto out_free; + if (!dma_mask) dma_mask = *dev->dma_mask; @@ -1220,18 +1533,20 @@ static void *alloc_coherent(struct device *dev, size_t size, *dma_addr = __map_single(dev, iommu, domain->priv, paddr, size, DMA_BIDIRECTIONAL, true, dma_mask); - if (*dma_addr == bad_dma_address) { - free_pages((unsigned long)virt_addr, get_order(size)); - virt_addr = NULL; - goto out; - } + if (*dma_addr == bad_dma_address) + goto out_free; iommu_completion_wait(iommu); -out: spin_unlock_irqrestore(&domain->lock, flags); return virt_addr; + +out_free: + + free_pages((unsigned long)virt_addr, get_order(size)); + + return NULL; } /* @@ -1245,6 +1560,8 @@ static void free_coherent(struct device *dev, size_t size, struct protection_domain *domain; u16 devid; + INC_STATS_COUNTER(cnt_free_coherent); + if (!check_device(dev)) return; @@ -1253,6 +1570,9 @@ static void free_coherent(struct device *dev, size_t size, if (!iommu || !domain) goto free_mem; + if (!dma_ops_domain(domain)) + goto free_mem; + spin_lock_irqsave(&domain->lock, flags); __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); @@ -1296,7 +1616,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) * we don't need to preallocate the protection domains anymore. * For now we have to. */ -void prealloc_protection_domains(void) +static void prealloc_protection_domains(void) { struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; @@ -1305,7 +1625,7 @@ void prealloc_protection_domains(void) u16 devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - devid = (dev->bus->number << 8) | dev->devfn; + devid = calc_devid(dev->bus->number, dev->devfn); if (devid > amd_iommu_last_bdf) continue; devid = amd_iommu_alias_table[devid]; @@ -1352,6 +1672,7 @@ int __init amd_iommu_init_dma_ops(void) iommu->default_dom = dma_ops_domain_alloc(iommu, order); if (iommu->default_dom == NULL) return -ENOMEM; + iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; ret = iommu_init_unity_mappings(iommu); if (ret) goto free_domains; @@ -1375,6 +1696,12 @@ int __init amd_iommu_init_dma_ops(void) /* Make the driver finally visible to the drivers */ dma_ops = &amd_iommu_dma_ops; + register_iommu(&amd_iommu_ops); + + bus_register_notifier(&pci_bus_type, &device_nb); + + amd_iommu_stats_init(); + return 0; free_domains: @@ -1386,3 +1713,224 @@ free_domains: return ret; } + +/***************************************************************************** + * + * The following functions belong to the exported interface of AMD IOMMU + * + * This interface allows access to lower level functions of the IOMMU + * like protection domain handling and assignement of devices to domains + * which is not possible with the dma_ops interface. + * + *****************************************************************************/ + +static void cleanup_domain(struct protection_domain *domain) +{ + unsigned long flags; + u16 devid; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) + if (amd_iommu_pd_table[devid] == domain) + __detach_device(domain, devid); + + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} + +static int amd_iommu_domain_init(struct iommu_domain *dom) +{ + struct protection_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return -ENOMEM; + + spin_lock_init(&domain->lock); + domain->mode = PAGE_MODE_3_LEVEL; + domain->id = domain_id_alloc(); + if (!domain->id) + goto out_free; + domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); + if (!domain->pt_root) + goto out_free; + + dom->priv = domain; + + return 0; + +out_free: + kfree(domain); + + return -ENOMEM; +} + +static void amd_iommu_domain_destroy(struct iommu_domain *dom) +{ + struct protection_domain *domain = dom->priv; + + if (!domain) + return; + + if (domain->dev_cnt > 0) + cleanup_domain(domain); + + BUG_ON(domain->dev_cnt != 0); + + free_pagetable(domain); + + domain_id_free(domain->id); + + kfree(domain); + + dom->priv = NULL; +} + +static void amd_iommu_detach_device(struct iommu_domain *dom, + struct device *dev) +{ + struct protection_domain *domain = dom->priv; + struct amd_iommu *iommu; + struct pci_dev *pdev; + u16 devid; + + if (dev->bus != &pci_bus_type) + return; + + pdev = to_pci_dev(dev); + + devid = calc_devid(pdev->bus->number, pdev->devfn); + + if (devid > 0) + detach_device(domain, devid); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + return; + + iommu_queue_inv_dev_entry(iommu, devid); + iommu_completion_wait(iommu); +} + +static int amd_iommu_attach_device(struct iommu_domain *dom, + struct device *dev) +{ + struct protection_domain *domain = dom->priv; + struct protection_domain *old_domain; + struct amd_iommu *iommu; + struct pci_dev *pdev; + u16 devid; + + if (dev->bus != &pci_bus_type) + return -EINVAL; + + pdev = to_pci_dev(dev); + + devid = calc_devid(pdev->bus->number, pdev->devfn); + + if (devid >= amd_iommu_last_bdf || + devid != amd_iommu_alias_table[devid]) + return -EINVAL; + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + return -EINVAL; + + old_domain = domain_for_device(devid); + if (old_domain) + return -EBUSY; + + attach_device(iommu, domain, devid); + + iommu_completion_wait(iommu); + + return 0; +} + +static int amd_iommu_map_range(struct iommu_domain *dom, + unsigned long iova, phys_addr_t paddr, + size_t size, int iommu_prot) +{ + struct protection_domain *domain = dom->priv; + unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE); + int prot = 0; + int ret; + + if (iommu_prot & IOMMU_READ) + prot |= IOMMU_PROT_IR; + if (iommu_prot & IOMMU_WRITE) + prot |= IOMMU_PROT_IW; + + iova &= PAGE_MASK; + paddr &= PAGE_MASK; + + for (i = 0; i < npages; ++i) { + ret = iommu_map_page(domain, iova, paddr, prot); + if (ret) + return ret; + + iova += PAGE_SIZE; + paddr += PAGE_SIZE; + } + + return 0; +} + +static void amd_iommu_unmap_range(struct iommu_domain *dom, + unsigned long iova, size_t size) +{ + + struct protection_domain *domain = dom->priv; + unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); + + iova &= PAGE_MASK; + + for (i = 0; i < npages; ++i) { + iommu_unmap_page(domain, iova); + iova += PAGE_SIZE; + } + + iommu_flush_domain(domain->id); +} + +static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, + unsigned long iova) +{ + struct protection_domain *domain = dom->priv; + unsigned long offset = iova & ~PAGE_MASK; + phys_addr_t paddr; + u64 *pte; + + pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return 0; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return 0; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return 0; + + paddr = *pte & IOMMU_PAGE_MASK; + paddr |= offset; + + return paddr; +} + +static struct iommu_ops amd_iommu_ops = { + .domain_init = amd_iommu_domain_init, + .domain_destroy = amd_iommu_domain_destroy, + .attach_dev = amd_iommu_attach_device, + .detach_dev = amd_iommu_detach_device, + .map = amd_iommu_map_range, + .unmap = amd_iommu_unmap_range, + .iova_to_phys = amd_iommu_iova_to_phys, +}; + diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c625800c55c..42c33cebf00 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ -int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */ +bool amd_iommu_isolate = true; /* if true, device isolation is + enabled */ bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the @@ -243,20 +244,16 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) } /* Function to enable the hardware */ -void __init iommu_enable(struct amd_iommu *iommu) +static void __init iommu_enable(struct amd_iommu *iommu) { - printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " - "at %02x:%02x.%x cap 0x%hx\n", - iommu->dev->bus->number, - PCI_SLOT(iommu->dev->devfn), - PCI_FUNC(iommu->dev->devfn), - iommu->cap_ptr); + printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", + dev_name(&iommu->dev->dev), iommu->cap_ptr); iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } /* Function to enable IOMMU event logging and event interrupts */ -void __init iommu_enable_event_logging(struct amd_iommu *iommu) +static void __init iommu_enable_event_logging(struct amd_iommu *iommu) { iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); @@ -1218,9 +1215,9 @@ static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { if (strncmp(str, "isolate", 7) == 0) - amd_iommu_isolate = 1; + amd_iommu_isolate = true; if (strncmp(str, "share", 5) == 0) - amd_iommu_isolate = 0; + amd_iommu_isolate = false; if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; } diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index b5229affb95..b13d3c4dbd4 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -98,8 +98,8 @@ __setup("apicpmtimer", setup_apicpmtimer); #ifdef HAVE_X2APIC int x2apic; /* x2apic enabled before OS handover */ -int x2apic_preenabled; -int disable_x2apic; +static int x2apic_preenabled; +static int disable_x2apic; static __init int setup_nox2apic(char *str) { disable_x2apic = 1; @@ -119,8 +119,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); int first_system_vector = 0xfe; -char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; - /* * Debug level, exported for io_apic.c */ @@ -142,7 +140,7 @@ static int lapic_next_event(unsigned long delta, struct clock_event_device *evt); static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt); -static void lapic_timer_broadcast(cpumask_t mask); +static void lapic_timer_broadcast(const struct cpumask *mask); static void apic_pm_activate(void); /* @@ -228,7 +226,7 @@ void xapic_icr_write(u32 low, u32 id) apic_write(APIC_ICR, low); } -u64 xapic_icr_read(void) +static u64 xapic_icr_read(void) { u32 icr1, icr2; @@ -268,7 +266,7 @@ void x2apic_icr_write(u32 low, u32 id) wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); } -u64 x2apic_icr_read(void) +static u64 x2apic_icr_read(void) { unsigned long val; @@ -455,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, /* * Local APIC timer broadcast function */ -static void lapic_timer_broadcast(cpumask_t mask) +static void lapic_timer_broadcast(const struct cpumask *mask) { #ifdef CONFIG_SMP send_IPI_mask(mask, LOCAL_TIMER_VECTOR); @@ -471,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void) struct clock_event_device *levt = &__get_cpu_var(lapic_events); memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); + levt->cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(levt); } @@ -1807,28 +1805,32 @@ void disconnect_bsp_APIC(int virt_wire_setup) void __cpuinit generic_processor_info(int apicid, int version) { int cpu; - cpumask_t tmp_map; /* * Validate version */ if (version == 0x0) { pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); + "fixing up to 0x10. (tell your hw vendor)\n", + version); version = 0x10; } apic_version[apicid] = version; - if (num_processors >= NR_CPUS) { - pr_warning("WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); + if (num_processors >= nr_cpu_ids) { + int max = nr_cpu_ids; + int thiscpu = max + disabled_cpus; + + pr_warning( + "ACPI: NR_CPUS/possible_cpus limit of %i reached." + " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); + + disabled_cpus++; return; } num_processors++; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); + cpu = cpumask_next_zero(-1, cpu_present_mask); physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { @@ -1878,8 +1880,8 @@ void __cpuinit generic_processor_info(int apicid, int version) } #endif - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); + set_cpu_possible(cpu, true); + set_cpu_present(cpu, true); } #ifdef CONFIG_X86_64 @@ -2081,7 +2083,7 @@ __cpuinit int apic_is_clustered_box(void) bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { /* are we being called early in kernel startup? */ if (bios_cpu_apicid) { id = bios_cpu_apicid[i]; diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index 2a0a2a3cac2..f63882728d9 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -25,7 +25,7 @@ #include <asm/uv/bios.h> #include <asm/uv/uv_hub.h> -struct uv_systab uv_systab; +static struct uv_systab uv_systab; s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 42e0853030c..3f95a40f718 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -355,7 +355,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); } else if (smp_num_siblings > 1) { - if (smp_num_siblings > NR_CPUS) { + if (smp_num_siblings > nr_cpu_ids) { printk(KERN_WARNING "CPU: Unsupported number of siblings %d", smp_num_siblings); smp_num_siblings = 1; diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 88ea02dcb62..28102ad1a36 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -517,6 +517,17 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) } } +static void free_acpi_perf_data(void) +{ + unsigned int i; + + /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ + for_each_possible_cpu(i) + free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) + ->shared_cpu_map); + free_percpu(acpi_perf_data); +} + /* * acpi_cpufreq_early_init - initialize ACPI P-States library * @@ -527,6 +538,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) */ static int __init acpi_cpufreq_early_init(void) { + unsigned int i; dprintk("acpi_cpufreq_early_init\n"); acpi_perf_data = alloc_percpu(struct acpi_processor_performance); @@ -534,6 +546,16 @@ static int __init acpi_cpufreq_early_init(void) dprintk("Memory allocation error for acpi_perf_data.\n"); return -ENOMEM; } + for_each_possible_cpu(i) { + if (!alloc_cpumask_var_node( + &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, + GFP_KERNEL, cpu_to_node(i))) { + + /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ + free_acpi_perf_data(); + return -ENOMEM; + } + } /* Do initialization in ACPI core */ acpi_processor_preregister_performance(acpi_perf_data); @@ -604,9 +626,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - policy->cpus = perf->shared_cpu_map; + cpumask_copy(&policy->cpus, perf->shared_cpu_map); } - policy->related_cpus = perf->shared_cpu_map; + cpumask_copy(&policy->related_cpus, perf->shared_cpu_map); #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); @@ -795,7 +817,7 @@ static int __init acpi_cpufreq_init(void) ret = cpufreq_register_driver(&acpi_cpufreq_driver); if (ret) - free_percpu(acpi_perf_data); + free_acpi_perf_data(); return ret; } diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index b8e05ee4f73..beea4466b06 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -160,6 +160,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) switch (c->x86_model) { case 0x0E: /* Core */ case 0x0F: /* Core Duo */ + case 0x16: /* Celeron Core */ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); case 0x0D: /* Pentium M (Dothan) */ @@ -171,7 +172,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) } if (c->x86 != 0xF) { - printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n"); + if (!cpu_has(c, X86_FEATURE_EST)) + printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. " + "Please send an e-mail to <cpufreq@vger.kernel.org>\n"); return 0; } @@ -274,6 +277,7 @@ static struct cpufreq_driver p4clockmod_driver = { .name = "p4-clockmod", .owner = THIS_MODULE, .attr = p4clockmod_attr, + .hide_interface = 1, }; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 7c7d56b4313..1b446d79a8f 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -310,6 +310,12 @@ static int powernow_acpi_init(void) goto err0; } + if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, + GFP_KERNEL)) { + retval = -ENOMEM; + goto err05; + } + if (acpi_processor_register_performance(acpi_processor_perf, 0)) { retval = -EIO; goto err1; @@ -412,6 +418,8 @@ static int powernow_acpi_init(void) err2: acpi_processor_unregister_performance(acpi_processor_perf, 0); err1: + free_cpumask_var(acpi_processor_perf->shared_cpu_map); +err05: kfree(acpi_processor_perf); err0: printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); @@ -652,6 +660,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) { #ifdef CONFIG_X86_POWERNOW_K7_ACPI if (acpi_processor_perf) { acpi_processor_unregister_performance(acpi_processor_perf, 0); + free_cpumask_var(acpi_processor_perf->shared_cpu_map); kfree(acpi_processor_perf); } #endif diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 7f05f44b97e..c3c9adbaa26 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -766,7 +766,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { struct cpufreq_frequency_table *powernow_table; - int ret_val; + int ret_val = -ENODEV; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { dprintk("register performance failed: bad ACPI data\n"); @@ -815,6 +815,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); + if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { + printk(KERN_ERR PFX + "unable to alloc powernow_k8_data cpumask\n"); + ret_val = -ENOMEM; + goto err_out_mem; + } + return 0; err_out_mem: @@ -826,7 +833,7 @@ err_out: /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ data->acpi_data.state_count = 0; - return -ENODEV; + return ret_val; } static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) @@ -929,6 +936,7 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { if (data->acpi_data.state_count) acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + free_cpumask_var(data->acpi_data.shared_cpu_map); } #else @@ -1134,7 +1142,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->cpu = pol->cpu; data->currpstate = HW_PSTATE_INVALID; - if (powernow_k8_cpu_init_acpi(data)) { + rc = powernow_k8_cpu_init_acpi(data); + if (rc) { /* * Use the PSB BIOS structure. This is only availabe on * an UP version, and is deprecated by AMD. @@ -1152,20 +1161,17 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) "ACPI maintainers and complain to your BIOS " "vendor.\n"); #endif - kfree(data); - return -ENODEV; + goto err_out; } if (pol->cpu != 0) { printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " "CPU other than CPU0. Complain to your BIOS " "vendor.\n"); - kfree(data); - return -ENODEV; + goto err_out; } rc = find_psb_table(data); if (rc) { - kfree(data); - return -ENODEV; + goto err_out; } } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 3b5f06423e7..f0ea6fa2f53 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -459,9 +459,7 @@ static int centrino_verify (struct cpufreq_policy *policy) * Sets a new CPUFreq policy. */ struct allmasks { - cpumask_t online_policy_cpus; cpumask_t saved_mask; - cpumask_t set_mask; cpumask_t covered_cpus; }; @@ -475,9 +473,7 @@ static int centrino_target (struct cpufreq_policy *policy, int retval = 0; unsigned int j, k, first_cpu, tmp; CPUMASK_ALLOC(allmasks); - CPUMASK_PTR(online_policy_cpus, allmasks); CPUMASK_PTR(saved_mask, allmasks); - CPUMASK_PTR(set_mask, allmasks); CPUMASK_PTR(covered_cpus, allmasks); if (unlikely(allmasks == NULL)) @@ -497,30 +493,28 @@ static int centrino_target (struct cpufreq_policy *policy, goto out; } -#ifdef CONFIG_HOTPLUG_CPU - /* cpufreq holds the hotplug lock, so we are safe from here on */ - cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus); -#else - *online_policy_cpus = policy->cpus; -#endif - *saved_mask = current->cpus_allowed; first_cpu = 1; cpus_clear(*covered_cpus); - for_each_cpu_mask_nr(j, *online_policy_cpus) { + for_each_cpu_mask_nr(j, policy->cpus) { + const cpumask_t *mask; + + /* cpufreq holds the hotplug lock, so we are safe here */ + if (!cpu_online(j)) + continue; + /* * Support for SMP systems. * Make sure we are running on CPU that wants to change freq */ - cpus_clear(*set_mask); if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - cpus_or(*set_mask, *set_mask, *online_policy_cpus); + mask = &policy->cpus; else - cpu_set(j, *set_mask); + mask = &cpumask_of_cpu(j); - set_cpus_allowed_ptr(current, set_mask); + set_cpus_allowed_ptr(current, mask); preempt_disable(); - if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) { + if (unlikely(!cpu_isset(smp_processor_id(), *mask))) { dprintk("couldn't limit to CPUs in this domain\n"); retval = -EAGAIN; if (first_cpu) { @@ -548,7 +542,9 @@ static int centrino_target (struct cpufreq_policy *policy, dprintk("target=%dkHz old=%d new=%d msr=%04x\n", target_freq, freqs.old, freqs.new, msr); - for_each_cpu_mask_nr(k, *online_policy_cpus) { + for_each_cpu_mask_nr(k, policy->cpus) { + if (!cpu_online(k)) + continue; freqs.cpu = k; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); @@ -571,7 +567,9 @@ static int centrino_target (struct cpufreq_policy *policy, preempt_enable(); } - for_each_cpu_mask_nr(k, *online_policy_cpus) { + for_each_cpu_mask_nr(k, policy->cpus) { + if (!cpu_online(k)) + continue; freqs.cpu = k; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } @@ -584,18 +582,17 @@ static int centrino_target (struct cpufreq_policy *policy, * Best effort undo.. */ - if (!cpus_empty(*covered_cpus)) - for_each_cpu_mask_nr(j, *covered_cpus) { - set_cpus_allowed_ptr(current, - &cpumask_of_cpu(j)); - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); - } + for_each_cpu_mask_nr(j, *covered_cpus) { + set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); + wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); + } tmp = freqs.new; freqs.new = freqs.old; freqs.old = tmp; - for_each_cpu_mask_nr(j, *online_policy_cpus) { - freqs.cpu = j; + for_each_cpu_mask_nr(j, policy->cpus) { + if (!cpu_online(j)) + continue; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index 98d4fdb7dc0..cdac7d62369 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -139,6 +139,15 @@ static unsigned int pentium_core_get_frequency(void) case 3: fsb = 166667; break; + case 2: + fsb = 200000; + break; + case 0: + fsb = 266667; + break; + case 4: + fsb = 333333; + break; default: printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 68b5d8681cb..48533d77be7 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) per_cpu(cpuid4_info, cpu) = NULL; } -static int __cpuinit detect_cache_attributes(unsigned int cpu) +static void __cpuinit get_cpu_leaves(void *_retval) { - struct _cpuid4_info *this_leaf; - unsigned long j; - int retval; - cpumask_t oldmask; - - if (num_cache_leaves == 0) - return -ENOENT; - - per_cpu(cpuid4_info, cpu) = kzalloc( - sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (per_cpu(cpuid4_info, cpu) == NULL) - return -ENOMEM; - - oldmask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - if (retval) - goto out; + int j, *retval = _retval, cpu = smp_processor_id(); /* Do cpuid and store the results */ for (j = 0; j < num_cache_leaves; j++) { + struct _cpuid4_info *this_leaf; this_leaf = CPUID4_INFO_IDX(cpu, j); - retval = cpuid4_cache_lookup(j, this_leaf); - if (unlikely(retval < 0)) { + *retval = cpuid4_cache_lookup(j, this_leaf); + if (unlikely(*retval < 0)) { int i; for (i = 0; i < j; i++) @@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) } cache_shared_cpu_map_setup(cpu, j); } - set_cpus_allowed_ptr(current, &oldmask); +} + +static int __cpuinit detect_cache_attributes(unsigned int cpu) +{ + int retval; + + if (num_cache_leaves == 0) + return -ENOENT; + + per_cpu(cpuid4_info, cpu) = kzalloc( + sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); + if (per_cpu(cpuid4_info, cpu) == NULL) + return -ENOMEM; -out: + smp_call_function_single(cpu, get_cpu_leaves, &retval, true); if (retval) { kfree(per_cpu(cpuid4_info, cpu)); per_cpu(cpuid4_info, cpu) = NULL; @@ -626,8 +623,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, cpumask_t *mask = &this_leaf->shared_cpu_map; n = type? - cpulist_scnprintf(buf, len-2, *mask): - cpumask_scnprintf(buf, len-2, *mask); + cpulist_scnprintf(buf, len-2, mask) : + cpumask_scnprintf(buf, len-2, mask); buf[n++] = '\n'; buf[n] = '\0'; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 748c8f9e7a0..a5a5e053037 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ * CPU Initialization */ +struct thresh_restart { + struct threshold_block *b; + int reset; + u16 old_limit; +}; + /* must be called with correct cpu affinity */ -static void threshold_restart_bank(struct threshold_block *b, - int reset, u16 old_limit) +static long threshold_restart_bank(void *_tr) { + struct thresh_restart *tr = _tr; u32 mci_misc_hi, mci_misc_lo; - rdmsr(b->address, mci_misc_lo, mci_misc_hi); + rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); - if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) - reset = 1; /* limit cannot be lower than err count */ + if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) + tr->reset = 1; /* limit cannot be lower than err count */ - if (reset) { /* reset err count and overflow bit */ + if (tr->reset) { /* reset err count and overflow bit */ mci_misc_hi = (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - b->threshold_limit); - } else if (old_limit) { /* change limit w/o reset */ + (THRESHOLD_MAX - tr->b->threshold_limit); + } else if (tr->old_limit) { /* change limit w/o reset */ int new_count = (mci_misc_hi & THRESHOLD_MAX) + - (old_limit - b->threshold_limit); + (tr->old_limit - tr->b->threshold_limit); mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | (new_count & THRESHOLD_MAX); } - b->interrupt_enable ? + tr->b->interrupt_enable ? (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : (mci_misc_hi &= ~MASK_INT_TYPE_HI); mci_misc_hi |= MASK_COUNT_EN_HI; - wrmsr(b->address, mci_misc_lo, mci_misc_hi); + wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); + return 0; } /* cpu init entry point, called from mce.c with preempt off */ @@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int cpu = smp_processor_id(); u8 lvt_off; u32 low = 0, high = 0, address = 0; + struct thresh_restart tr; for (bank = 0; bank < NR_BANKS; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) wrmsr(address, low, high); threshold_defaults.address = address; - threshold_restart_bank(&threshold_defaults, 0, 0); + tr.b = &threshold_defaults; + tr.reset = 0; + tr.old_limit = 0; + threshold_restart_bank(&tr); } } } @@ -251,20 +262,6 @@ struct threshold_attr { ssize_t(*store) (struct threshold_block *, const char *, size_t count); }; -static void affinity_set(unsigned int cpu, cpumask_t *oldmask, - cpumask_t *newmask) -{ - *oldmask = current->cpus_allowed; - cpus_clear(*newmask); - cpu_set(cpu, *newmask); - set_cpus_allowed_ptr(current, newmask); -} - -static void affinity_restore(const cpumask_t *oldmask) -{ - set_cpus_allowed_ptr(current, oldmask); -} - #define SHOW_FIELDS(name) \ static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ { \ @@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count) { char *end; - cpumask_t oldmask, newmask; + struct thresh_restart tr; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; b->interrupt_enable = !!new; - affinity_set(b->cpu, &oldmask, &newmask); - threshold_restart_bank(b, 0, 0); - affinity_restore(&oldmask); + tr.b = b; + tr.reset = 0; + tr.old_limit = 0; + work_on_cpu(b->cpu, threshold_restart_bank, &tr); return end - buf; } @@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b, const char *buf, size_t count) { char *end; - cpumask_t oldmask, newmask; - u16 old; + struct thresh_restart tr; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; @@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b, new = THRESHOLD_MAX; if (new < 1) new = 1; - old = b->threshold_limit; + tr.old_limit = b->threshold_limit; b->threshold_limit = new; + tr.b = b; + tr.reset = 0; - affinity_set(b->cpu, &oldmask, &newmask); - threshold_restart_bank(b, 0, old); - affinity_restore(&oldmask); + work_on_cpu(b->cpu, threshold_restart_bank, &tr); return end - buf; } -static ssize_t show_error_count(struct threshold_block *b, char *buf) +static long local_error_count(void *_b) { - u32 high, low; - cpumask_t oldmask, newmask; - affinity_set(b->cpu, &oldmask, &newmask); + struct threshold_block *b = _b; + u32 low, high; + rdmsr(b->address, low, high); - affinity_restore(&oldmask); - return sprintf(buf, "%x\n", - (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); + return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); +} + +static ssize_t show_error_count(struct threshold_block *b, char *buf) +{ + return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); } static ssize_t store_error_count(struct threshold_block *b, const char *buf, size_t count) { - cpumask_t oldmask, newmask; - affinity_set(b->cpu, &oldmask, &newmask); - threshold_restart_bank(b, 1, 0); - affinity_restore(&oldmask); + struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; + + work_on_cpu(b->cpu, threshold_restart_bank, &tr); return 1; } @@ -463,12 +462,19 @@ out_free: return err; } +static long local_allocate_threshold_blocks(void *_bank) +{ + unsigned int *bank = _bank; + + return allocate_threshold_blocks(smp_processor_id(), *bank, 0, + MSR_IA32_MC0_MISC + *bank * 4); +} + /* symlinks sibling shared banks to first core. first core owns dir/files. */ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { int i, err = 0; struct threshold_bank *b = NULL; - cpumask_t oldmask, newmask; char name[32]; sprintf(name, "threshold_bank%i", bank); @@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) per_cpu(threshold_banks, cpu)[bank] = b; - affinity_set(cpu, &oldmask, &newmask); - err = allocate_threshold_blocks(cpu, bank, 0, - MSR_IA32_MC0_MISC + bank * 4); - affinity_restore(&oldmask); - + err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); if (err) goto out_free; diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 4e8d77f01ee..b59ddcc88cd 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -14,14 +14,6 @@ #include <asm/pat.h> #include "mtrr.h" -struct mtrr_state { - struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; - mtrr_type fixed_ranges[NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - struct fixed_range_block { int base_msr; /* start address of an MTRR block */ int ranges; /* number of MTRRs in this block */ @@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = { }; static unsigned long smp_changes_mask; -static struct mtrr_state mtrr_state = {}; static int mtrr_state_set; u64 mtrr_tom2; +struct mtrr_state_type mtrr_state = {}; +EXPORT_SYMBOL_GPL(mtrr_state); + #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 1159e269e59..d259e5d2e05 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -49,7 +49,7 @@ u32 num_var_ranges = 0; -unsigned int mtrr_usage_table[MAX_VAR_RANGES]; +unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; @@ -574,7 +574,7 @@ struct mtrr_value { unsigned long lsize; }; -static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; +static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; static int mtrr_save(struct sys_device * sysdev, pm_message_t state) { @@ -824,16 +824,14 @@ static int enable_mtrr_cleanup __initdata = static int __init disable_mtrr_cleanup_setup(char *str) { - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 0; + enable_mtrr_cleanup = 0; return 0; } early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); static int __init enable_mtrr_cleanup_setup(char *str) { - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 1; + enable_mtrr_cleanup = 1; return 0; } early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2dc4ec656b2..ffd60409cc6 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -8,11 +8,6 @@ #define MTRRcap_MSR 0x0fe #define MTRRdefType_MSR 0x2ff -#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) -#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) - -#define NUM_FIXED_RANGES 88 -#define MAX_VAR_RANGES 256 #define MTRRfix64K_00000_MSR 0x250 #define MTRRfix16K_80000_MSR 0x258 #define MTRRfix16K_A0000_MSR 0x259 @@ -29,11 +24,7 @@ #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 -/* In the Intel processor's MTRR interface, the MTRR type is always held in - an 8 bit field: */ -typedef u8 mtrr_type; - -extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; +extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; struct mtrr_ops { u32 vendor; @@ -70,13 +61,6 @@ struct set_mtrr_context { u32 ccr3; }; -struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; -}; - void set_mtrr_done(struct set_mtrr_context *ctxt); void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 72cefd1e649..2ac1f0c2beb 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -39,10 +39,10 @@ #include <linux/device.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <linux/uaccess.h> #include <asm/processor.h> #include <asm/msr.h> -#include <asm/uaccess.h> #include <asm/system.h> static struct class *cpuid_class; @@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) } static ssize_t cpuid_read(struct file *file, char __user *buf, - size_t count, loff_t * ppos) + size_t count, loff_t *ppos) { char __user *tmp = buf; struct cpuid_regs cmd; @@ -117,11 +117,11 @@ static int cpuid_open(struct inode *inode, struct file *file) unsigned int cpu; struct cpuinfo_x86 *c; int ret = 0; - + lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= NR_CPUS || !cpu_online(cpu)) { + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { ret = -ENXIO; /* No such CPU */ goto out; } diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index d84a852e4cd..c689d19e35a 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -26,6 +26,7 @@ #include <linux/kdebug.h> #include <asm/smp.h> #include <asm/reboot.h> +#include <asm/virtext.h> #include <mach_ipi.h> @@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args) #endif crash_save_cpu(regs, cpu); + /* Disable VMX or SVM if needed. + * + * We need to disable virtualization on all CPUs. + * Having VMX or SVM enabled on any CPU may break rebooting + * after the kdump kernel has finished its task. + */ + cpu_emergency_vmxoff(); + cpu_emergency_svm_disable(); + disable_local_APIC(); } @@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs) local_irq_disable(); kdump_nmi_shootdown_cpus(); + + /* Booting kdump kernel with VMX or SVM enabled won't work, + * because (among other limitations) we can't disable paging + * with the virt flags. + */ + cpu_emergency_vmxoff(); + cpu_emergency_svm_disable(); + lapic_shutdown(); #if defined(CONFIG_X86_IO_APIC) disable_IO_APIC(); diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 23b138e31e9..504ad198e4a 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...) va_list ap; va_start(ap, fmt); - n = vscnprintf(buf, 512, fmt, ap); + n = vscnprintf(buf, sizeof(buf), fmt, ap); early_console->write(early_console, buf, n); va_end(ap); } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index c0262791bda..34185488e4f 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 1; } -static cpumask_t flat_target_cpus(void) +static const struct cpumask *flat_target_cpus(void) { - return cpu_online_map; + return cpu_online_mask; } -static cpumask_t flat_vector_allocation_domain(int cpu) +static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + cpumask_clear(retmask); + cpumask_bits(retmask)[0] = APIC_ALL_CPUS; } /* @@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static void flat_send_IPI_mask(cpumask_t cpumask, int vector) +static inline void _flat_send_IPI_mask(unsigned long mask, int vector) { - unsigned long mask = cpus_addr(cpumask)[0]; unsigned long flags; local_irq_save(flags); @@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) local_irq_restore(flags); } +static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) +{ + unsigned long mask = cpumask_bits(cpumask)[0]; + + _flat_send_IPI_mask(mask, vector); +} + +static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, + int vector) +{ + unsigned long mask = cpumask_bits(cpumask)[0]; + int cpu = smp_processor_id(); + + if (cpu < BITS_PER_LONG) + clear_bit(cpu, &mask); + _flat_send_IPI_mask(mask, vector); +} + static void flat_send_IPI_allbutself(int vector) { + int cpu = smp_processor_id(); #ifdef CONFIG_HOTPLUG_CPU int hotplug = 1; #else int hotplug = 0; #endif if (hotplug || vector == NMI_VECTOR) { - cpumask_t allbutme = cpu_online_map; + if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { + unsigned long mask = cpumask_bits(cpu_online_mask)[0]; - cpu_clear(smp_processor_id(), allbutme); + if (cpu < BITS_PER_LONG) + clear_bit(cpu, &mask); - if (!cpus_empty(allbutme)) - flat_send_IPI_mask(allbutme, vector); + _flat_send_IPI_mask(mask, vector); + } } else if (num_online_cpus() > 1) { __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); } @@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector) static void flat_send_IPI_all(int vector) { if (vector == NMI_VECTOR) - flat_send_IPI_mask(cpu_online_map, vector); + flat_send_IPI_mask(cpu_online_mask, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } @@ -135,9 +155,18 @@ static int flat_apic_id_registered(void) return physid_isset(read_xapic_id(), phys_cpu_present_map); } -static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) +{ + return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; +} + +static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { - return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; + unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; + unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; + + return mask1 & mask2; } static unsigned int phys_pkg_id(int index_msb) @@ -157,8 +186,10 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, @@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static cpumask_t physflat_target_cpus(void) +static const struct cpumask *physflat_target_cpus(void) { - return cpu_online_map; + return cpu_online_mask; } -static cpumask_t physflat_vector_allocation_domain(int cpu) +static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) { - return cpumask_of_cpu(cpu); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } -static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) +static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { send_IPI_mask_sequence(cpumask, vector); } -static void physflat_send_IPI_allbutself(int vector) +static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, + int vector) { - cpumask_t allbutme = cpu_online_map; + send_IPI_mask_allbutself(cpumask, vector); +} - cpu_clear(smp_processor_id(), allbutme); - physflat_send_IPI_mask(allbutme, vector); +static void physflat_send_IPI_allbutself(int vector) +{ + send_IPI_mask_allbutself(cpu_online_mask, vector); } static void physflat_send_IPI_all(int vector) { - physflat_send_IPI_mask(cpu_online_map, vector); + physflat_send_IPI_mask(cpu_online_mask, vector); } -static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; @@ -224,13 +259,31 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(cpumask); + cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } +static unsigned int +physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; +} + struct genapic apic_physflat = { .name = "physical flat", .acpi_madt_oem_check = physflat_acpi_madt_oem_check, @@ -243,8 +296,10 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index f6a2c8eb48a..6ce497cc372 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static cpumask_t x2apic_target_cpus(void) +static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of_cpu(0); + return cpumask_of(0); } /* * for now each logical cpu is in its own vector allocation domain. */ -static cpumask_t x2apic_vector_allocation_domain(int cpu) +static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) { - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, * at once. We have 16 cpu's in a cluster. This will minimize IPI register * writes. */ -static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { - __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - } + for_each_cpu(query_cpu, mask) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); local_irq_restore(flags); } -static void x2apic_send_IPI_allbutself(int vector) +static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, + int vector) { - cpumask_t mask = cpu_online_map; + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); - cpu_clear(smp_processor_id(), mask); + local_irq_save(flags); + for_each_cpu(query_cpu, mask) + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); - if (!cpus_empty(mask)) - x2apic_send_IPI_mask(mask, vector); + local_irq_save(flags); + for_each_online_cpu(query_cpu) + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + local_irq_restore(flags); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_map, vector); + x2apic_send_IPI_mask(cpu_online_mask, vector); } static int x2apic_apic_id_registered(void) @@ -89,21 +109,38 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. + * We're using fixed IRQ delivery, can only return one logical APIC ID. * May as well be the first. */ - cpu = first_cpu(cpumask); - if ((unsigned)cpu < NR_CPUS) + cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_logical_apicid, cpu); else return BAD_APICID; } +static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one logical APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_logical_apicid, cpu); + return BAD_APICID; +} + static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -150,8 +187,10 @@ struct genapic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index d042211768b..21bcc0e098b 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static cpumask_t x2apic_target_cpus(void) +static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of_cpu(0); + return cpumask_of(0); } -static cpumask_t x2apic_vector_allocation_domain(int cpu) +static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) { - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, x2apic_icr_write(cfg, apicid); } -static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { + for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static void x2apic_send_IPI_allbutself(int vector) +static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, + int vector) { - cpumask_t mask = cpu_online_map; + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} - cpu_clear(smp_processor_id(), mask); +static void x2apic_send_IPI_allbutself(int vector) +{ + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); - if (!cpus_empty(mask)) - x2apic_send_IPI_mask(mask, vector); + local_irq_save(flags); + for_each_online_cpu(query_cpu) + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + local_irq_restore(flags); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_map, vector); + x2apic_send_IPI_mask(cpu_online_mask, vector); } static int x2apic_apic_id_registered(void) @@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; @@ -95,13 +116,30 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(cpumask); - if ((unsigned)cpu < NR_CPUS) + cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } +static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; +} + static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -123,12 +161,12 @@ static unsigned int phys_pkg_id(int index_msb) return current_cpu_data.initial_apicid >> index_msb; } -void x2apic_send_IPI_self(int vector) +static void x2apic_send_IPI_self(int vector) { apic_write(APIC_SELF_IPI, vector); } -void init_x2apic_ldr(void) +static void init_x2apic_ldr(void) { return; } @@ -145,8 +183,10 @@ struct genapic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index dece1728973..b193e082f6c 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -79,16 +79,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static cpumask_t uv_target_cpus(void) +static const struct cpumask *uv_target_cpus(void) { - return cpumask_of_cpu(0); + return cpumask_of(0); } -static cpumask_t uv_vector_allocation_domain(int cpu) +static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) { - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) @@ -127,28 +126,37 @@ static void uv_send_IPI_one(int cpu, int vector) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -static void uv_send_IPI_mask(cpumask_t mask, int vector) +static void uv_send_IPI_mask(const struct cpumask *mask, int vector) { unsigned int cpu; - for_each_possible_cpu(cpu) - if (cpu_isset(cpu, mask)) + for_each_cpu(cpu, mask) + uv_send_IPI_one(cpu, vector); +} + +static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +{ + unsigned int cpu; + unsigned int this_cpu = smp_processor_id(); + + for_each_cpu(cpu, mask) + if (cpu != this_cpu) uv_send_IPI_one(cpu, vector); } static void uv_send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - - cpu_clear(smp_processor_id(), mask); + unsigned int cpu; + unsigned int this_cpu = smp_processor_id(); - if (!cpus_empty(mask)) - uv_send_IPI_mask(mask, vector); + for_each_online_cpu(cpu) + if (cpu != this_cpu) + uv_send_IPI_one(cpu, vector); } static void uv_send_IPI_all(int vector) { - uv_send_IPI_mask(cpu_online_map, vector); + uv_send_IPI_mask(cpu_online_mask, vector); } static int uv_apic_id_registered(void) @@ -160,7 +168,7 @@ static void uv_init_apic_ldr(void) { } -static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; @@ -168,13 +176,30 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(cpumask); + cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } +static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; +} + static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -222,8 +247,10 @@ struct genapic apic_x2apic_uv_x = { .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, + .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, .send_IPI_self = uv_send_IPI_self, .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 388e05a5fc1..b9a4d8c4b93 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -27,7 +27,7 @@ #include <asm/trampoline.h> /* boot cpu pda */ -static struct x8664_pda _boot_cpu_pda __read_mostly; +static struct x8664_pda _boot_cpu_pda; #ifdef CONFIG_SMP /* diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 845ea097383..cd759ad9069 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -248,7 +248,7 @@ static void hpet_legacy_clockevent_register(void) * Start hpet with the boot cpu mask and make it * global after the IO_APIC has been initialized. */ - hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); + hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(&hpet_clockevent); global_clock_event = &hpet_clockevent; printk(KERN_DEBUG "hpet clockevent registered\n"); @@ -303,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode, struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); hpet_setup_msi_irq(hdev->irq); disable_irq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); + irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } break; @@ -451,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) return -1; disable_irq(dev->irq); - irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); + irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); enable_irq(dev->irq); printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", @@ -502,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) /* 5 usec minimum reprogramming delta. */ evt->min_delta_ns = 5000; - evt->cpumask = cpumask_of_cpu(hdev->cpu); + evt->cpumask = cpumask_of(hdev->cpu); clockevents_register_device(evt); } diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index c1b5e3ece1f..10f92fb532f 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -114,7 +114,7 @@ void __init setup_pit_timer(void) * Start pit with the boot cpu mask and make it global after the * IO_APIC has been initialized. */ - pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); + pit_clockevent.cpumask = cpumask_of(smp_processor_id()); pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_clockevent.shift); pit_clockevent.max_delta_ns = diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index d39918076bb..df3bf269bea 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -10,7 +10,6 @@ #include <asm/pgtable.h> #include <asm/desc.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index f6ea94b74da..3639442aa7a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -136,8 +136,8 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) struct irq_cfg { struct irq_pin_list *irq_2_pin; - cpumask_t domain; - cpumask_t old_domain; + cpumask_var_t domain; + cpumask_var_t old_domain; unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; @@ -152,25 +152,25 @@ static struct irq_cfg irq_cfgx[] = { #else static struct irq_cfg irq_cfgx[NR_IRQS] = { #endif - [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, + [0] = { .vector = IRQ0_VECTOR, }, + [1] = { .vector = IRQ1_VECTOR, }, + [2] = { .vector = IRQ2_VECTOR, }, + [3] = { .vector = IRQ3_VECTOR, }, + [4] = { .vector = IRQ4_VECTOR, }, + [5] = { .vector = IRQ5_VECTOR, }, + [6] = { .vector = IRQ6_VECTOR, }, + [7] = { .vector = IRQ7_VECTOR, }, + [8] = { .vector = IRQ8_VECTOR, }, + [9] = { .vector = IRQ9_VECTOR, }, + [10] = { .vector = IRQ10_VECTOR, }, + [11] = { .vector = IRQ11_VECTOR, }, + [12] = { .vector = IRQ12_VECTOR, }, + [13] = { .vector = IRQ13_VECTOR, }, + [14] = { .vector = IRQ14_VECTOR, }, + [15] = { .vector = IRQ15_VECTOR, }, }; -void __init arch_early_irq_init(void) +int __init arch_early_irq_init(void) { struct irq_cfg *cfg; struct irq_desc *desc; @@ -183,7 +183,13 @@ void __init arch_early_irq_init(void) for (i = 0; i < count; i++) { desc = irq_to_desc(i); desc->chip_data = &cfg[i]; + alloc_bootmem_cpumask_var(&cfg[i].domain); + alloc_bootmem_cpumask_var(&cfg[i].old_domain); + if (i < NR_IRQS_LEGACY) + cpumask_setall(cfg[i].domain); } + + return 0; } #ifdef CONFIG_SPARSE_IRQ @@ -207,12 +213,26 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) node = cpu_to_node(cpu); cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); + if (cfg) { + if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { + kfree(cfg); + cfg = NULL; + } else if (!alloc_cpumask_var_node(&cfg->old_domain, + GFP_ATOMIC, node)) { + free_cpumask_var(cfg->domain); + kfree(cfg); + cfg = NULL; + } else { + cpumask_clear(cfg->domain); + cpumask_clear(cfg->old_domain); + } + } printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); return cfg; } -void arch_init_chip_data(struct irq_desc *desc, int cpu) +int arch_init_chip_data(struct irq_desc *desc, int cpu) { struct irq_cfg *cfg; @@ -224,6 +244,8 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu) BUG_ON(1); } } + + return 0; } #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC @@ -329,13 +351,14 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) } } -static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +static void +set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg = desc->chip_data; if (!cfg->move_in_progress) { /* it means that domain is not changed */ - if (!cpus_intersects(desc->affinity, mask)) + if (!cpumask_intersects(&desc->affinity, mask)) cfg->move_desc_pending = 1; } } @@ -350,7 +373,8 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC -static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +static inline void +set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) { } #endif @@ -481,6 +505,26 @@ static void ioapic_mask_entry(int apic, int pin) } #ifdef CONFIG_SMP +static void send_cleanup_vector(struct irq_cfg *cfg) +{ + cpumask_var_t cleanup_mask; + + if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { + unsigned int i; + cfg->move_cleanup_count = 0; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + cfg->move_cleanup_count++; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); + } else { + cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + cfg->move_cleanup_count = cpumask_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + free_cpumask_var(cleanup_mask); + } + cfg->move_in_progress = 0; +} + static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; @@ -516,41 +560,55 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); +static int +assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); -static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) +/* + * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid + * of that, or returns BAD_APICID and leaves desc->affinity untouched. + */ +static unsigned int +set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - cpumask_t tmp; unsigned int irq; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; + if (!cpumask_intersects(mask, cpu_online_mask)) + return BAD_APICID; irq = desc->irq; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return; + return BAD_APICID; + cpumask_and(&desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); + return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); +} - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - /* - * Only the high 8 bits are valid. - */ - dest = SET_APIC_LOGICAL_ID(dest); +static void +set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg); - desc->affinity = mask; + dest = set_desc_affinity(desc, mask); + if (dest != BAD_APICID) { + /* Only the high 8 bits are valid. */ + dest = SET_APIC_LOGICAL_ID(dest); + __target_IO_APIC_irq(irq, dest, cfg); + } spin_unlock_irqrestore(&ioapic_lock, flags); } -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +static void +set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc; @@ -648,7 +706,7 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) } #ifdef CONFIG_X86_64 -void io_apic_sync(struct irq_pin_list *entry) +static void io_apic_sync(struct irq_pin_list *entry) { /* * Synchronize the IO-APIC and the CPU by doing @@ -1218,7 +1276,8 @@ void unlock_vector_lock(void) spin_unlock(&vector_lock); } -static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) +static int +__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { /* * NOTE! The local APIC isn't very good at handling @@ -1233,49 +1292,49 @@ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) */ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; - int cpu; + int cpu, err; + cpumask_var_t tmp_mask; if ((cfg->move_in_progress) || cfg->move_cleanup_count) return -EBUSY; - /* Only try and allocate irqs on cpus that are present */ - cpus_and(mask, mask, cpu_online_map); + if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) + return -ENOMEM; old_vector = cfg->vector; if (old_vector) { - cpumask_t tmp; - cpus_and(tmp, cfg->domain, mask); - if (!cpus_empty(tmp)) + cpumask_and(tmp_mask, mask, cpu_online_mask); + cpumask_and(tmp_mask, cfg->domain, tmp_mask); + if (!cpumask_empty(tmp_mask)) { + free_cpumask_var(tmp_mask); return 0; + } } - for_each_cpu_mask_nr(cpu, mask) { - cpumask_t domain, new_mask; + /* Only try and allocate irqs on cpus that are present */ + err = -ENOSPC; + for_each_cpu_and(cpu, mask, cpu_online_mask) { int new_cpu; int vector, offset; - domain = vector_allocation_domain(cpu); - cpus_and(new_mask, domain, cpu_online_map); + vector_allocation_domain(cpu, tmp_mask); vector = current_vector; offset = current_offset; next: vector += 8; if (vector >= first_system_vector) { - /* If we run out of vectors on large boxen, must share them. */ + /* If out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; } if (unlikely(current_vector == vector)) continue; -#ifdef CONFIG_X86_64 - if (vector == IA32_SYSCALL_VECTOR) - goto next; -#else - if (vector == SYSCALL_VECTOR) + + if (test_bit(vector, used_vectors)) goto next; -#endif - for_each_cpu_mask_nr(new_cpu, new_mask) + + for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; /* Found one! */ @@ -1283,18 +1342,21 @@ next: current_offset = offset; if (old_vector) { cfg->move_in_progress = 1; - cfg->old_domain = cfg->domain; + cpumask_copy(cfg->old_domain, cfg->domain); } - for_each_cpu_mask_nr(new_cpu, new_mask) + for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; cfg->vector = vector; - cfg->domain = domain; - return 0; + cpumask_copy(cfg->domain, tmp_mask); + err = 0; + break; } - return -ENOSPC; + free_cpumask_var(tmp_mask); + return err; } -static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) +static int +assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { int err; unsigned long flags; @@ -1307,23 +1369,20 @@ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) static void __clear_irq_vector(int irq, struct irq_cfg *cfg) { - cpumask_t mask; int cpu, vector; BUG_ON(!cfg->vector); vector = cfg->vector; - cpus_and(mask, cfg->domain, cpu_online_map); - for_each_cpu_mask_nr(cpu, mask) + for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; - cpus_clear(cfg->domain); + cpumask_clear(cfg->domain); if (likely(!cfg->move_in_progress)) return; - cpus_and(mask, cfg->old_domain, cpu_online_map); - for_each_cpu_mask_nr(cpu, mask) { + for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) @@ -1345,10 +1404,8 @@ void __setup_vector_irq(int cpu) /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { - if (!desc) - continue; cfg = desc->chip_data; - if (!cpu_isset(cpu, cfg->domain)) + if (!cpumask_test_cpu(cpu, cfg->domain)) continue; vector = cfg->vector; per_cpu(vector_irq, cpu)[vector] = irq; @@ -1360,7 +1417,7 @@ void __setup_vector_irq(int cpu) continue; cfg = irq_cfg(irq); - if (!cpu_isset(cpu, cfg->domain)) + if (!cpumask_test_cpu(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } } @@ -1496,18 +1553,17 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de { struct irq_cfg *cfg; struct IO_APIC_route_entry entry; - cpumask_t mask; + unsigned int dest; if (!IO_APIC_IRQ(irq)) return; cfg = desc->chip_data; - mask = TARGET_CPUS; - if (assign_irq_vector(irq, cfg, mask)) + if (assign_irq_vector(irq, cfg, TARGET_CPUS)) return; - cpus_and(mask, cfg->domain, mask); + dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1517,8 +1573,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - cpu_mask_to_apicid(mask), trigger, polarity, - cfg->vector)) { + dest, trigger, polarity, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); __clear_irq_vector(irq, cfg); @@ -1730,8 +1785,6 @@ __apicdebuginit(void) print_IO_APIC(void) for_each_irq_desc(irq, desc) { struct irq_pin_list *entry; - if (!desc) - continue; cfg = desc->chip_data; entry = cfg->irq_2_pin; if (!entry) @@ -2240,7 +2293,7 @@ static int ioapic_retrigger_irq(unsigned int irq) unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); + send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -2289,18 +2342,17 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to IO-APIC RTE. */ -static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) +static void +migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; - cpumask_t tmp, cleanup_mask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; unsigned int irq; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; irq = desc->irq; @@ -2313,8 +2365,7 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) set_extra_move_desc(desc, mask); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid_and(cfg->domain, mask); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { @@ -2331,14 +2382,10 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) */ modify_irte(irq, &irte); - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } + if (cfg->move_in_progress) + send_cleanup_vector(cfg); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } static int migrate_irq_remapped_level_desc(struct irq_desc *desc) @@ -2360,11 +2407,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, desc->pending_mask); + migrate_ioapic_irq_desc(desc, &desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; - cpus_clear(desc->pending_mask); + cpumask_clear(&desc->pending_mask); unmask: unmask_IO_APIC_irq_desc(desc); @@ -2378,9 +2425,6 @@ static void ir_irq_migration(struct work_struct *work) struct irq_desc *desc; for_each_irq_desc(irq, desc) { - if (!desc) - continue; - if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@ -2392,7 +2436,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, desc->pending_mask); + desc->chip->set_affinity(irq, &desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -2401,18 +2445,20 @@ static void ir_irq_migration(struct work_struct *work) /* * Migrates the IRQ destination in the process context. */ -static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) +static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, + const struct cpumask *mask) { if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - desc->pending_mask = mask; + cpumask_copy(&desc->pending_mask, mask); migrate_irq_remapped_level_desc(desc); return; } migrate_ioapic_irq_desc(desc, mask); } -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +static void set_ir_ioapic_affinity_irq(unsigned int irq, + const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); @@ -2447,7 +2493,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) if (!cfg->move_cleanup_count) goto unlock; - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; __get_cpu_var(vector_irq)[vector] = -1; @@ -2484,20 +2530,14 @@ static void irq_complete_move(struct irq_desc **descp) vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { - cpumask_t cleanup_mask; - #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; #endif - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + send_cleanup_vector(cfg); } #else static inline void irq_complete_move(struct irq_desc **descp) {} @@ -2670,9 +2710,6 @@ static inline void init_IO_APIC_traps(void) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ for_each_irq_desc(irq, desc) { - if (!desc) - continue; - cfg = desc->chip_data; if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* @@ -3222,16 +3259,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms struct irq_cfg *cfg; int err; unsigned dest; - cpumask_t tmp; cfg = irq_cfg(irq); - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, cfg, tmp); + err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (err) return err; - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3285,26 +3319,18 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms } #ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; - cpumask_t tmp; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return; - - set_extra_move_desc(desc, mask); - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); read_msi_msg_desc(desc, &msg); @@ -3314,37 +3340,27 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg_desc(desc, &msg); - desc->affinity = mask; } #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. */ -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +static void +ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg; + struct irq_cfg *cfg = desc->chip_data; unsigned int dest; - cpumask_t tmp, cleanup_mask; struct irte irte; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - if (get_irte(irq, &irte)) return; - cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) return; - set_extra_move_desc(desc, mask); - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3358,14 +3374,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) * at the new destination. So, time to cleanup the previous * vector allocation. */ - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - desc->affinity = mask; + if (cfg->move_in_progress) + send_cleanup_vector(cfg); } #endif @@ -3556,26 +3566,18 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; - cpumask_t tmp; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return; - - set_extra_move_desc(desc, mask); - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); dmar_msi_read(irq, &msg); @@ -3585,7 +3587,6 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); - desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -3619,26 +3620,18 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) +static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; - cpumask_t tmp; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return; - - set_extra_move_desc(desc, mask); - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); hpet_msi_read(irq, &msg); @@ -3648,7 +3641,6 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); - desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -3703,28 +3695,19 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) +static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; - cpumask_t tmp; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return; - - set_extra_move_desc(desc, mask); - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - desc->affinity = mask; } #endif @@ -3744,17 +3727,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { struct irq_cfg *cfg; int err; - cpumask_t tmp; cfg = irq_cfg(irq); - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, cfg, tmp); + err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (!err) { struct ht_irq_msg msg; unsigned dest; - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3790,7 +3770,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_offset) { - const cpumask_t *eligible_cpu = get_cpu_mask(cpu); + const struct cpumask *eligible_cpu = cpumask_of(cpu); struct irq_cfg *cfg; int mmr_pnode; unsigned long mmr_value; @@ -3800,7 +3780,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, *eligible_cpu); + err = assign_irq_vector(irq, cfg, eligible_cpu); if (err != 0) return err; @@ -3819,7 +3799,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = cpu_mask_to_apicid(*eligible_cpu); + entry->dest = cpu_mask_to_apicid(eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -4030,7 +4010,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; struct irq_desc *desc; struct irq_cfg *cfg; - cpumask_t mask; + const struct cpumask *mask; if (skip_ioapic_setup == 1) return; @@ -4061,7 +4041,7 @@ void __init setup_ioapic_dest(void) */ if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = desc->affinity; + mask = &desc->affinity; else mask = TARGET_CPUS; diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index f1c688e46f3..285bbf8831f 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * This is only used on smaller machines. */ -void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) +void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) { - unsigned long mask = cpus_addr(cpumask)[0]; + unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; local_irq_save(flags); - WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); + WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); __send_IPI_dest_field(mask, vector); local_irq_restore(flags); } -void send_IPI_mask_sequence(cpumask_t mask, int vector) +void send_IPI_mask_sequence(const struct cpumask *mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) */ local_irq_save(flags); - for_each_possible_cpu(query_cpu) { - if (cpu_isset(query_cpu, mask)) { + for_each_cpu(query_cpu, mask) + __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); + local_irq_restore(flags); +} + +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +{ + unsigned long flags; + unsigned int query_cpu; + unsigned int this_cpu = smp_processor_id(); + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) + if (query_cpu != this_cpu) __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); - } - } local_irq_restore(flags); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3f1d9d18df6..bce53e1352a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -9,6 +9,7 @@ #include <asm/apic.h> #include <asm/io_apic.h> #include <asm/smp.h> +#include <asm/irq.h> atomic_t irq_err_count; @@ -190,3 +191,5 @@ u64 arch_irq_stat(void) #endif return sum; } + +EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 119fc9c8ff7..9dc5588f336 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -233,27 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU #include <mach_apic.h> -void fixup_irqs(cpumask_t map) +/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ +void fixup_irqs(void) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { - cpumask_t mask; + const struct cpumask *affinity; if (!desc) continue; if (irq == 2) continue; - cpus_and(mask, desc->affinity, map); - if (any_online_cpu(mask) == NR_CPUS) { + affinity = &desc->affinity; + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); - mask = map; + affinity = cpu_all_mask; } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, mask); + desc->chip->set_affinity(irq, affinity); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index a174a217eb1..6383d50f82e 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -80,16 +80,17 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(cpumask_t map) +/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ +void fixup_irqs(void) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { - cpumask_t mask; int break_affinity = 0; int set_affinity = 1; + const struct cpumask *affinity; if (!desc) continue; @@ -99,23 +100,23 @@ void fixup_irqs(cpumask_t map) /* interrupt's are disabled at this point */ spin_lock(&desc->lock); + affinity = &desc->affinity; if (!irq_has_action(irq) || - cpus_equal(desc->affinity, map)) { + cpumask_equal(affinity, cpu_online_mask)) { spin_unlock(&desc->lock); continue; } - cpus_and(mask, desc->affinity, map); - if (cpus_empty(mask)) { + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { break_affinity = 1; - mask = map; + affinity = cpu_all_mask; } if (desc->chip->mask) desc->chip->mask(irq); if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, mask); + desc->chip->set_affinity(irq, affinity); else if (!(warned++)) set_affinity = 0; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 203384ed2b5..84723295f88 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -110,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; +int vector_used_by_percpu_irq(unsigned int vector) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (per_cpu(vector_irq, cpu)[vector] != -1) + return 1; + } + + return 0; +} + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -146,10 +158,12 @@ void __init native_init_IRQ(void) alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); /* IPI for single call function */ - set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); + alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, + call_function_single_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); + set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 6190e6ef546..31ebfe38e96 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -69,6 +69,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; +int vector_used_by_percpu_irq(unsigned int vector) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (per_cpu(vector_irq, cpu)[vector] != -1) + return 1; + } + + return 0; +} + void __init init_ISA_irqs(void) { int i; @@ -121,6 +133,7 @@ static void __init smp_intr_init(void) /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); + set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index e169ae9b6a6..652fce6d2cc 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void) */ static unsigned long kvm_get_tsc_khz(void) { - return preset_lpj; + struct pvclock_vcpu_time_info *src; + src = &per_cpu(hv_clock, 0); + return pvclock_tsc_khz(src); } static void kvm_get_preset_lpj(void) { - struct pvclock_vcpu_time_info *src; unsigned long khz; u64 lpj; - src = &per_cpu(hv_clock, 0); - khz = pvclock_tsc_khz(src); + khz = kvm_get_tsc_khz(); lpj = ((u64)khz * 1000); do_div(lpj, HZ); @@ -194,5 +194,7 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register(&kvm_clock); + pv_info.paravirt_enabled = 1; + pv_info.name = "KVM"; } } diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index eee32b43fee..71f1d99a635 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,8 +12,8 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/vmalloc.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/system.h> #include <asm/ldt.h> #include <asm/desc.h> @@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) if (err < 0) return err; - for(i = 0; i < old->size; i++) + for (i = 0; i < old->size; i++) write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); return 0; } diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 3b599518c32..c12314c9e86 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = { .set_mode = mfgpt_set_mode, .set_next_event = mfgpt_next_event, .rating = 250, - .cpumask = CPU_MASK_ALL, + .cpumask = cpu_all_mask, .shift = 32 }; diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index efc2f361fe8..666e43df51f 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -13,8 +13,7 @@ #include <asm/msr.h> #include <asm/acpi.h> #include <asm/mmconfig.h> - -#include "../pci/pci.h" +#include <asm/pci_x86.h> struct pci_hostbridge_probe { u32 bus; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 45e3b69808b..c5c5b8df1db 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -16,14 +16,14 @@ #include <linux/bitops.h> #include <linux/acpi.h> #include <linux/module.h> +#include <linux/smp.h> +#include <linux/acpi.h> -#include <asm/smp.h> #include <asm/mtrr.h> #include <asm/mpspec.h> #include <asm/pgalloc.h> #include <asm/io_apic.h> #include <asm/proto.h> -#include <asm/acpi.h> #include <asm/bios_ebda.h> #include <asm/e820.h> #include <asm/trampoline.h> @@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m) #endif if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { - set_bit(m->mpc_busid, mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined (CONFIG_MCA) + set_bit(m->mpc_busid, mp_bus_not_pci); +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; #endif } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { @@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) x86_quirks->mpc_oem_pci_bus(m); clear_bit(m->mpc_busid, mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined (CONFIG_MCA) +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 82a7c7ed6d4..726266695b2 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -136,7 +136,7 @@ static int msr_open(struct inode *inode, struct file *file) lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= NR_CPUS || !cpu_online(cpu)) { + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { ret = -ENXIO; /* No such CPU */ goto out; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 8bd1bf9622a..45a09ccdc21 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -26,11 +26,10 @@ #include <linux/kernel_stat.h> #include <linux/kdebug.h> #include <linux/smp.h> +#include <linux/nmi.h> #include <asm/i8259.h> #include <asm/io_apic.h> -#include <asm/smp.h> -#include <asm/nmi.h> #include <asm/proto.h> #include <asm/timer.h> diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a35eaa379ff..00c2bcd4146 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */ * to trigger bugs with some popular PCI cards, in particular 3ware (but * has been also also seen with Qlogic at least). */ -int iommu_fullflush = 1; +static int iommu_fullflush = 1; /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 242c3440687..8cba3749a51 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -13,7 +13,7 @@ int swiotlb __read_mostly; -void *swiotlb_alloc_boot(size_t size, unsigned long nslabs) +void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) { return alloc_bootmem_low_pages(size); } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 61f718df6ee..2b46eb41643 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -12,6 +12,8 @@ #include <asm/proto.h> #include <asm/reboot_fixups.h> #include <asm/reboot.h> +#include <asm/pci_x86.h> +#include <asm/virtext.h> #ifdef CONFIG_X86_32 # include <linux/dmi.h> @@ -23,7 +25,6 @@ #include <mach_ipi.h> - /* * Power off function, if any */ @@ -39,6 +40,12 @@ int reboot_force; static int reboot_cpu = -1; #endif +/* This is set if we need to go through the 'emergency' path. + * When machine_emergency_restart() is called, we may be on + * an inconsistent state and won't be able to do a clean cleanup + */ +static int reboot_emergency; + /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ bool port_cf9_safe = false; @@ -368,6 +375,48 @@ static inline void kb_wait(void) } } +static void vmxoff_nmi(int cpu, struct die_args *args) +{ + cpu_emergency_vmxoff(); +} + +/* Use NMIs as IPIs to tell all CPUs to disable virtualization + */ +static void emergency_vmx_disable_all(void) +{ + /* Just make sure we won't change CPUs while doing this */ + local_irq_disable(); + + /* We need to disable VMX on all CPUs before rebooting, otherwise + * we risk hanging up the machine, because the CPU ignore INIT + * signals when VMX is enabled. + * + * We can't take any locks and we may be on an inconsistent + * state, so we use NMIs as IPIs to tell the other CPUs to disable + * VMX and halt. + * + * For safety, we will avoid running the nmi_shootdown_cpus() + * stuff unnecessarily, but we don't have a way to check + * if other CPUs have VMX enabled. So we will call it only if the + * CPU we are running on has VMX enabled. + * + * We will miss cases where VMX is not enabled on all CPUs. This + * shouldn't do much harm because KVM always enable VMX on all + * CPUs anyway. But we can miss it on the small window where KVM + * is still enabling VMX. + */ + if (cpu_has_vmx() && cpu_vmx_enabled()) { + /* Disable VMX on this CPU. + */ + cpu_vmxoff(); + + /* Halt and disable VMX on the other CPUs */ + nmi_shootdown_cpus(vmxoff_nmi); + + } +} + + void __attribute__((weak)) mach_reboot_fixups(void) { } @@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void) { int i; + if (reboot_emergency) + emergency_vmx_disable_all(); + /* Tell the BIOS if we want cold or warm reboot */ *((unsigned short *)__va(0x472)) = reboot_mode; @@ -449,7 +501,7 @@ void native_machine_shutdown(void) #ifdef CONFIG_X86_32 /* See if there has been given a command line override */ - if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && + if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && cpu_online(reboot_cpu)) reboot_cpu_id = reboot_cpu; #endif @@ -459,7 +511,7 @@ void native_machine_shutdown(void) reboot_cpu_id = smp_processor_id(); /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); + set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); /* O.K Now that I'm on the appropriate processor, * stop all of the others. @@ -482,13 +534,19 @@ void native_machine_shutdown(void) #endif } +static void __machine_emergency_restart(int emergency) +{ + reboot_emergency = emergency; + machine_ops.emergency_restart(); +} + static void native_machine_restart(char *__unused) { printk("machine restart\n"); if (!reboot_force) machine_shutdown(); - machine_emergency_restart(); + __machine_emergency_restart(0); } static void native_machine_halt(void) @@ -532,7 +590,7 @@ void machine_shutdown(void) void machine_emergency_restart(void) { - machine_ops.emergency_restart(); + __machine_emergency_restart(1); } void machine_restart(char *cmd) @@ -592,10 +650,7 @@ static int crash_nmi_callback(struct notifier_block *self, static void smp_send_nmi_allbutself(void) { - cpumask_t mask = cpu_online_map; - cpu_clear(safe_smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, NMI_VECTOR); + send_IPI_allbutself(NMI_VECTOR); } static struct notifier_block crash_nmi_nb = { diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ae0c0d3bb77..a4b619c3310 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -152,8 +152,11 @@ void __init setup_per_cpu_areas(void) old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); size = roundup(old_size, align); - printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", - size); + + pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", + NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); + + pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size); for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -164,28 +167,21 @@ void __init setup_per_cpu_areas(void) if (!node_online(node) || !NODE_DATA(node)) { ptr = __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS)); - printk(KERN_INFO - "cpu %d has no node %d or node-local memory\n", + pr_info("cpu %d has no node %d or node-local memory\n", cpu, node); - if (ptr) - printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", - cpu, __pa(ptr)); - } - else { + pr_debug("per cpu data for cpu%d at %016lx\n", + cpu, __pa(ptr)); + } else { ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, __pa(MAX_DMA_ADDRESS)); - if (ptr) - printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", - cpu, node, __pa(ptr)); + pr_debug("per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); } - printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", - NR_CPUS, nr_cpu_ids, nr_node_ids); - /* Setup percpu data maps */ setup_per_cpu_maps(); @@ -282,7 +278,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable) else cpu_clear(cpu, *mask); - cpulist_scnprintf(buf, sizeof(buf), *mask); + cpulist_scnprintf(buf, sizeof(buf), mask); printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); } @@ -334,25 +330,25 @@ static const cpumask_t cpu_mask_none; /* * Returns a pointer to the bitmask of CPUs on Node 'node'. */ -const cpumask_t *_node_to_cpumask_ptr(int node) +const cpumask_t *cpumask_of_node(int node) { if (node_to_cpumask_map == NULL) { printk(KERN_WARNING - "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", + "cpumask_of_node(%d): no node_to_cpumask_map!\n", node); dump_stack(); return (const cpumask_t *)&cpu_online_map; } if (node >= nr_node_ids) { printk(KERN_WARNING - "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", + "cpumask_of_node(%d): node > nr_node_ids(%d)\n", node, nr_node_ids); dump_stack(); return &cpu_mask_none; } return &node_to_cpumask_map[node]; } -EXPORT_SYMBOL(_node_to_cpumask_ptr); +EXPORT_SYMBOL(cpumask_of_node); /* * Returns a bitmask of CPUs on Node 'node'. diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 7e558db362c..beea2649a24 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); + send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); + send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); } -void native_send_call_func_ipi(cpumask_t mask) +void native_send_call_func_ipi(const struct cpumask *mask) { cpumask_t allbutself; allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - if (cpus_equal(mask, allbutself) && + if (cpus_equal(*mask, allbutself) && cpus_equal(cpu_online_map, cpu_callout_map)) send_IPI_allbutself(CALL_FUNCTION_VECTOR); else diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f8500c96944..6bd4d9b7387 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -102,14 +102,8 @@ EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; -/* bitmap of online cpus */ -cpumask_t cpu_online_map __read_mostly; -EXPORT_SYMBOL(cpu_online_map); - cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); @@ -502,7 +496,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) } /* maps the cpu to the sched domain representing multi-core */ -cpumask_t cpu_coregroup_map(int cpu) +const struct cpumask *cpu_coregroup_mask(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); /* @@ -510,9 +504,14 @@ cpumask_t cpu_coregroup_map(int cpu) * And for power savings, we return cpu_core_map */ if (sched_mc_power_savings || sched_smt_power_savings) - return per_cpu(cpu_core_map, cpu); + return &per_cpu(cpu_core_map, cpu); else - return c->llc_shared_map; + return &c->llc_shared_map; +} + +cpumask_t cpu_coregroup_map(int cpu) +{ + return *cpu_coregroup_mask(cpu); } static void impress_friends(void) @@ -1155,7 +1154,7 @@ static void __init smp_cpu_index_default(void) for_each_possible_cpu(i) { c = &cpu_data(i); /* mark all to hotplug */ - c->cpu_index = NR_CPUS; + c->cpu_index = nr_cpu_ids; } } @@ -1260,6 +1259,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } +static int __initdata setup_possible_cpus = -1; +static int __init _setup_possible_cpus(char *str) +{ + get_option(&str, &setup_possible_cpus); + return 0; +} +early_param("possible_cpus", _setup_possible_cpus); + + /* * cpu_possible_map should be static, it cannot change as cpu's * are onlined, or offlined. The reason is per-cpu data-structures @@ -1272,7 +1280,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) * * Three ways to find out the number of additional hotplug CPUs: * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with additional_cpus=NUM + * - The user can overwrite it with possible_cpus=NUM * - Otherwise don't reserve additional CPUs. * We do this because additional CPUs waste a lot of memory. * -AK @@ -1285,9 +1293,19 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; - possible = num_processors + disabled_cpus; - if (possible > NR_CPUS) - possible = NR_CPUS; + if (setup_possible_cpus == -1) + possible = num_processors + disabled_cpus; + else + possible = setup_possible_cpus; + + total_cpus = max_t(int, possible, num_processors + disabled_cpus); + + if (possible > CONFIG_NR_CPUS) { + printk(KERN_WARNING + "%d Processors exceeds NR_CPUS limit of %d\n", + possible, CONFIG_NR_CPUS); + possible = CONFIG_NR_CPUS; + } printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); @@ -1352,7 +1370,7 @@ void cpu_disable_common(void) lock_vector_lock(); remove_cpu_from_maps(cpu); unlock_vector_lock(); - fixup_irqs(cpu_online_map); + fixup_irqs(); } int native_cpu_disable(void) diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index 8da059f949b..ce505464224 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 29887d7081a..f8be6f1d2e4 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); while (!cpus_empty(f->flush_cpumask)) cpu_relax(); diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 6a00e5faaa7..f885023167e 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -582,7 +582,6 @@ static int __init uv_ptc_init(void) static struct bau_control * __init uv_table_bases_init(int blade, int node) { int i; - int *ip; struct bau_msg_status *msp; struct bau_control *bau_tabp; @@ -599,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node) bau_cpubits_clear(&msp->seen_by, (int) uv_blade_nr_possible_cpus(blade)); - bau_tabp->watching = - kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node); - BUG_ON(!bau_tabp->watching); - - for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++) - *ip = 0; - uv_bau_table_bases[blade] = bau_tabp; return bau_tabp; @@ -628,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu, bcp->bau_msg_head = bau_tablesp->va_queue_first; bcp->va_queue_first = bau_tablesp->va_queue_first; bcp->va_queue_last = bau_tablesp->va_queue_last; - bcp->watching = bau_tablesp->watching; bcp->msg_statuses = bau_tablesp->msg_statuses; bcp->descriptor_base = adp; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 141907ab6e2..ce6650eb64e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -72,9 +72,6 @@ #include "cpu/mcheck/mce.h" -DECLARE_BITMAP(used_vectors, NR_VECTORS); -EXPORT_SYMBOL_GPL(used_vectors); - asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ @@ -89,6 +86,9 @@ gate_desc idt_table[256] __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; #endif +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + static int ignore_nmis; static inline void conditional_sti(struct pt_regs *regs) @@ -292,8 +292,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) tsk->thread.error_code = error_code; tsk->thread.trap_no = 8; - /* This is always a kernel trap and never fixable (and thus must - never return). */ + /* + * This is always a kernel trap and never fixable (and thus must + * never return). + */ for (;;) die(str, regs, error_code); } @@ -520,9 +522,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) } #ifdef CONFIG_X86_64 -/* Help handler running on IST stack to switch back to user stack - for scheduling or signal handling. The actual stack switch is done in - entry.S */ +/* + * Help handler running on IST stack to switch back to user stack + * for scheduling or signal handling. The actual stack switch is done in + * entry.S + */ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; @@ -532,8 +536,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) /* Exception from user space */ else if (user_mode(eregs)) regs = task_pt_regs(current); - /* Exception from kernel and interrupts are enabled. Move to - kernel process stack. */ + /* + * Exception from kernel and interrupts are enabled. Move to + * kernel process stack. + */ else if (eregs->flags & X86_EFLAGS_IF) regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); if (eregs != regs) @@ -685,12 +691,7 @@ void math_error(void __user *ip) cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); - err = swd & ~cwd & 0x3f; - -#ifdef CONFIG_X86_32 - if (!err) - return; -#endif + err = swd & ~cwd; if (err & 0x001) { /* Invalid op */ /* @@ -708,7 +709,11 @@ void math_error(void __user *ip) } else if (err & 0x020) { /* Precision */ info.si_code = FPE_FLTRES; } else { - info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */ + /* + * If we're using IRQ 13, or supposedly even some trap 16 + * implementations, it's possible we get a spurious trap... + */ + return; /* Spurious trap, no error */ } force_sig_info(SIGFPE, &info, task); } @@ -941,9 +946,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) void __init trap_init(void) { -#ifdef CONFIG_X86_32 int i; -#endif #ifdef CONFIG_EISA void __iomem *p = early_ioremap(0x0FFFD9, 4); @@ -1000,11 +1003,15 @@ void __init trap_init(void) } set_system_trap_gate(SYSCALL_VECTOR, &system_call); +#endif /* Reserve all the builtin and the syscall vector: */ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors); +#ifdef CONFIG_X86_64 + set_bit(IA32_SYSCALL_VECTOR, used_vectors); +#else set_bit(SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 254ee07f863..c4c1f9e0940 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void) /* Upper bound is clockevent's use of ulong for cycle deltas. */ evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); evt->min_delta_ns = clockevent_delta2ns(1, evt); - evt->cpumask = cpumask_of_cpu(cpu); + evt->cpumask = cpumask_of(cpu); printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", evt->name, evt->mult, evt->shift); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 15c3e699918..2b54fe002e9 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf) * Restore the extended state if present. Otherwise, restore the FP/SSE * state. */ -int restore_user_xstate(void __user *buf) +static int restore_user_xstate(void __user *buf) { struct _fpx_sw_bytes fx_sw_user; u64 mask; diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index c02343594b4..d3ec292f00f 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -7,8 +7,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ ifeq ($(CONFIG_KVM_TRACE),y) common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) endif -ifeq ($(CONFIG_DMAR),y) -common-objs += $(addprefix ../../../virt/kvm/, vtd.o) +ifeq ($(CONFIG_IOMMU_API),y) +common-objs += $(addprefix ../../../virt/kvm/, iommu.o) endif EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 59ebd37ad79..e665d1c623c 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -603,10 +603,29 @@ void kvm_free_pit(struct kvm *kvm) static void __inject_pit_timer_intr(struct kvm *kvm) { + struct kvm_vcpu *vcpu; + int i; + mutex_lock(&kvm->lock); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); mutex_unlock(&kvm->lock); + + /* + * Provides NMI watchdog support via Virtual Wire mode. + * The route is: PIT -> PIC -> LVT0 in NMI mode. + * + * Note: Our Virtual Wire implementation is simplified, only + * propagating PIT interrupts to all VCPUs when they have set + * LVT0 to NMI delivery. Other PIC interrupts are just sent to + * VCPU0, and only if its LVT0 is in EXTINT mode. + */ + if (kvm->arch.vapics_in_nmi_mode > 0) + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (vcpu) + kvm_apic_nmi_wd_deliver(vcpu); + } } void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 17e41e165f1..179dcb0103f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -26,10 +26,40 @@ * Port from Qemu. */ #include <linux/mm.h> +#include <linux/bitops.h> #include "irq.h" #include <linux/kvm_host.h> +static void pic_lock(struct kvm_pic *s) +{ + spin_lock(&s->lock); +} + +static void pic_unlock(struct kvm_pic *s) +{ + struct kvm *kvm = s->kvm; + unsigned acks = s->pending_acks; + bool wakeup = s->wakeup_needed; + struct kvm_vcpu *vcpu; + + s->pending_acks = 0; + s->wakeup_needed = false; + + spin_unlock(&s->lock); + + while (acks) { + kvm_notify_acked_irq(kvm, __ffs(acks)); + acks &= acks - 1; + } + + if (wakeup) { + vcpu = s->kvm->vcpus[0]; + if (vcpu) + kvm_vcpu_kick(vcpu); + } +} + static void pic_clear_isr(struct kvm_kpic_state *s, int irq) { s->isr &= ~(1 << irq); @@ -136,17 +166,21 @@ static void pic_update_irq(struct kvm_pic *s) void kvm_pic_update_irq(struct kvm_pic *s) { + pic_lock(s); pic_update_irq(s); + pic_unlock(s); } void kvm_pic_set_irq(void *opaque, int irq, int level) { struct kvm_pic *s = opaque; + pic_lock(s); if (irq >= 0 && irq < PIC_NUM_PINS) { pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); pic_update_irq(s); } + pic_unlock(s); } /* @@ -172,6 +206,7 @@ int kvm_pic_read_irq(struct kvm *kvm) int irq, irq2, intno; struct kvm_pic *s = pic_irqchip(kvm); + pic_lock(s); irq = pic_get_irq(&s->pics[0]); if (irq >= 0) { pic_intack(&s->pics[0], irq); @@ -196,6 +231,7 @@ int kvm_pic_read_irq(struct kvm *kvm) intno = s->pics[0].irq_base + irq; } pic_update_irq(s); + pic_unlock(s); kvm_notify_acked_irq(kvm, irq); return intno; @@ -203,7 +239,7 @@ int kvm_pic_read_irq(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s) { - int irq, irqbase; + int irq, irqbase, n; struct kvm *kvm = s->pics_state->irq_request_opaque; struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; @@ -214,8 +250,10 @@ void kvm_pic_reset(struct kvm_kpic_state *s) for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) - if (s->irr & (1 << irq) || s->isr & (1 << irq)) - kvm_notify_acked_irq(kvm, irq+irqbase); + if (s->irr & (1 << irq) || s->isr & (1 << irq)) { + n = irq + irqbase; + s->pics_state->pending_acks |= 1 << n; + } } s->last_irr = 0; s->irr = 0; @@ -406,6 +444,7 @@ static void picdev_write(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte write\n"); return; } + pic_lock(s); switch (addr) { case 0x20: case 0x21: @@ -418,6 +457,7 @@ static void picdev_write(struct kvm_io_device *this, elcr_ioport_write(&s->pics[addr & 1], addr, data); break; } + pic_unlock(s); } static void picdev_read(struct kvm_io_device *this, @@ -431,6 +471,7 @@ static void picdev_read(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte read\n"); return; } + pic_lock(s); switch (addr) { case 0x20: case 0x21: @@ -444,6 +485,7 @@ static void picdev_read(struct kvm_io_device *this, break; } *(unsigned char *)val = data; + pic_unlock(s); } /* @@ -459,7 +501,7 @@ static void pic_irq_request(void *opaque, int level) s->output = level; if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { s->pics[0].isr_ack &= ~(1 << irq); - kvm_vcpu_kick(vcpu); + s->wakeup_needed = true; } } @@ -469,6 +511,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); if (!s) return NULL; + spin_lock_init(&s->lock); + s->kvm = kvm; s->pics[0].elcr_mask = 0xf8; s->pics[1].elcr_mask = 0xde; s->irq_request = pic_irq_request; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index f17c8f5bbf3..2bf32a03cee 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -25,6 +25,7 @@ #include <linux/mm_types.h> #include <linux/hrtimer.h> #include <linux/kvm_host.h> +#include <linux/spinlock.h> #include "iodev.h" #include "ioapic.h" @@ -59,6 +60,10 @@ struct kvm_kpic_state { }; struct kvm_pic { + spinlock_t lock; + bool wakeup_needed; + unsigned pending_acks; + struct kvm *kvm; struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ irq_request_func *irq_request; void *irq_request_opaque; @@ -87,6 +92,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s); void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); +void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_timers(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index 65ef0fc2c03..8e5ee99551f 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h @@ -7,7 +7,7 @@ #include <linux/kvm_host.h> #include <asm/msr.h> -#include "svm.h" +#include <asm/svm.h> static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0fc3cab4894..afac68c0815 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic) return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; } +static inline int apic_lvt_nmi_mode(u32 lvt_val) +{ + return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; +} + static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ @@ -354,6 +359,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_NMI: kvm_inject_nmi(vcpu); + kvm_vcpu_kick(vcpu); break; case APIC_DM_INIT: @@ -380,6 +386,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, } break; + case APIC_DM_EXTINT: + /* + * Should only be called by kvm_apic_local_deliver() with LVT0, + * before NMI watchdog was enabled. Already handled by + * kvm_apic_accept_pic_intr(). + */ + break; + default: printk(KERN_ERR "TODO: unsupported delivery mode %x\n", delivery_mode); @@ -663,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic) apic->timer.period))); } +static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) +{ + int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); + + if (apic_lvt_nmi_mode(lvt0_val)) { + if (!nmi_wd_enabled) { + apic_debug("Receive NMI setting on APIC_LVT0 " + "for cpu %d\n", apic->vcpu->vcpu_id); + apic->vcpu->kvm->arch.vapics_in_nmi_mode++; + } + } else if (nmi_wd_enabled) + apic->vcpu->kvm->arch.vapics_in_nmi_mode--; +} + static void apic_mmio_write(struct kvm_io_device *this, gpa_t address, int len, const void *data) { @@ -743,10 +771,11 @@ static void apic_mmio_write(struct kvm_io_device *this, apic_set_reg(apic, APIC_ICR2, val & 0xff000000); break; + case APIC_LVT0: + apic_manage_nmi_watchdog(apic, val); case APIC_LVTT: case APIC_LVTTHMR: case APIC_LVTPC: - case APIC_LVT0: case APIC_LVT1: case APIC_LVTERR: /* TODO: Check vector */ @@ -961,12 +990,26 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) return 0; } -static int __inject_apic_timer_irq(struct kvm_lapic *apic) +static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) +{ + u32 reg = apic_get_reg(apic, lvt_type); + int vector, mode, trig_mode; + + if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { + vector = reg & APIC_VECTOR_MASK; + mode = reg & APIC_MODE_MASK; + trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; + return __apic_accept_irq(apic, mode, vector, 1, trig_mode); + } + return 0; +} + +void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) { - int vector; + struct kvm_lapic *apic = vcpu->arch.apic; - vector = apic_lvt_vector(apic, APIC_LVTT); - return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); + if (apic) + kvm_apic_local_deliver(apic, APIC_LVT0); } static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) @@ -1061,9 +1104,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (apic && apic_lvt_enabled(apic, APIC_LVTT) && - atomic_read(&apic->timer.pending) > 0) { - if (__inject_apic_timer_irq(apic)) + if (apic && atomic_read(&apic->timer.pending) > 0) { + if (kvm_apic_local_deliver(apic, APIC_LVTT)) atomic_dec(&apic->timer.pending); } } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 410ddbc1aa2..83f11c7474a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -17,7 +17,6 @@ * */ -#include "vmx.h" #include "mmu.h" #include <linux/kvm_host.h> @@ -33,6 +32,7 @@ #include <asm/page.h> #include <asm/cmpxchg.h> #include <asm/io.h> +#include <asm/vmx.h> /* * When setting this variable to true it enables Two-Dimensional-Paging @@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; +static u64 __read_mostly shadow_mt_mask; void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) { @@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte) EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask) + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) { shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; shadow_dirty_mask = dirty_mask; shadow_nx_mask = nx_mask; shadow_x_mask = x_mask; + shadow_mt_mask = mt_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); @@ -384,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) { int *write_count; - write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); + gfn = unalias_gfn(kvm, gfn); + write_count = slot_largepage_idx(gfn, + gfn_to_memslot_unaliased(kvm, gfn)); *write_count += 1; } @@ -392,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) { int *write_count; - write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); + gfn = unalias_gfn(kvm, gfn); + write_count = slot_largepage_idx(gfn, + gfn_to_memslot_unaliased(kvm, gfn)); *write_count -= 1; WARN_ON(*write_count < 0); } static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) { - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + struct kvm_memory_slot *slot; int *largepage_idx; + gfn = unalias_gfn(kvm, gfn); + slot = gfn_to_memslot_unaliased(kvm, gfn); if (slot) { largepage_idx = slot_largepage_idx(gfn, slot); return *largepage_idx; @@ -613,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) return NULL; } -static void rmap_write_protect(struct kvm *kvm, u64 gfn) +static int rmap_write_protect(struct kvm *kvm, u64 gfn) { unsigned long *rmapp; u64 *spte; @@ -659,8 +667,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) spte = rmap_next(kvm, rmapp, spte); } - if (write_protected) - kvm_flush_remote_tlbs(kvm); + return write_protected; } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) @@ -786,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&sp->oos_link); ASSERT(is_empty_shadow_page(sp->spt)); - sp->slot_bitmap = 0; + bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); sp->multimapped = 0; + sp->global = 1; sp->parent_pte = parent_pte; --vcpu->kvm->arch.n_free_mmu_pages; return sp; @@ -900,8 +909,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte) struct kvm_mmu_page *sp = page_header(__pa(spte)); index = spte - sp->spt; - __set_bit(index, sp->unsync_child_bitmap); - sp->unsync_children = 1; + if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) + sp->unsync_children++; + WARN_ON(!sp->unsync_children); } static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) @@ -928,7 +938,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - sp->unsync_children = 1; kvm_mmu_update_parents_unsync(sp); return 1; } @@ -959,38 +968,66 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { } +#define KVM_PAGE_ARRAY_NR 16 + +struct kvm_mmu_pages { + struct mmu_page_and_offset { + struct kvm_mmu_page *sp; + unsigned int idx; + } page[KVM_PAGE_ARRAY_NR]; + unsigned int nr; +}; + #define for_each_unsync_children(bitmap, idx) \ for (idx = find_first_bit(bitmap, 512); \ idx < 512; \ idx = find_next_bit(bitmap, 512, idx+1)) -static int mmu_unsync_walk(struct kvm_mmu_page *sp, - struct kvm_unsync_walk *walker) +int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, + int idx) { - int i, ret; + int i; - if (!sp->unsync_children) - return 0; + if (sp->unsync) + for (i=0; i < pvec->nr; i++) + if (pvec->page[i].sp == sp) + return 0; + + pvec->page[pvec->nr].sp = sp; + pvec->page[pvec->nr].idx = idx; + pvec->nr++; + return (pvec->nr == KVM_PAGE_ARRAY_NR); +} + +static int __mmu_unsync_walk(struct kvm_mmu_page *sp, + struct kvm_mmu_pages *pvec) +{ + int i, ret, nr_unsync_leaf = 0; for_each_unsync_children(sp->unsync_child_bitmap, i) { u64 ent = sp->spt[i]; - if (is_shadow_present_pte(ent)) { + if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { struct kvm_mmu_page *child; child = page_header(ent & PT64_BASE_ADDR_MASK); if (child->unsync_children) { - ret = mmu_unsync_walk(child, walker); - if (ret) + if (mmu_pages_add(pvec, child, i)) + return -ENOSPC; + + ret = __mmu_unsync_walk(child, pvec); + if (!ret) + __clear_bit(i, sp->unsync_child_bitmap); + else if (ret > 0) + nr_unsync_leaf += ret; + else return ret; - __clear_bit(i, sp->unsync_child_bitmap); } if (child->unsync) { - ret = walker->entry(child, walker); - __clear_bit(i, sp->unsync_child_bitmap); - if (ret) - return ret; + nr_unsync_leaf++; + if (mmu_pages_add(pvec, child, i)) + return -ENOSPC; } } } @@ -998,7 +1035,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp, if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) sp->unsync_children = 0; - return 0; + return nr_unsync_leaf; +} + +static int mmu_unsync_walk(struct kvm_mmu_page *sp, + struct kvm_mmu_pages *pvec) +{ + if (!sp->unsync_children) + return 0; + + mmu_pages_add(pvec, sp, 0); + return __mmu_unsync_walk(sp, pvec); } static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) @@ -1021,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) return NULL; } +static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + list_del(&sp->oos_link); + --kvm->stat.mmu_unsync_global; +} + static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { WARN_ON(!sp->unsync); sp->unsync = 0; + if (sp->global) + kvm_unlink_unsync_global(kvm, sp); --kvm->stat.mmu_unsync; } @@ -1037,7 +1092,8 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) return 1; } - rmap_write_protect(vcpu->kvm, sp->gfn); + if (rmap_write_protect(vcpu->kvm, sp->gfn)) + kvm_flush_remote_tlbs(vcpu->kvm); kvm_unlink_unsync_page(vcpu->kvm, sp); if (vcpu->arch.mmu.sync_page(vcpu, sp)) { kvm_mmu_zap_page(vcpu->kvm, sp); @@ -1048,30 +1104,89 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) return 0; } -struct sync_walker { - struct kvm_vcpu *vcpu; - struct kvm_unsync_walk walker; +struct mmu_page_path { + struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; + unsigned int idx[PT64_ROOT_LEVEL-1]; }; -static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) +#define for_each_sp(pvec, sp, parents, i) \ + for (i = mmu_pages_next(&pvec, &parents, -1), \ + sp = pvec.page[i].sp; \ + i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ + i = mmu_pages_next(&pvec, &parents, i)) + +int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, + int i) { - struct sync_walker *sync_walk = container_of(walk, struct sync_walker, - walker); - struct kvm_vcpu *vcpu = sync_walk->vcpu; + int n; - kvm_sync_page(vcpu, sp); - return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); + for (n = i+1; n < pvec->nr; n++) { + struct kvm_mmu_page *sp = pvec->page[n].sp; + + if (sp->role.level == PT_PAGE_TABLE_LEVEL) { + parents->idx[0] = pvec->page[n].idx; + return n; + } + + parents->parent[sp->role.level-2] = sp; + parents->idx[sp->role.level-1] = pvec->page[n].idx; + } + + return n; } -static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +void mmu_pages_clear_parents(struct mmu_page_path *parents) { - struct sync_walker walker = { - .walker = { .entry = mmu_sync_fn, }, - .vcpu = vcpu, - }; + struct kvm_mmu_page *sp; + unsigned int level = 0; + + do { + unsigned int idx = parents->idx[level]; + + sp = parents->parent[level]; + if (!sp) + return; + + --sp->unsync_children; + WARN_ON((int)sp->unsync_children < 0); + __clear_bit(idx, sp->unsync_child_bitmap); + level++; + } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); +} + +static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, + struct mmu_page_path *parents, + struct kvm_mmu_pages *pvec) +{ + parents->parent[parent->role.level-1] = NULL; + pvec->nr = 0; +} + +static void mmu_sync_children(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *parent) +{ + int i; + struct kvm_mmu_page *sp; + struct mmu_page_path parents; + struct kvm_mmu_pages pages; + + kvm_mmu_pages_init(parent, &parents, &pages); + while (mmu_unsync_walk(parent, &pages)) { + int protected = 0; - while (mmu_unsync_walk(sp, &walker.walker)) + for_each_sp(pages, sp, parents, i) + protected |= rmap_write_protect(vcpu->kvm, sp->gfn); + + if (protected) + kvm_flush_remote_tlbs(vcpu->kvm); + + for_each_sp(pages, sp, parents, i) { + kvm_sync_page(vcpu, sp); + mmu_pages_clear_parents(&parents); + } cond_resched_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_pages_init(parent, &parents, &pages); + } } static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, @@ -1129,7 +1244,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, sp->role = role; hlist_add_head(&sp->hash_link, bucket); if (!metaphysical) { - rmap_write_protect(vcpu->kvm, gfn); + if (rmap_write_protect(vcpu->kvm, gfn)) + kvm_flush_remote_tlbs(vcpu->kvm); account_shadowed(vcpu->kvm, gfn); } if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) @@ -1153,6 +1269,8 @@ static int walk_shadow(struct kvm_shadow_walk *walker, if (level == PT32E_ROOT_LEVEL) { shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; shadow_addr &= PT64_BASE_ADDR_MASK; + if (!shadow_addr) + return 1; --level; } @@ -1237,33 +1355,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) } } -struct zap_walker { - struct kvm_unsync_walk walker; - struct kvm *kvm; - int zapped; -}; - -static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) +static int mmu_zap_unsync_children(struct kvm *kvm, + struct kvm_mmu_page *parent) { - struct zap_walker *zap_walk = container_of(walk, struct zap_walker, - walker); - kvm_mmu_zap_page(zap_walk->kvm, sp); - zap_walk->zapped = 1; - return 0; -} + int i, zapped = 0; + struct mmu_page_path parents; + struct kvm_mmu_pages pages; -static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - struct zap_walker walker = { - .walker = { .entry = mmu_zap_fn, }, - .kvm = kvm, - .zapped = 0, - }; - - if (sp->role.level == PT_PAGE_TABLE_LEVEL) + if (parent->role.level == PT_PAGE_TABLE_LEVEL) return 0; - mmu_unsync_walk(sp, &walker.walker); - return walker.zapped; + + kvm_mmu_pages_init(parent, &parents, &pages); + while (mmu_unsync_walk(parent, &pages)) { + struct kvm_mmu_page *sp; + + for_each_sp(pages, sp, parents, i) { + kvm_mmu_zap_page(kvm, sp); + mmu_pages_clear_parents(&parents); + } + zapped += pages.nr; + kvm_mmu_pages_init(parent, &parents, &pages); + } + + return zapped; } static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) @@ -1362,7 +1476,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); struct kvm_mmu_page *sp = page_header(__pa(pte)); - __set_bit(slot, &sp->slot_bitmap); + __set_bit(slot, sp->slot_bitmap); } static void mmu_convert_notrap(struct kvm_mmu_page *sp) @@ -1393,6 +1507,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) return page; } +/* + * The function is based on mtrr_type_lookup() in + * arch/x86/kernel/cpu/mtrr/generic.c + */ +static int get_mtrr_type(struct mtrr_state_type *mtrr_state, + u64 start, u64 end) +{ + int i; + u64 base, mask; + u8 prev_match, curr_match; + int num_var_ranges = KVM_NR_VAR_MTRR; + + if (!mtrr_state->enabled) + return 0xFF; + + /* Make end inclusive end, instead of exclusive */ + end--; + + /* Look in fixed ranges. Just return the type as per start */ + if (mtrr_state->have_fixed && (start < 0x100000)) { + int idx; + + if (start < 0x80000) { + idx = 0; + idx += (start >> 16); + return mtrr_state->fixed_ranges[idx]; + } else if (start < 0xC0000) { + idx = 1 * 8; + idx += ((start - 0x80000) >> 14); + return mtrr_state->fixed_ranges[idx]; + } else if (start < 0x1000000) { + idx = 3 * 8; + idx += ((start - 0xC0000) >> 12); + return mtrr_state->fixed_ranges[idx]; + } + } + + /* + * Look in variable ranges + * Look of multiple ranges matching this address and pick type + * as per MTRR precedence + */ + if (!(mtrr_state->enabled & 2)) + return mtrr_state->def_type; + + prev_match = 0xFF; + for (i = 0; i < num_var_ranges; ++i) { + unsigned short start_state, end_state; + + if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11))) + continue; + + base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) + + (mtrr_state->var_ranges[i].base_lo & PAGE_MASK); + mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) + + (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK); + + start_state = ((start & mask) == (base & mask)); + end_state = ((end & mask) == (base & mask)); + if (start_state != end_state) + return 0xFE; + + if ((start & mask) != (base & mask)) + continue; + + curr_match = mtrr_state->var_ranges[i].base_lo & 0xff; + if (prev_match == 0xFF) { + prev_match = curr_match; + continue; + } + + if (prev_match == MTRR_TYPE_UNCACHABLE || + curr_match == MTRR_TYPE_UNCACHABLE) + return MTRR_TYPE_UNCACHABLE; + + if ((prev_match == MTRR_TYPE_WRBACK && + curr_match == MTRR_TYPE_WRTHROUGH) || + (prev_match == MTRR_TYPE_WRTHROUGH && + curr_match == MTRR_TYPE_WRBACK)) { + prev_match = MTRR_TYPE_WRTHROUGH; + curr_match = MTRR_TYPE_WRTHROUGH; + } + + if (prev_match != curr_match) + return MTRR_TYPE_UNCACHABLE; + } + + if (prev_match != 0xFF) + return prev_match; + + return mtrr_state->def_type; +} + +static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + u8 mtrr; + + mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT, + (gfn << PAGE_SHIFT) + PAGE_SIZE); + if (mtrr == 0xfe || mtrr == 0xff) + mtrr = MTRR_TYPE_WRBACK; + return mtrr; +} + static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { unsigned index; @@ -1409,9 +1627,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) if (s->role.word != sp->role.word) return 1; } - kvm_mmu_mark_parents_unsync(vcpu, sp); ++vcpu->kvm->stat.mmu_unsync; sp->unsync = 1; + + if (sp->global) { + list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages); + ++vcpu->kvm->stat.mmu_unsync_global; + } else + kvm_mmu_mark_parents_unsync(vcpu, sp); + mmu_convert_notrap(sp); return 0; } @@ -1437,11 +1661,24 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pte_access, int user_fault, int write_fault, int dirty, int largepage, - gfn_t gfn, pfn_t pfn, bool speculative, + int global, gfn_t gfn, pfn_t pfn, bool speculative, bool can_unsync) { u64 spte; int ret = 0; + u64 mt_mask = shadow_mt_mask; + struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); + + if (!(vcpu->arch.cr4 & X86_CR4_PGE)) + global = 0; + if (!global && sp->global) { + sp->global = 0; + if (sp->unsync) { + kvm_unlink_unsync_global(vcpu->kvm, sp); + kvm_mmu_mark_parents_unsync(vcpu, sp); + } + } + /* * We don't set the accessed bit, since we sometimes want to see * whether the guest actually used the pte (in order to detect @@ -1460,6 +1697,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= shadow_user_mask; if (largepage) spte |= PT_PAGE_SIZE_MASK; + if (mt_mask) { + mt_mask = get_memory_type(vcpu, gfn) << + kvm_x86_ops->get_mt_mask_shift(); + spte |= mt_mask; + } spte |= (u64)pfn << PAGE_SHIFT; @@ -1474,6 +1716,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= PT_WRITABLE_MASK; + /* + * Optimization: for pte sync, if spte was writable the hash + * lookup is unnecessary (and expensive). Write protection + * is responsibility of mmu_get_page / kvm_sync_page. + * Same reasoning can be applied to dirty page accounting. + */ + if (!can_unsync && is_writeble_pte(*shadow_pte)) + goto set_pte; + if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { pgprintk("%s: found shadow page for %lx, marking ro\n", __func__, gfn); @@ -1495,8 +1746,8 @@ set_pte: static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, - int *ptwrite, int largepage, gfn_t gfn, - pfn_t pfn, bool speculative) + int *ptwrite, int largepage, int global, + gfn_t gfn, pfn_t pfn, bool speculative) { int was_rmapped = 0; int was_writeble = is_writeble_pte(*shadow_pte); @@ -1529,7 +1780,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, } } if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, - dirty, largepage, gfn, pfn, speculative, true)) { + dirty, largepage, global, gfn, pfn, speculative, true)) { if (write_fault) *ptwrite = 1; kvm_x86_ops->tlb_flush(vcpu); @@ -1586,7 +1837,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk, || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 0, walk->write, 1, &walk->pt_write, - walk->largepage, gfn, walk->pfn, false); + walk->largepage, 0, gfn, walk->pfn, false); ++vcpu->stat.pf_fixed; return 1; } @@ -1773,6 +2024,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) } } +static void mmu_sync_global(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_mmu_page *sp, *n; + + list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link) + kvm_sync_page(vcpu, sp); +} + void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) { spin_lock(&vcpu->kvm->mmu_lock); @@ -1780,6 +2040,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->kvm->mmu_lock); } +void kvm_mmu_sync_global(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->kvm->mmu_lock); + mmu_sync_global(vcpu); + spin_unlock(&vcpu->kvm->mmu_lock); +} + static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) { return vaddr; @@ -2178,7 +2445,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes) + const u8 *new, int bytes, + bool guest_initiated) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *sp; @@ -2204,15 +2472,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_mmu_free_some_pages(vcpu); ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, "pre pte write"); - if (gfn == vcpu->arch.last_pt_write_gfn - && !last_updated_pte_accessed(vcpu)) { - ++vcpu->arch.last_pt_write_count; - if (vcpu->arch.last_pt_write_count >= 3) - flooded = 1; - } else { - vcpu->arch.last_pt_write_gfn = gfn; - vcpu->arch.last_pt_write_count = 1; - vcpu->arch.last_pte_updated = NULL; + if (guest_initiated) { + if (gfn == vcpu->arch.last_pt_write_gfn + && !last_updated_pte_accessed(vcpu)) { + ++vcpu->arch.last_pt_write_count; + if (vcpu->arch.last_pt_write_count >= 3) + flooded = 1; + } else { + vcpu->arch.last_pt_write_gfn = gfn; + vcpu->arch.last_pt_write_count = 1; + vcpu->arch.last_pte_updated = NULL; + } } index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; @@ -2352,9 +2622,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { - spin_lock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.invlpg(vcpu, gva); - spin_unlock(&vcpu->kvm->mmu_lock); kvm_mmu_flush_tlb(vcpu); ++vcpu->stat.invlpg; } @@ -2451,7 +2719,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) int i; u64 *pt; - if (!test_bit(slot, &sp->slot_bitmap)) + if (!test_bit(slot, sp->slot_bitmap)) continue; pt = sp->spt; @@ -2860,8 +3128,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) if (sp->role.metaphysical) continue; - slot = gfn_to_memslot(vcpu->kvm, sp->gfn); gfn = unalias_gfn(vcpu->kvm, sp->gfn); + slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); rmapp = &slot->rmap[gfn - slot->base_gfn]; if (*rmapp) printk(KERN_ERR "%s: (%s) shadow page has writable" diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 84eee43bbe7..9fd78b6e17a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -82,6 +82,7 @@ struct shadow_walker { int *ptwrite; pfn_t pfn; u64 *sptep; + gpa_t pte_gpa; }; static gfn_t gpte_to_gfn(pt_element_t gpte) @@ -222,7 +223,7 @@ walk: if (ret) goto walk; pte |= PT_DIRTY_MASK; - kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); + kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0); walker->ptes[walker->level - 1] = pte; } @@ -274,7 +275,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, return; kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, - gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), + gpte & PT_DIRTY_MASK, NULL, largepage, + gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), pfn, true); } @@ -301,8 +303,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, sw->user_fault, sw->write_fault, gw->ptes[gw->level-1] & PT_DIRTY_MASK, - sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, - false); + sw->ptwrite, sw->largepage, + gw->ptes[gw->level-1] & PT_GLOBAL_MASK, + gw->gfn, sw->pfn, false); sw->sptep = sptep; return 1; } @@ -466,10 +469,22 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, struct kvm_vcpu *vcpu, u64 addr, u64 *sptep, int level) { + struct shadow_walker *sw = + container_of(_sw, struct shadow_walker, walker); - if (level == PT_PAGE_TABLE_LEVEL) { - if (is_shadow_present_pte(*sptep)) + /* FIXME: properly handle invlpg on large guest pages */ + if (level == PT_PAGE_TABLE_LEVEL || + ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + + sw->pte_gpa = (sp->gfn << PAGE_SHIFT); + sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); + + if (is_shadow_present_pte(*sptep)) { rmap_remove(vcpu->kvm, sptep); + if (is_large_pte(*sptep)) + --vcpu->kvm->stat.lpages; + } set_shadow_pte(sptep, shadow_trap_nonpresent_pte); return 1; } @@ -480,11 +495,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { + pt_element_t gpte; struct shadow_walker walker = { .walker = { .entry = FNAME(shadow_invlpg_entry), }, + .pte_gpa = -1, }; + spin_lock(&vcpu->kvm->mmu_lock); walk_shadow(&walker.walker, vcpu, gva); + spin_unlock(&vcpu->kvm->mmu_lock); + if (walker.pte_gpa == -1) + return; + if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, + sizeof(pt_element_t))) + return; + if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { + if (mmu_topup_memory_caches(vcpu)) + return; + kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, + sizeof(pt_element_t), 0); + } } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) @@ -580,7 +610,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) nr_present++; pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, - is_dirty_pte(gpte), 0, gfn, + is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, spte_to_pfn(sp->spt[i]), true, false); } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9c4ce657d96..1452851ae25 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -28,6 +28,8 @@ #include <asm/desc.h> +#include <asm/virtext.h> + #define __ex(x) __kvm_handle_fault_on_reboot(x) MODULE_AUTHOR("Qumranet"); @@ -245,34 +247,19 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) static int has_svm(void) { - uint32_t eax, ebx, ecx, edx; - - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - printk(KERN_INFO "has_svm: not amd\n"); - return 0; - } + const char *msg; - cpuid(0x80000000, &eax, &ebx, &ecx, &edx); - if (eax < SVM_CPUID_FUNC) { - printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n"); + if (!cpu_has_svm(&msg)) { + printk(KERN_INFO "has_svn: %s\n", msg); return 0; } - cpuid(0x80000001, &eax, &ebx, &ecx, &edx); - if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { - printk(KERN_DEBUG "has_svm: svm not available\n"); - return 0; - } return 1; } static void svm_hardware_disable(void *garbage) { - uint64_t efer; - - wrmsrl(MSR_VM_HSAVE_PA, 0); - rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); + cpu_svm_disable(); } static void svm_hardware_enable(void *garbage) @@ -772,6 +759,22 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; + + /* + * SVM always stores 0 for the 'G' bit in the CS selector in + * the VMCB on a VMEXIT. This hurts cross-vendor migration: + * Intel's VMENTRY has a check on the 'G' bit. + */ + if (seg == VCPU_SREG_CS) + var->g = s->limit > 0xfffff; + + /* + * Work around a bug where the busy flag in the tr selector + * isn't exposed + */ + if (seg == VCPU_SREG_TR) + var->type |= 0x2; + var->unusable = !var->present; } @@ -1099,6 +1102,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) rep = (io_info & SVM_IOIO_REP_MASK) != 0; down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; + skip_emulated_instruction(&svm->vcpu); return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); } @@ -1912,6 +1916,11 @@ static int get_npt_level(void) #endif } +static int svm_get_mt_mask_shift(void) +{ + return 0; +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -1967,6 +1976,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, + .get_mt_mask_shift = svm_get_mt_mask_shift, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a4018b01e1f..6259d746764 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -16,7 +16,6 @@ */ #include "irq.h" -#include "vmx.h" #include "mmu.h" #include <linux/kvm_host.h> @@ -31,6 +30,8 @@ #include <asm/io.h> #include <asm/desc.h> +#include <asm/vmx.h> +#include <asm/virtext.h> #define __ex(x) __kvm_handle_fault_on_reboot(x) @@ -90,6 +91,11 @@ struct vcpu_vmx { } rmode; int vpid; bool emulation_required; + + /* Support for vnmi-less CPUs */ + int soft_vnmi_blocked; + ktime_t entry_time; + s64 vnmi_blocked_time; }; static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) @@ -122,7 +128,7 @@ static struct vmcs_config { u32 vmentry_ctrl; } vmcs_config; -struct vmx_capability { +static struct vmx_capability { u32 ept; u32 vpid; } vmx_capability; @@ -957,6 +963,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); break; + case MSR_IA32_CR_PAT: + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + vmcs_write64(GUEST_IA32_PAT, data); + vcpu->arch.pat = data; + break; + } + /* Otherwise falls through to kvm_set_msr_common */ default: vmx_load_host_state(vmx); msr = find_msr_entry(vmx, msr_index); @@ -1032,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu) static __init int cpu_has_kvm_support(void) { - unsigned long ecx = cpuid_ecx(1); - return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ + return cpu_has_vmx(); } static __init int vmx_disabled_by_bios(void) @@ -1079,13 +1091,22 @@ static void vmclear_local_vcpus(void) __vcpu_clear(vmx); } -static void hardware_disable(void *garbage) + +/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() + * tricks. + */ +static void kvm_cpu_vmxoff(void) { - vmclear_local_vcpus(); asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); write_cr4(read_cr4() & ~X86_CR4_VMXE); } +static void hardware_disable(void *garbage) +{ + vmclear_local_vcpus(); + kvm_cpu_vmxoff(); +} + static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result) { @@ -1176,12 +1197,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) #ifdef CONFIG_X86_64 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; #endif - opt = 0; + opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) return -EIO; - min = opt = 0; + min = 0; + opt = VM_ENTRY_LOAD_IA32_PAT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, &_vmentry_control) < 0) return -EIO; @@ -2087,8 +2109,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) */ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) { - u32 host_sysenter_cs; + u32 host_sysenter_cs, msr_low, msr_high; u32 junk; + u64 host_pat; unsigned long a; struct descriptor_table dt; int i; @@ -2176,6 +2199,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) rdmsrl(MSR_IA32_SYSENTER_EIP, a); vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ + if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { + rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); + host_pat = msr_low | ((u64) msr_high << 32); + vmcs_write64(HOST_IA32_PAT, host_pat); + } + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); + host_pat = msr_low | ((u64) msr_high << 32); + /* Write the default value follow host pat */ + vmcs_write64(GUEST_IA32_PAT, host_pat); + /* Keep arch.pat sync with GUEST_IA32_PAT */ + vmx->vcpu.arch.pat = host_pat; + } + for (i = 0; i < NR_VMX_MSR; ++i) { u32 index = vmx_msr_index[i]; u32 data_low, data_high; @@ -2230,6 +2267,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.rmode.active = 0; + vmx->soft_vnmi_blocked = 0; + vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(&vmx->vcpu, 0); msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; @@ -2335,6 +2374,29 @@ out: return ret; } +static void enable_irq_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + +static void enable_nmi_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + if (!cpu_has_virtual_nmis()) { + enable_irq_window(vcpu); + return; + } + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2358,10 +2420,54 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) static void vmx_inject_nmi(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!cpu_has_virtual_nmis()) { + /* + * Tracking the NMI-blocked state in software is built upon + * finding the next open IRQ window. This, in turn, depends on + * well-behaving guests: They have to keep IRQs disabled at + * least as long as the NMI handler runs. Otherwise we may + * cause NMI nesting, maybe breaking the guest. But as this is + * highly unlikely, we can live with the residual risk. + */ + vmx->soft_vnmi_blocked = 1; + vmx->vnmi_blocked_time = 0; + } + + ++vcpu->stat.nmi_injections; + if (vcpu->arch.rmode.active) { + vmx->rmode.irq.pending = true; + vmx->rmode.irq.vector = NMI_VECTOR; + vmx->rmode.irq.rip = kvm_rip_read(vcpu); + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + NMI_VECTOR | INTR_TYPE_SOFT_INTR | + INTR_INFO_VALID_MASK); + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); + kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); + return; + } vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); } +static void vmx_update_window_states(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + + vcpu->arch.nmi_window_open = + !(guest_intr & (GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_NMI)); + if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) + vcpu->arch.nmi_window_open = 0; + + vcpu->arch.interrupt_window_open = + ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + !(guest_intr & (GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS))); +} + static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) { int word_index = __ffs(vcpu->arch.irq_summary); @@ -2374,40 +2480,49 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) kvm_queue_interrupt(vcpu, irq); } - static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u32 cpu_based_vm_exec_control; - - vcpu->arch.interrupt_window_open = - ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); + vmx_update_window_states(vcpu); - if (vcpu->arch.interrupt_window_open && - vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) - kvm_do_inject_irq(vcpu); + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vcpu->arch.nmi_window_open) { + vcpu->arch.nmi_pending = false; + vcpu->arch.nmi_injected = true; + } else { + enable_nmi_window(vcpu); + return; + } + } + if (vcpu->arch.nmi_injected) { + vmx_inject_nmi(vcpu); + if (vcpu->arch.nmi_pending) + enable_nmi_window(vcpu); + else if (vcpu->arch.irq_summary + || kvm_run->request_interrupt_window) + enable_irq_window(vcpu); + return; + } - if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) - vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); + if (vcpu->arch.interrupt_window_open) { + if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) + kvm_do_inject_irq(vcpu); - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + if (vcpu->arch.interrupt.pending) + vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); + } if (!vcpu->arch.interrupt_window_open && (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) - /* - * Interrupts blocked. Wait for unblock. - */ - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; - else - cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + enable_irq_window(vcpu); } static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { int ret; struct kvm_userspace_memory_region tss_mem = { - .slot = 8, + .slot = TSS_PRIVATE_MEMSLOT, .guest_phys_addr = addr, .memory_size = PAGE_SIZE * 3, .flags = 0, @@ -2492,7 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); } - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) return 1; /* already handled by vmx_vcpu_run() */ if (is_no_device(intr_info)) { @@ -2581,6 +2696,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rep = (exit_qualification & 32) != 0; port = exit_qualification >> 16; + skip_emulated_instruction(vcpu); return kvm_emulate_pio(vcpu, kvm_run, in, size, port); } @@ -2767,6 +2883,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); KVMTRACE_0D(PEND_INTR, vcpu, handler); + ++vcpu->stat.irq_window_exits; /* * If the user space waits to inject interrupts, exit as soon as @@ -2775,7 +2892,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, if (kvm_run->request_interrupt_window && !vcpu->arch.irq_summary) { kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; - ++vcpu->stat.irq_window_exits; return 0; } return 1; @@ -2832,6 +2948,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification; u16 tss_selector; int reason; @@ -2839,6 +2956,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) exit_qualification = vmcs_readl(EXIT_QUALIFICATION); reason = (u32)exit_qualification >> 30; + if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && + (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && + (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) + == INTR_TYPE_NMI_INTR) { + vcpu->arch.nmi_injected = false; + if (cpu_has_virtual_nmis()) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + } tss_selector = exit_qualification; return kvm_task_switch(vcpu, tss_selector, reason); @@ -2927,16 +3053,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, while (!guest_state_valid(vcpu)) { err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); - switch (err) { - case EMULATE_DONE: - break; - case EMULATE_DO_MMIO: - kvm_report_emulation_failure(vcpu, "mmio"); - /* TODO: Handle MMIO */ - return; - default: - kvm_report_emulation_failure(vcpu, "emulation failure"); - return; + if (err == EMULATE_DO_MMIO) + break; + + if (err != EMULATE_DONE) { + kvm_report_emulation_failure(vcpu, "emulation failure"); + return; } if (signal_pending(current)) @@ -2948,8 +3070,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, local_irq_disable(); preempt_disable(); - /* Guest state should be valid now, no more emulation should be needed */ - vmx->emulation_required = 0; + /* Guest state should be valid now except if we need to + * emulate an MMIO */ + if (guest_state_valid(vcpu)) + vmx->emulation_required = 0; } /* @@ -2996,6 +3120,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); + /* If we need to emulate an MMIO from handle_invalid_guest_state + * we just return 0 */ + if (vmx->emulation_required && emulate_invalid_guest_state) + return 0; + /* Access CR3 don't cause VMExit in paging mode, so we need * to sync with guest real CR3. */ if (vm_need_ept() && is_paging(vcpu)) { @@ -3012,9 +3141,32 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if ((vectoring_info & VECTORING_INFO_VALID_MASK) && (exit_reason != EXIT_REASON_EXCEPTION_NMI && - exit_reason != EXIT_REASON_EPT_VIOLATION)) - printk(KERN_WARNING "%s: unexpected, valid vectoring info and " - "exit reason is 0x%x\n", __func__, exit_reason); + exit_reason != EXIT_REASON_EPT_VIOLATION && + exit_reason != EXIT_REASON_TASK_SWITCH)) + printk(KERN_WARNING "%s: unexpected, valid vectoring info " + "(0x%x) and exit reason is 0x%x\n", + __func__, vectoring_info, exit_reason); + + if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { + if (vcpu->arch.interrupt_window_open) { + vmx->soft_vnmi_blocked = 0; + vcpu->arch.nmi_window_open = 1; + } else if (vmx->vnmi_blocked_time > 1000000000LL && + vcpu->arch.nmi_pending) { + /* + * This CPU don't support us in finding the end of an + * NMI-blocked window if the guest runs with IRQs + * disabled. So we pull the trigger after 1 s of + * futile waiting, but inform the user about this. + */ + printk(KERN_WARNING "%s: Breaking out of NMI-blocked " + "state on VCPU %d after 1 s timeout\n", + __func__, vcpu->vcpu_id); + vmx->soft_vnmi_blocked = 0; + vmx->vcpu.arch.nmi_window_open = 1; + } + } + if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); @@ -3042,51 +3194,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu) vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); } -static void enable_irq_window(struct kvm_vcpu *vcpu) -{ - u32 cpu_based_vm_exec_control; - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); -} - -static void enable_nmi_window(struct kvm_vcpu *vcpu) -{ - u32 cpu_based_vm_exec_control; - - if (!cpu_has_virtual_nmis()) - return; - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); -} - -static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) -{ - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - return !(guest_intr & (GUEST_INTR_STATE_NMI | - GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_STI)); -} - -static int vmx_irq_enabled(struct kvm_vcpu *vcpu) -{ - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_STI)) && - (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); -} - -static void enable_intr_window(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.nmi_pending) - enable_nmi_window(vcpu); - else if (kvm_cpu_has_interrupt(vcpu)) - enable_irq_window(vcpu); -} - static void vmx_complete_interrupts(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -3109,7 +3216,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) if (unblock_nmi && vector != DF_VECTOR) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); - } + } else if (unlikely(vmx->soft_vnmi_blocked)) + vmx->vnmi_blocked_time += + ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); idt_vectoring_info = vmx->idt_vectoring_info; idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; @@ -3147,26 +3256,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) { update_tpr_threshold(vcpu); - if (cpu_has_virtual_nmis()) { - if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vcpu->arch.interrupt.pending) { - enable_nmi_window(vcpu); - } else if (vmx_nmi_enabled(vcpu)) { - vcpu->arch.nmi_pending = false; - vcpu->arch.nmi_injected = true; - } else { - enable_intr_window(vcpu); - return; - } - } - if (vcpu->arch.nmi_injected) { - vmx_inject_nmi(vcpu); - enable_intr_window(vcpu); + vmx_update_window_states(vcpu); + + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vcpu->arch.nmi_window_open) { + vcpu->arch.nmi_pending = false; + vcpu->arch.nmi_injected = true; + } else { + enable_nmi_window(vcpu); return; } } + if (vcpu->arch.nmi_injected) { + vmx_inject_nmi(vcpu); + if (vcpu->arch.nmi_pending) + enable_nmi_window(vcpu); + else if (kvm_cpu_has_interrupt(vcpu)) + enable_irq_window(vcpu); + return; + } if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { - if (vmx_irq_enabled(vcpu)) + if (vcpu->arch.interrupt_window_open) kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); else enable_irq_window(vcpu); @@ -3174,6 +3286,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (vcpu->arch.interrupt.pending) { vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); + if (kvm_cpu_has_interrupt(vcpu)) + enable_irq_window(vcpu); } } @@ -3213,6 +3327,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) struct vcpu_vmx *vmx = to_vmx(vcpu); u32 intr_info; + /* Record the guest's net vcpu time for enforced NMI injections. */ + if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) + vmx->entry_time = ktime_get(); + /* Handle invalid guest state instead of entering VMX */ if (vmx->emulation_required && emulate_invalid_guest_state) { handle_invalid_guest_state(vcpu, kvm_run); @@ -3327,9 +3445,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vmx->rmode.irq.pending) fixup_rmode_irq(vmx); - vcpu->arch.interrupt_window_open = - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; + vmx_update_window_states(vcpu); asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); vmx->launched = 1; @@ -3337,7 +3453,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* We need to handle NMIs before interrupts are enabled */ - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && (intr_info & INTR_INFO_VALID_MASK)) { KVMTRACE_0D(NMI, vcpu, handler); asm("int $2"); @@ -3455,6 +3571,11 @@ static int get_ept_level(void) return VMX_EPT_DEFAULT_GAW + 1; } +static int vmx_get_mt_mask_shift(void) +{ + return VMX_EPT_MT_EPTE_SHIFT; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -3510,6 +3631,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, + .get_mt_mask_shift = vmx_get_mt_mask_shift, }; static int __init vmx_init(void) @@ -3566,10 +3688,10 @@ static int __init vmx_init(void) bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK | - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | VMX_EPT_IGMT_BIT); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, - VMX_EPT_EXECUTABLE_MASK); + VMX_EPT_EXECUTABLE_MASK, + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); kvm_enable_tdp(); } else kvm_disable_tdp(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f1f8ff2f1fa..cc17546a240 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -34,11 +34,13 @@ #include <linux/module.h> #include <linux/mman.h> #include <linux/highmem.h> +#include <linux/iommu.h> #include <linux/intel-iommu.h> #include <asm/uaccess.h> #include <asm/msr.h> #include <asm/desc.h> +#include <asm/mtrr.h> #define MAX_IO_MSRS 256 #define CR0_RESERVED_BITS \ @@ -86,6 +88,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, { "request_irq", VCPU_STAT(request_irq_exits) }, + { "request_nmi", VCPU_STAT(request_nmi_exits) }, { "irq_exits", VCPU_STAT(irq_exits) }, { "host_state_reload", VCPU_STAT(host_state_reload) }, { "efer_reload", VCPU_STAT(efer_reload) }, @@ -93,6 +96,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "insn_emulation", VCPU_STAT(insn_emulation) }, { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, { "irq_injections", VCPU_STAT(irq_injections) }, + { "nmi_injections", VCPU_STAT(nmi_injections) }, { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, { "mmu_pte_write", VM_STAT(mmu_pte_write) }, { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, @@ -101,6 +105,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_recycled", VM_STAT(mmu_recycled) }, { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, + { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { "largepages", VM_STAT(lpages) }, { NULL } @@ -312,6 +317,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_x86_ops->set_cr0(vcpu, cr0); vcpu->arch.cr0 = cr0; + kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); return; } @@ -355,6 +361,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->arch.cr4 = cr4; + kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); } EXPORT_SYMBOL_GPL(kvm_set_cr4); @@ -449,7 +456,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, - MSR_IA32_PERF_STATUS, + MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT }; static unsigned num_msrs_to_save; @@ -648,10 +655,38 @@ static bool msr_mtrr_valid(unsigned msr) static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { + u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; + if (!msr_mtrr_valid(msr)) return 1; - vcpu->arch.mtrr[msr - 0x200] = data; + if (msr == MSR_MTRRdefType) { + vcpu->arch.mtrr_state.def_type = data; + vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10; + } else if (msr == MSR_MTRRfix64K_00000) + p[0] = data; + else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) + p[1 + msr - MSR_MTRRfix16K_80000] = data; + else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) + p[3 + msr - MSR_MTRRfix4K_C0000] = data; + else if (msr == MSR_IA32_CR_PAT) + vcpu->arch.pat = data; + else { /* Variable MTRRs */ + int idx, is_mtrr_mask; + u64 *pt; + + idx = (msr - 0x200) / 2; + is_mtrr_mask = msr - 0x200 - 2 * idx; + if (!is_mtrr_mask) + pt = + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; + else + pt = + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; + *pt = data; + } + + kvm_mmu_reset_context(vcpu); return 0; } @@ -747,10 +782,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { + u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; + if (!msr_mtrr_valid(msr)) return 1; - *pdata = vcpu->arch.mtrr[msr - 0x200]; + if (msr == MSR_MTRRdefType) + *pdata = vcpu->arch.mtrr_state.def_type + + (vcpu->arch.mtrr_state.enabled << 10); + else if (msr == MSR_MTRRfix64K_00000) + *pdata = p[0]; + else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) + *pdata = p[1 + msr - MSR_MTRRfix16K_80000]; + else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) + *pdata = p[3 + msr - MSR_MTRRfix4K_C0000]; + else if (msr == MSR_IA32_CR_PAT) + *pdata = vcpu->arch.pat; + else { /* Variable MTRRs */ + int idx, is_mtrr_mask; + u64 *pt; + + idx = (msr - 0x200) / 2; + is_mtrr_mask = msr - 0x200 - 2 * idx; + if (!is_mtrr_mask) + pt = + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; + else + pt = + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; + *pdata = *pt; + } + return 0; } @@ -903,7 +965,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IRQCHIP: case KVM_CAP_HLT: case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: - case KVM_CAP_USER_MEMORY: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: case KVM_CAP_CLOCKSOURCE: @@ -929,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = !tdp_enabled; break; case KVM_CAP_IOMMU: - r = intel_iommu_found(); + r = iommu_found(); break; default: r = 0; @@ -1188,6 +1249,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, int t, times = entry->eax & 0xff; entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; + entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; for (t = 1; t < times && *nent < maxnent; ++t) { do_cpuid_1_ent(&entry[t], function, 0); entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; @@ -1218,7 +1280,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* read more entries until level_type is zero */ for (i = 1; *nent < maxnent; ++i) { - level_type = entry[i - 1].ecx & 0xff; + level_type = entry[i - 1].ecx & 0xff00; if (!level_type) break; do_cpuid_1_ent(&entry[i], function, i); @@ -1318,6 +1380,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, return 0; } +static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) +{ + vcpu_load(vcpu); + kvm_inject_nmi(vcpu); + vcpu_put(vcpu); + + return 0; +} + static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, struct kvm_tpr_access_ctl *tac) { @@ -1377,6 +1448,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = 0; break; } + case KVM_NMI: { + r = kvm_vcpu_ioctl_nmi(vcpu); + if (r) + goto out; + r = 0; + break; + } case KVM_SET_CPUID: { struct kvm_cpuid __user *cpuid_arg = argp; struct kvm_cpuid cpuid; @@ -1968,7 +2046,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); if (ret < 0) return 0; - kvm_mmu_pte_write(vcpu, gpa, val, bytes); + kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); return 1; } @@ -2404,8 +2482,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, val = kvm_register_read(vcpu, VCPU_REGS_RAX); memcpy(vcpu->arch.pio_data, &val, 4); - kvm_x86_ops->skip_emulated_instruction(vcpu); - pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); if (pio_dev) { kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); @@ -2541,7 +2617,7 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_nonpresent_ptes(0ull, 0ull); kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, - PT_DIRTY_MASK, PT64_NX_MASK, 0); + PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); return 0; out: @@ -2729,7 +2805,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; /* when no next entry is found, the current entry[i] is reselected */ - for (j = i + 1; j == i; j = (j + 1) % nent) { + for (j = i + 1; ; j = (j + 1) % nent) { struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; if (ej->function == e->function) { ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; @@ -2973,7 +3049,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) pr_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, vcpu->arch.sipi_vector); kvm_lapic_reset(vcpu); - r = kvm_x86_ops->vcpu_reset(vcpu); + r = kvm_arch_vcpu_reset(vcpu); if (r) return r; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -3275,9 +3351,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, kvm_desct->padding = 0; } -static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, - u16 selector, - struct descriptor_table *dtable) +static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, + u16 selector, + struct descriptor_table *dtable) { if (selector & 1 << 2) { struct kvm_segment kvm_seg; @@ -3302,7 +3378,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct descriptor_table dtable; u16 index = selector >> 3; - get_segment_descritptor_dtable(vcpu, selector, &dtable); + get_segment_descriptor_dtable(vcpu, selector, &dtable); if (dtable.limit < index * 8 + 7) { kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); @@ -3321,7 +3397,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct descriptor_table dtable; u16 index = selector >> 3; - get_segment_descritptor_dtable(vcpu, selector, &dtable); + get_segment_descriptor_dtable(vcpu, selector, &dtable); if (dtable.limit < index * 8 + 7) return 1; @@ -3900,6 +3976,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) /* We do fxsave: this must be aligned. */ BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); + vcpu->arch.mtrr_state.have_fixed = 1; vcpu_load(vcpu); r = kvm_arch_vcpu_reset(vcpu); if (r == 0) @@ -3925,6 +4002,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) { + vcpu->arch.nmi_pending = false; + vcpu->arch.nmi_injected = false; + return kvm_x86_ops->vcpu_reset(vcpu); } @@ -4012,6 +4092,7 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.oos_global_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ @@ -4048,8 +4129,8 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { - kvm_iommu_unmap_guest(kvm); kvm_free_all_assigned_devices(kvm); + kvm_iommu_unmap_guest(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); @@ -4127,7 +4208,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; + || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED + || vcpu->arch.nmi_pending; } static void vcpu_kick_intr(void *info) diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index ea051173b0d..d174db7a337 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -58,6 +58,7 @@ #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ #define SrcImm (5<<4) /* Immediate operand. */ #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ +#define SrcOne (7<<4) /* Implied '1' */ #define SrcMask (7<<4) /* Generic ModRM decode. */ #define ModRM (1<<7) @@ -70,17 +71,23 @@ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ #define GroupMask 0xff /* Group number stored in bits 0:7 */ +/* Source 2 operand type */ +#define Src2None (0<<29) +#define Src2CL (1<<29) +#define Src2ImmByte (2<<29) +#define Src2One (3<<29) +#define Src2Mask (7<<29) enum { Group1_80, Group1_81, Group1_82, Group1_83, Group1A, Group3_Byte, Group3, Group4, Group5, Group7, }; -static u16 opcode_table[256] = { +static u32 opcode_table[256] = { /* 0x00 - 0x07 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - 0, 0, 0, 0, + ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, /* 0x08 - 0x0F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, @@ -195,7 +202,7 @@ static u16 opcode_table[256] = { ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, }; -static u16 twobyte_table[256] = { +static u32 twobyte_table[256] = { /* 0x00 - 0x0F */ 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, @@ -230,9 +237,14 @@ static u16 twobyte_table[256] = { /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, + DstMem | SrcReg | Src2ImmByte | ModRM, + DstMem | SrcReg | Src2CL | ModRM, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, + DstMem | SrcReg | Src2ImmByte | ModRM, + DstMem | SrcReg | Src2CL | ModRM, + ModRM, 0, /* 0xB0 - 0xB7 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, DstMem | SrcReg | ModRM | BitOp, @@ -253,7 +265,7 @@ static u16 twobyte_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -static u16 group_table[] = { +static u32 group_table[] = { [Group1_80*8] = ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, @@ -297,9 +309,9 @@ static u16 group_table[] = { SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, }; -static u16 group2_table[] = { +static u32 group2_table[] = { [Group7*8] = - SrcNone | ModRM, 0, 0, 0, + SrcNone | ModRM, 0, 0, SrcNone | ModRM, SrcNone | ModRM | DstMem | Mov, 0, SrcMem16 | ModRM | Mov, 0, }; @@ -359,49 +371,48 @@ static u16 group2_table[] = { "andl %"_msk",%"_LO32 _tmp"; " \ "orl %"_LO32 _tmp",%"_sav"; " +#ifdef CONFIG_X86_64 +#define ON64(x) x +#else +#define ON64(x) +#endif + +#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \ + do { \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "4", "2") \ + _op _suffix " %"_x"3,%1; " \ + _POST_EFLAGS("0", "4", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : _y ((_src).val), "i" (EFLAGS_MASK)); \ + } while (0) + + /* Raw emulation: instruction has two explicit operands. */ #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ - do { \ - unsigned long _tmp; \ - \ - switch ((_dst).bytes) { \ - case 2: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "4", "2") \ - _op"w %"_wx"3,%1; " \ - _POST_EFLAGS("0", "4", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : _wy ((_src).val), "i" (EFLAGS_MASK)); \ - break; \ - case 4: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "4", "2") \ - _op"l %"_lx"3,%1; " \ - _POST_EFLAGS("0", "4", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : _ly ((_src).val), "i" (EFLAGS_MASK)); \ - break; \ - case 8: \ - __emulate_2op_8byte(_op, _src, _dst, \ - _eflags, _qx, _qy); \ - break; \ - } \ + do { \ + unsigned long _tmp; \ + \ + switch ((_dst).bytes) { \ + case 2: \ + ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \ + break; \ + case 4: \ + ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \ + break; \ + case 8: \ + ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \ + break; \ + } \ } while (0) #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ do { \ - unsigned long __tmp; \ + unsigned long _tmp; \ switch ((_dst).bytes) { \ case 1: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "4", "2") \ - _op"b %"_bx"3,%1; " \ - _POST_EFLAGS("0", "4", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (__tmp) \ - : _by ((_src).val), "i" (EFLAGS_MASK)); \ + ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \ break; \ default: \ __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ @@ -425,71 +436,68 @@ static u16 group2_table[] = { __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ "w", "r", _LO32, "r", "", "r") -/* Instruction has only one explicit operand (no source operand). */ -#define emulate_1op(_op, _dst, _eflags) \ - do { \ - unsigned long _tmp; \ - \ - switch ((_dst).bytes) { \ - case 1: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "3", "2") \ - _op"b %1; " \ - _POST_EFLAGS("0", "3", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : "i" (EFLAGS_MASK)); \ - break; \ - case 2: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "3", "2") \ - _op"w %1; " \ - _POST_EFLAGS("0", "3", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : "i" (EFLAGS_MASK)); \ - break; \ - case 4: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "3", "2") \ - _op"l %1; " \ - _POST_EFLAGS("0", "3", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : "i" (EFLAGS_MASK)); \ - break; \ - case 8: \ - __emulate_1op_8byte(_op, _dst, _eflags); \ - break; \ - } \ +/* Instruction has three operands and one operand is stored in ECX register */ +#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ + do { \ + unsigned long _tmp; \ + _type _clv = (_cl).val; \ + _type _srcv = (_src).val; \ + _type _dstv = (_dst).val; \ + \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "5", "2") \ + _op _suffix " %4,%1 \n" \ + _POST_EFLAGS("0", "5", "2") \ + : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ + : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ + ); \ + \ + (_cl).val = (unsigned long) _clv; \ + (_src).val = (unsigned long) _srcv; \ + (_dst).val = (unsigned long) _dstv; \ } while (0) -/* Emulate an instruction with quadword operands (x86/64 only). */ -#if defined(CONFIG_X86_64) -#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ - do { \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "4", "2") \ - _op"q %"_qx"3,%1; " \ - _POST_EFLAGS("0", "4", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : _qy ((_src).val), "i" (EFLAGS_MASK)); \ +#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ + do { \ + switch ((_dst).bytes) { \ + case 2: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "w", unsigned short); \ + break; \ + case 4: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "l", unsigned int); \ + break; \ + case 8: \ + ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "q", unsigned long)); \ + break; \ + } \ } while (0) -#define __emulate_1op_8byte(_op, _dst, _eflags) \ - do { \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "3", "2") \ - _op"q %1; " \ - _POST_EFLAGS("0", "3", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : "i" (EFLAGS_MASK)); \ +#define __emulate_1op(_op, _dst, _eflags, _suffix) \ + do { \ + unsigned long _tmp; \ + \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "3", "2") \ + _op _suffix " %1; " \ + _POST_EFLAGS("0", "3", "2") \ + : "=m" (_eflags), "+m" ((_dst).val), \ + "=&r" (_tmp) \ + : "i" (EFLAGS_MASK)); \ } while (0) -#elif defined(__i386__) -#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) -#define __emulate_1op_8byte(_op, _dst, _eflags) -#endif /* __i386__ */ +/* Instruction has only one explicit operand (no source operand). */ +#define emulate_1op(_op, _dst, _eflags) \ + do { \ + switch ((_dst).bytes) { \ + case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \ + case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \ + case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \ + case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \ + } \ + } while (0) /* Fetch next part of the instruction being emulated. */ #define insn_fetch(_type, _size, _eip) \ @@ -1041,6 +1049,33 @@ done_prefixes: c->src.bytes = 1; c->src.val = insn_fetch(s8, 1, c->eip); break; + case SrcOne: + c->src.bytes = 1; + c->src.val = 1; + break; + } + + /* + * Decode and fetch the second source operand: register, memory + * or immediate. + */ + switch (c->d & Src2Mask) { + case Src2None: + break; + case Src2CL: + c->src2.bytes = 1; + c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8; + break; + case Src2ImmByte: + c->src2.type = OP_IMM; + c->src2.ptr = (unsigned long *)c->eip; + c->src2.bytes = 1; + c->src2.val = insn_fetch(u8, 1, c->eip); + break; + case Src2One: + c->src2.bytes = 1; + c->src2.val = 1; + break; } /* Decode and fetch the destination operand: register or memory. */ @@ -1100,20 +1135,33 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) c->regs[VCPU_REGS_RSP]); } -static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int emulate_pop(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; int rc; - rc = ops->read_std(register_address(c, ss_base(ctxt), - c->regs[VCPU_REGS_RSP]), - &c->dst.val, c->dst.bytes, ctxt->vcpu); + rc = ops->read_emulated(register_address(c, ss_base(ctxt), + c->regs[VCPU_REGS_RSP]), + &c->src.val, c->src.bytes, ctxt->vcpu); if (rc != 0) return rc; - register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); + register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); + return rc; +} + +static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + c->src.bytes = c->dst.bytes; + rc = emulate_pop(ctxt, ops); + if (rc != 0) + return rc; + c->dst.val = c->src.val; return 0; } @@ -1415,24 +1463,15 @@ special_insn: emulate_1op("dec", c->dst, ctxt->eflags); break; case 0x50 ... 0x57: /* push reg */ - c->dst.type = OP_MEM; - c->dst.bytes = c->op_bytes; - c->dst.val = c->src.val; - register_address_increment(c, &c->regs[VCPU_REGS_RSP], - -c->op_bytes); - c->dst.ptr = (void *) register_address( - c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); + emulate_push(ctxt); break; case 0x58 ... 0x5f: /* pop reg */ pop_instruction: - if ((rc = ops->read_std(register_address(c, ss_base(ctxt), - c->regs[VCPU_REGS_RSP]), c->dst.ptr, - c->op_bytes, ctxt->vcpu)) != 0) + c->src.bytes = c->op_bytes; + rc = emulate_pop(ctxt, ops); + if (rc != 0) goto done; - - register_address_increment(c, &c->regs[VCPU_REGS_RSP], - c->op_bytes); - c->dst.type = OP_NONE; /* Disable writeback. */ + c->dst.val = c->src.val; break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) @@ -1591,7 +1630,9 @@ special_insn: emulate_push(ctxt); break; case 0x9d: /* popf */ + c->dst.type = OP_REG; c->dst.ptr = (unsigned long *) &ctxt->eflags; + c->dst.bytes = c->op_bytes; goto pop_instruction; case 0xa0 ... 0xa1: /* mov */ c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; @@ -1689,7 +1730,9 @@ special_insn: emulate_grp2(ctxt); break; case 0xc3: /* ret */ + c->dst.type = OP_REG; c->dst.ptr = &c->eip; + c->dst.bytes = c->op_bytes; goto pop_instruction; case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ mov: @@ -1778,7 +1821,7 @@ special_insn: c->eip = saved_eip; goto cannot_emulate; } - return 0; + break; case 0xf4: /* hlt */ ctxt->vcpu->arch.halt_request = 1; break; @@ -1999,12 +2042,20 @@ twobyte_insn: c->src.val &= (c->dst.bytes << 3) - 1; emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); break; + case 0xa4: /* shld imm8, r, r/m */ + case 0xa5: /* shld cl, r, r/m */ + emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); + break; case 0xab: bts: /* bts */ /* only subword offset */ c->src.val &= (c->dst.bytes << 3) - 1; emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); break; + case 0xac: /* shrd imm8, r, r/m */ + case 0xad: /* shrd cl, r, r/m */ + emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags); + break; case 0xae: /* clflush */ break; case 0xb0 ... 0xb1: /* cmpxchg */ diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 50a779264bb..a7ed208f81e 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -738,7 +738,7 @@ static void lguest_time_init(void) /* We can't set cpumask in the initializer: damn C limitations! Set it * here and register our timer device. */ - lguest_clockevent.cpumask = cpumask_of_cpu(0); + lguest_clockevent.cpumask = cpumask_of(0); clockevents_register_device(&lguest_clockevent); /* Finally, we unblock the timer interrupt. */ diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 37b9ae4d44c..df167f26562 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -133,29 +133,28 @@ void __init time_init_hook(void) **/ void mca_nmi_hook(void) { - /* If I recall correctly, there's a whole bunch of other things that + /* + * If I recall correctly, there's a whole bunch of other things that * we can do to check for NMI problems, but that's all I know about * at the moment. */ - - printk("NMI generated from unknown source!\n"); + pr_warning("NMI generated from unknown source!\n"); } #endif static __init int no_ipi_broadcast(char *str) { get_option(&str, &no_broadcast); - printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : - "IPI Broadcast"); + pr_info("Using %s mode\n", + no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); return 1; } - __setup("no_ipi_broadcast=", no_ipi_broadcast); static int __init print_ipi_mode(void) { - printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : - "Shortcut"); + pr_info("Using IPI %s mode\n", + no_broadcast ? "No-Shortcut" : "Shortcut"); return 0; } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 3624a364b7f..bc4c7840b2a 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -42,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { - return cpumask_of_cpu(cpu); + cpus_clear(*retmask); + cpu_set(cpu, *retmask); } static int probe_bigsmp(void) diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 7b4e6d0d169..4ba5ccaa158 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -97,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 71a309b122e..511d7941364 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 2c6d234e000..2821ffc188b 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -24,7 +24,7 @@ static int probe_summit(void) return 0; } -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -34,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } struct genapic apic_summit = APIC_INIT("summit", probe_summit); diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 52145007bd7..9840b7ec749 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -63,11 +63,6 @@ static int voyager_extended_cpus = 1; /* Used for the invalidate map that's also checked in the spinlock */ static volatile unsigned long smp_invalidate_needed; -/* Bitmask of currently online CPUs - used by setup.c for - /proc/cpuinfo, visible externally but still physical */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); - /* Bitmask of CPUs present in the system - exported by i386_syms.c, used * by scheduler but indexed physically */ cpumask_t phys_cpu_present_map = CPU_MASK_NONE; @@ -218,8 +213,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* This is for the new dynamic CPU boot code */ cpumask_t cpu_callin_map = CPU_MASK_NONE; cpumask_t cpu_callout_map = CPU_MASK_NONE; -cpumask_t cpu_possible_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_possible_map); /* The per processor IRQ masks (these are usually kept in sync) */ static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; @@ -364,9 +357,8 @@ void __init find_smp_config(void) printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id); /* initialize the CPU structures (moved from smp_boot_cpus) */ - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) cpu_irq_affinity[i] = ~0; - } cpu_online_map = cpumask_of_cpu(boot_cpu_id); /* The boot CPU must be extended */ @@ -679,7 +671,7 @@ void __init smp_boot_cpus(void) /* loop over all the extended VIC CPUs and boot them. The * Quad CPUs must be bootstrapped by their extended VIC cpu */ - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) continue; do_boot_cpu(i); @@ -1234,7 +1226,7 @@ int setup_profiling_timer(unsigned int multiplier) * new values until the next timer interrupt in which they do process * accounting. */ - for (i = 0; i < NR_CPUS; ++i) + for (i = 0; i < nr_cpu_ids; ++i) per_cpu(prof_multiplier, i) = multiplier; return 0; @@ -1264,7 +1256,7 @@ void __init voyager_smp_intr_init(void) int i; /* initialize the per cpu irq mask to all disabled */ - for (i = 0; i < NR_CPUS; i++) + for (i = 0; i < nr_cpu_ids; i++) vic_irq_mask[i] = 0xFFFF; VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8655b5bb096..f99a6c6c432 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -435,8 +435,12 @@ static void __init set_highmem_pages_init(void) #endif /* !CONFIG_NUMA */ #else -# define permanent_kmaps_init(pgd_base) do { } while (0) -# define set_highmem_pages_init() do { } while (0) +static inline void permanent_kmaps_init(pgd_t *pgd_base) +{ +} +static inline void set_highmem_pages_init(void) +{ +} #endif /* CONFIG_HIGHMEM */ void __init native_pagetable_setup_start(pgd_t *base) diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index cebcbf152d4..71a14f89f89 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -278,7 +278,7 @@ void __init numa_init_array(void) int rr, i; rr = first_node(node_online_map); - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { if (early_cpu_to_node(i) != NUMA_NO_NODE) continue; numa_set_node(i, rr); @@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) memnodemap[0] = 0; node_set_online(0); node_set(0, node_possible_map); - for (i = 0; i < NR_CPUS; i++) + for (i = 0; i < nr_cpu_ids; i++) numa_set_node(i, 0); e820_register_active_regions(0, start_pfn, last_pfn); setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 51c0a2fc14f..09737c8af07 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) if (!node_online(i)) setup_node_bootmem(i, nodes[i].start, nodes[i].end); - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { int node = early_cpu_to_node(i); if (node == NUMA_NO_NODE) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1d88d2b3977..9e5752fe4d1 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -4,7 +4,7 @@ #include <linux/irq.h> #include <linux/dmi.h> #include <asm/numa.h> -#include "pci.h" +#include <asm/pci_x86.h> struct pci_root_info { char *name; diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 22e057665e5..9bb09823b36 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -2,7 +2,7 @@ #include <linux/pci.h> #include <linux/topology.h> #include <linux/cpu.h> -#include "pci.h" +#include <asm/pci_x86.h> #ifdef CONFIG_X86_64 #include <asm/pci-direct.h> diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index bb1a01f089e..62ddb73e09e 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -14,8 +14,7 @@ #include <asm/segment.h> #include <asm/io.h> #include <asm/smp.h> - -#include "pci.h" +#include <asm/pci_x86.h> unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | PCI_PROBE_MMCONF; diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 9a5af6c8fbe..bd13c3e4c6d 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -5,7 +5,7 @@ #include <linux/pci.h> #include <linux/init.h> #include <linux/dmi.h> -#include "pci.h" +#include <asm/pci_x86.h> /* * Functions for accessing PCI base (first 256 bytes) and extended diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index 86631ccbc25..f6adf2c6d75 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -2,7 +2,7 @@ #include <linux/pci.h> #include <asm/pci-direct.h> #include <asm/io.h> -#include "pci.h" +#include <asm/pci_x86.h> /* Direct PCI access. This is used for PCI accesses in early boot before the PCI subsystem works. */ diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 2051dc96b8e..7d388d5cf54 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -6,8 +6,7 @@ #include <linux/dmi.h> #include <linux/pci.h> #include <linux/init.h> -#include "pci.h" - +#include <asm/pci_x86.h> static void __devinit pci_fixup_i450nx(struct pci_dev *d) { diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 844df0cbbd3..e51bf2cda4b 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -34,8 +34,8 @@ #include <asm/pat.h> #include <asm/e820.h> +#include <asm/pci_x86.h> -#include "pci.h" static int skip_isa_ioresource_align(struct pci_dev *dev) { diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index d6c950f8185..bec3b048e72 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -1,6 +1,6 @@ #include <linux/pci.h> #include <linux/init.h> -#include "pci.h" +#include <asm/pci_x86.h> /* arch_initcall has too random ordering, so call the initializers in the right sequence from here. */ diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index bf69dbe08bf..373b9afe6d4 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -16,8 +16,7 @@ #include <asm/io_apic.h> #include <linux/irq.h> #include <linux/acpi.h> - -#include "pci.h" +#include <asm/pci_x86.h> #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) #define PIRQ_VERSION 0x0100 diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index b722dd481b3..f1065b129e9 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -3,7 +3,7 @@ */ #include <linux/init.h> #include <linux/pci.h> -#include "pci.h" +#include <asm/pci_x86.h> /* * Discover remaining PCI buses in case there are peer host bridges. diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 654a2234f8f..89bf9242c80 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -15,8 +15,7 @@ #include <linux/acpi.h> #include <linux/bitmap.h> #include <asm/e820.h> - -#include "pci.h" +#include <asm/pci_x86.h> /* aperture is up to 256MB but BIOS may reserve less */ #define MMCONFIG_APER_MIN (2 * 1024*1024) diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index f3c761dce69..8b2d561046a 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -13,7 +13,7 @@ #include <linux/init.h> #include <linux/acpi.h> #include <asm/e820.h> -#include "pci.h" +#include <asm/pci_x86.h> /* Assume systems with more busses have correct MCFG */ #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index a1994163c99..30007ffc8e1 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -10,8 +10,7 @@ #include <linux/acpi.h> #include <linux/bitmap.h> #include <asm/e820.h> - -#include "pci.h" +#include <asm/pci_x86.h> /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 1177845d318..2089354968a 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -7,7 +7,7 @@ #include <linux/nodemask.h> #include <mach_apic.h> #include <asm/mpspec.h> -#include "pci.h" +#include <asm/pci_x86.h> #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index e11e9e803d5..b889d824f7c 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c @@ -29,7 +29,7 @@ #include <linux/init.h> #include <asm/olpc.h> #include <asm/geode.h> -#include "pci.h" +#include <asm/pci_x86.h> /* * In the tables below, the first two line (8 longwords) are the diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 37472fc6f72..b82cae970df 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -6,9 +6,8 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/uaccess.h> -#include "pci.h" -#include "pci-functions.h" - +#include <asm/pci_x86.h> +#include <asm/mach-default/pci-functions.h> /* BIOS32 signature: "_32_" */ #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 42f4cb19fac..16d0c0eb0d1 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -9,11 +9,10 @@ #include <linux/init.h> #include <asm/setup.h> +#include <asm/pci_x86.h> #include <asm/visws/cobalt.h> #include <asm/visws/lithium.h> -#include "pci.h" - static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } static void pci_visws_disable_irq(struct pci_dev *dev) { } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 773d68d3e91..503c240e26c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1082,7 +1082,7 @@ static void drop_other_mm_ref(void *info) static void xen_drop_mm_ref(struct mm_struct *mm) { - cpumask_t mask; + cpumask_var_t mask; unsigned cpu; if (current->active_mm == mm) { @@ -1094,7 +1094,16 @@ static void xen_drop_mm_ref(struct mm_struct *mm) } /* Get the "official" set of cpus referring to our pagetable. */ - mask = mm->cpu_vm_mask; + if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { + for_each_online_cpu(cpu) { + if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) + && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) + continue; + smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); + } + return; + } + cpumask_copy(mask, &mm->cpu_vm_mask); /* It's possible that a vcpu may have a stale reference to our cr3, because its in lazy mode, and it hasn't yet flushed @@ -1103,11 +1112,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm) if needed. */ for_each_online_cpu(cpu) { if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) - cpu_set(cpu, mask); + cpumask_set_cpu(cpu, mask); } - if (!cpus_empty(mask)) - smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); + if (!cpumask_empty(mask)) + smp_call_function_many(mask, drop_other_mm_ref, mm, 1); + free_cpumask_var(mask); } #else static void xen_drop_mm_ref(struct mm_struct *mm) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index acd9b6705e0..c44e2069c7c 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -33,7 +33,7 @@ #include "xen-ops.h" #include "mmu.h" -cpumask_t xen_cpu_initialized_map; +cpumask_var_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq); static DEFINE_PER_CPU(int, callfunc_irq); @@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void) { int i, rc; - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (rc >= 0) { num_processors++; @@ -192,11 +192,14 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) if (xen_smp_intr_init(0)) BUG(); - xen_cpu_initialized_map = cpumask_of_cpu(0); + if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) + panic("could not allocate xen_cpu_initialized_map\n"); + + cpumask_copy(xen_cpu_initialized_map, cpumask_of(0)); /* Restrict the possible_map according to max_cpus. */ while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { - for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) + for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) continue; cpu_clear(cpu, cpu_possible_map); } @@ -221,7 +224,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) struct vcpu_guest_context *ctxt; struct desc_struct *gdt; - if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) + if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) return 0; ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); @@ -408,24 +411,23 @@ static void xen_smp_send_reschedule(int cpu) xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); } -static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) +static void xen_send_IPI_mask(const struct cpumask *mask, + enum ipi_vector vector) { unsigned cpu; - cpus_and(mask, mask, cpu_online_map); - - for_each_cpu_mask_nr(cpu, mask) + for_each_cpu_and(cpu, mask, cpu_online_mask) xen_send_IPI_one(cpu, vector); } -static void xen_smp_send_call_function_ipi(cpumask_t mask) +static void xen_smp_send_call_function_ipi(const struct cpumask *mask) { int cpu; xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); /* Make sure other vcpus get a chance to run if they need to. */ - for_each_cpu_mask_nr(cpu, mask) { + for_each_cpu(cpu, mask) { if (xen_vcpu_stolen(cpu)) { HYPERVISOR_sched_op(SCHEDOP_yield, 0); break; @@ -435,7 +437,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask) static void xen_smp_send_call_function_single_ipi(int cpu) { - xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); + xen_send_IPI_mask(cpumask_of(cpu), + XEN_CALL_FUNCTION_SINGLE_VECTOR); } static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 2a234db5949..212ffe012b7 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -35,7 +35,8 @@ void xen_post_suspend(int suspend_cancelled) pfn_to_mfn(xen_start_info->console.domU.mfn); } else { #ifdef CONFIG_SMP - xen_cpu_initialized_map = cpu_online_map; + BUG_ON(xen_cpu_initialized_map == NULL); + cpumask_copy(xen_cpu_initialized_map, cpu_online_mask); #endif xen_vcpu_restore(); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c9f7cda48ed..14f24062349 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -132,8 +132,7 @@ static void do_stolen_accounting(void) *snap = state; /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. Passing NULL to - account_steal_time accounts the time as stolen. */ + including any left-overs from last time. */ stolen = runnable + offline + __get_cpu_var(residual_stolen); if (stolen < 0) @@ -141,11 +140,10 @@ static void do_stolen_accounting(void) ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); __get_cpu_var(residual_stolen) = stolen; - account_steal_time(NULL, ticks); + account_steal_ticks(ticks); /* Add the appropriate number of ticks of blocked time, - including any left-overs from last time. Passing idle to - account_steal_time accounts the time as idle/wait. */ + including any left-overs from last time. */ blocked += __get_cpu_var(residual_blocked); if (blocked < 0) @@ -153,7 +151,7 @@ static void do_stolen_accounting(void) ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); __get_cpu_var(residual_blocked) = blocked; - account_steal_time(idle_task(smp_processor_id()), ticks); + account_idle_ticks(ticks); } /* @@ -437,7 +435,7 @@ void xen_setup_timer(int cpu) evt = &per_cpu(xen_clock_events, cpu); memcpy(evt, xen_clockevent, sizeof(*evt)); - evt->cpumask = cpumask_of_cpu(cpu); + evt->cpumask = cpumask_of(cpu); evt->irq = irq; setup_runstate_info(cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e1afae8461..c1f8faf0a2c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void); __cpuinit void xen_init_lock_cpu(int cpu); void xen_uninit_lock_cpu(int cpu); -extern cpumask_t xen_cpu_initialized_map; +extern cpumask_var_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} #endif diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c index 3df469dbe81..e07f5c9fcd3 100644 --- a/arch/xtensa/kernel/init_task.c +++ b/arch/xtensa/kernel/init_task.c @@ -21,7 +21,6 @@ #include <asm/uaccess.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); |