aboutsummaryrefslogtreecommitdiff
path: root/arch/i386/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/Makefile75
-rw-r--r--arch/i386/kernel/acpi/Makefile8
-rw-r--r--arch/i386/kernel/acpi/boot.c1150
-rw-r--r--arch/i386/kernel/acpi/cstate.c103
-rw-r--r--arch/i386/kernel/acpi/earlyquirk.c51
-rw-r--r--arch/i386/kernel/acpi/sleep.c121
-rw-r--r--arch/i386/kernel/acpi/wakeup.S313
-rw-r--r--arch/i386/kernel/apic.c1300
-rw-r--r--arch/i386/kernel/apm.c2423
-rw-r--r--arch/i386/kernel/asm-offsets.c72
-rw-r--r--arch/i386/kernel/bootflag.c99
-rw-r--r--arch/i386/kernel/cpu/Makefile19
-rw-r--r--arch/i386/kernel/cpu/amd.c272
-rw-r--r--arch/i386/kernel/cpu/centaur.c476
-rw-r--r--arch/i386/kernel/cpu/changelog63
-rw-r--r--arch/i386/kernel/cpu/common.c660
-rw-r--r--arch/i386/kernel/cpu/cpu.h30
-rw-r--r--arch/i386/kernel/cpu/cpufreq/Kconfig243
-rw-r--r--arch/i386/kernel/cpu/cpufreq/Makefile15
-rw-r--r--arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c560
-rw-r--r--arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c457
-rw-r--r--arch/i386/kernel/cpu/cpufreq/elanfreq.c312
-rw-r--r--arch/i386/kernel/cpu/cpufreq/gx-suspmod.c502
-rw-r--r--arch/i386/kernel/cpu/cpufreq/longhaul.c700
-rw-r--r--arch/i386/kernel/cpu/cpufreq/longhaul.h466
-rw-r--r--arch/i386/kernel/cpu/cpufreq/longrun.c326
-rw-r--r--arch/i386/kernel/cpu/cpufreq/p4-clockmod.c338
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k6.c256
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k7.c693
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k7.h44
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k8.c1184
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k8.h197
-rw-r--r--arch/i386/kernel/cpu/cpufreq/sc520_freq.c186
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c722
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h25
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-ich.c424
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-lib.c385
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-lib.h47
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-smi.c427
-rw-r--r--arch/i386/kernel/cpu/cyrix.c435
-rw-r--r--arch/i386/kernel/cpu/intel.c268
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c626
-rw-r--r--arch/i386/kernel/cpu/mcheck/Makefile2
-rw-r--r--arch/i386/kernel/cpu/mcheck/k7.c96
-rw-r--r--arch/i386/kernel/cpu/mcheck/mce.c77
-rw-r--r--arch/i386/kernel/cpu/mcheck/mce.h14
-rw-r--r--arch/i386/kernel/cpu/mcheck/non-fatal.c92
-rw-r--r--arch/i386/kernel/cpu/mcheck/p4.c270
-rw-r--r--arch/i386/kernel/cpu/mcheck/p5.c53
-rw-r--r--arch/i386/kernel/cpu/mcheck/p6.c119
-rw-r--r--arch/i386/kernel/cpu/mcheck/winchip.c36
-rw-r--r--arch/i386/kernel/cpu/mtrr/Makefile5
-rw-r--r--arch/i386/kernel/cpu/mtrr/amd.c121
-rw-r--r--arch/i386/kernel/cpu/mtrr/centaur.c223
-rw-r--r--arch/i386/kernel/cpu/mtrr/changelog229
-rw-r--r--arch/i386/kernel/cpu/mtrr/cyrix.c364
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c418
-rw-r--r--arch/i386/kernel/cpu/mtrr/if.c403
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c715
-rw-r--r--arch/i386/kernel/cpu/mtrr/mtrr.h97
-rw-r--r--arch/i386/kernel/cpu/mtrr/state.c78
-rw-r--r--arch/i386/kernel/cpu/nexgen.c63
-rw-r--r--arch/i386/kernel/cpu/proc.c155
-rw-r--r--arch/i386/kernel/cpu/rise.c53
-rw-r--r--arch/i386/kernel/cpu/transmeta.c113
-rw-r--r--arch/i386/kernel/cpu/umc.c33
-rw-r--r--arch/i386/kernel/cpuid.c246
-rw-r--r--arch/i386/kernel/crash.c214
-rw-r--r--arch/i386/kernel/dmi_scan.c301
-rw-r--r--arch/i386/kernel/doublefault.c65
-rw-r--r--arch/i386/kernel/early_printk.c2
-rw-r--r--arch/i386/kernel/efi.c637
-rw-r--r--arch/i386/kernel/efi_stub.S124
-rw-r--r--arch/i386/kernel/entry.S663
-rw-r--r--arch/i386/kernel/head.S527
-rw-r--r--arch/i386/kernel/i386_ksyms.c35
-rw-r--r--arch/i386/kernel/i387.c547
-rw-r--r--arch/i386/kernel/i8237.c67
-rw-r--r--arch/i386/kernel/i8259.c438
-rw-r--r--arch/i386/kernel/init_task.c46
-rw-r--r--arch/i386/kernel/io_apic.c2659
-rw-r--r--arch/i386/kernel/ioport.c151
-rw-r--r--arch/i386/kernel/irq.c307
-rw-r--r--arch/i386/kernel/kprobes.c548
-rw-r--r--arch/i386/kernel/ldt.c253
-rw-r--r--arch/i386/kernel/machine_kexec.c214
-rw-r--r--arch/i386/kernel/mca.c474
-rw-r--r--arch/i386/kernel/microcode.c515
-rw-r--r--arch/i386/kernel/module.c129
-rw-r--r--arch/i386/kernel/mpparse.c1149
-rw-r--r--arch/i386/kernel/msr.c327
-rw-r--r--arch/i386/kernel/nmi.c621
-rw-r--r--arch/i386/kernel/numaq.c88
-rw-r--r--arch/i386/kernel/pci-dma.c150
-rw-r--r--arch/i386/kernel/process.c949
-rw-r--r--arch/i386/kernel/ptrace.c730
-rw-r--r--arch/i386/kernel/quirks.c52
-rw-r--r--arch/i386/kernel/reboot.c356
-rw-r--r--arch/i386/kernel/reboot_fixups.c57
-rw-r--r--arch/i386/kernel/relocate_kernel.S120
-rw-r--r--arch/i386/kernel/scx200.c168
-rw-r--r--arch/i386/kernel/semaphore.c135
-rw-r--r--arch/i386/kernel/setup.c1634
-rw-r--r--arch/i386/kernel/sigframe.h21
-rw-r--r--arch/i386/kernel/signal.c675
-rw-r--r--arch/i386/kernel/smp.c630
-rw-r--r--arch/i386/kernel/smpboot.c1404
-rw-r--r--arch/i386/kernel/srat.c467
-rw-r--r--arch/i386/kernel/summit.c180
-rw-r--r--arch/i386/kernel/sys_i386.c252
-rw-r--r--arch/i386/kernel/syscall_table.S296
-rw-r--r--arch/i386/kernel/sysenter.c67
-rw-r--r--arch/i386/kernel/time.c485
-rw-r--r--arch/i386/kernel/time_hpet.c466
-rw-r--r--arch/i386/kernel/timers/Makefile9
-rw-r--r--arch/i386/kernel/timers/common.c172
-rw-r--r--arch/i386/kernel/timers/timer.c75
-rw-r--r--arch/i386/kernel/timers/timer_cyclone.c259
-rw-r--r--arch/i386/kernel/timers/timer_hpet.c217
-rw-r--r--arch/i386/kernel/timers/timer_none.c39
-rw-r--r--arch/i386/kernel/timers/timer_pit.c176
-rw-r--r--arch/i386/kernel/timers/timer_pm.c268
-rw-r--r--arch/i386/kernel/timers/timer_tsc.c600
-rw-r--r--arch/i386/kernel/trampoline.S80
-rw-r--r--arch/i386/kernel/traps.c1120
-rw-r--r--arch/i386/kernel/vm86.c808
-rw-r--r--arch/i386/kernel/vmlinux.lds.S150
-rw-r--r--arch/i386/kernel/vsyscall-int80.S53
-rw-r--r--arch/i386/kernel/vsyscall-note.S25
-rw-r--r--arch/i386/kernel/vsyscall-sigreturn.S143
-rw-r--r--arch/i386/kernel/vsyscall-sysenter.S104
-rw-r--r--arch/i386/kernel/vsyscall.S15
-rw-r--r--arch/i386/kernel/vsyscall.lds.S66
133 files changed, 0 insertions, 45922 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
deleted file mode 100644
index f10de0f2c5e..00000000000
--- a/arch/i386/kernel/Makefile
+++ /dev/null
@@ -1,75 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y := head.o init_task.o vmlinux.lds
-
-obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
- ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
- pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
- doublefault.o quirks.o i8237.o
-
-obj-y += cpu/
-obj-y += timers/
-obj-$(CONFIG_ACPI) += acpi/
-obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
-obj-$(CONFIG_MCA) += mca.o
-obj-$(CONFIG_X86_MSR) += msr.o
-obj-$(CONFIG_X86_CPUID) += cpuid.o
-obj-$(CONFIG_MICROCODE) += microcode.o
-obj-$(CONFIG_APM) += apm.o
-obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
-obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
-obj-$(CONFIG_X86_MPPARSE) += mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
-obj-$(CONFIG_X86_IO_APIC) += io_apic.o
-obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
-obj-$(CONFIG_X86_NUMAQ) += numaq.o
-obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
-obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_MODULES) += module.o
-obj-y += sysenter.o vsyscall.o
-obj-$(CONFIG_ACPI_SRAT) += srat.o
-obj-$(CONFIG_HPET_TIMER) += time_hpet.o
-obj-$(CONFIG_EFI) += efi.o efi_stub.o
-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-
-EXTRA_AFLAGS := -traditional
-
-obj-$(CONFIG_SCx200) += scx200.o
-
-# vsyscall.o contains the vsyscall DSO images as __initdata.
-# We must build both images before we can assemble it.
-# Note: kbuild does not track this dependency due to usage of .incbin
-$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
-targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
-targets += vsyscall-note.o vsyscall.lds
-
-# The DSO images are built using a special linker script.
-quiet_cmd_syscall = SYSCALL $@
- cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
- -Wl,-T,$(filter-out FORCE,$^) -o $@
-
-export CPPFLAGS_vsyscall.lds += -P -C -U$(ARCH)
-
-vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1
-SYSCFLAGS_vsyscall-sysenter.so = $(vsyscall-flags)
-SYSCFLAGS_vsyscall-int80.so = $(vsyscall-flags)
-
-$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
-$(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
- $(obj)/vsyscall-%.o $(obj)/vsyscall-note.o FORCE
- $(call if_changed,syscall)
-
-# We also create a special relocatable object that should mirror the symbol
-# table and layout of the linked DSO. With ld -R we can then refer to
-# these symbols in the kernel code rather than hand-coded addresses.
-extra-y += vsyscall-syms.o
-$(obj)/built-in.o: $(obj)/vsyscall-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
-
-SYSCFLAGS_vsyscall-syms.o = -r
-$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
- $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
- $(call if_changed,syscall)
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
deleted file mode 100644
index 267ca48e1b6..00000000000
--- a/arch/i386/kernel/acpi/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-obj-y := boot.o
-obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o
-obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
-
-ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y += cstate.o
-endif
-
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
deleted file mode 100644
index b66c13c0cc0..00000000000
--- a/arch/i386/kernel/acpi/boot.c
+++ /dev/null
@@ -1,1150 +0,0 @@
-/*
- * boot.c - Architecture-Specific Low-Level ACPI Boot Support
- *
- * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/init.h>
-#include <linux/config.h>
-#include <linux/acpi.h>
-#include <linux/efi.h>
-#include <linux/module.h>
-#include <linux/dmi.h>
-#include <linux/irq.h>
-
-#include <asm/pgtable.h>
-#include <asm/io_apic.h>
-#include <asm/apic.h>
-#include <asm/io.h>
-#include <asm/mpspec.h>
-
-#ifdef CONFIG_X86_64
-
-static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-}
-extern void __init clustered_apic_check(void);
-static inline int ioapic_setup_disabled(void)
-{
- return 0;
-}
-
-#include <asm/proto.h>
-
-#else /* X86 */
-
-#ifdef CONFIG_X86_LOCAL_APIC
-#include <mach_apic.h>
-#include <mach_mpparse.h>
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-#endif /* X86 */
-
-#define BAD_MADT_ENTRY(entry, end) ( \
- (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
- ((acpi_table_entry_header *)entry)->length != sizeof(*entry))
-
-#define PREFIX "ACPI: "
-
-int acpi_noirq __initdata; /* skip ACPI IRQ initialization */
-int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
-int acpi_ht __initdata = 1; /* enable HT */
-
-int acpi_lapic;
-int acpi_ioapic;
-int acpi_strict;
-EXPORT_SYMBOL(acpi_strict);
-
-acpi_interrupt_flags acpi_sci_flags __initdata;
-int acpi_sci_override_gsi __initdata;
-int acpi_skip_timer_override __initdata;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
-#endif
-
-#ifndef __HAVE_ARCH_CMPXCHG
-#warning ACPI uses CMPXCHG, i486 and later hardware
-#endif
-
-#define MAX_MADT_ENTRIES 256
-u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
- {[0 ... MAX_MADT_ENTRIES - 1] = 0xff };
-EXPORT_SYMBOL(x86_acpiid_to_apicid);
-
-/* --------------------------------------------------------------------------
- Boot-time Configuration
- -------------------------------------------------------------------------- */
-
-/*
- * The default interrupt routing model is PIC (8259). This gets
- * overriden if IOAPICs are enumerated (below).
- */
-enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
-
-#ifdef CONFIG_X86_64
-
-/* rely on all ACPI tables being in the direct mapping */
-char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
-{
- if (!phys_addr || !size)
- return NULL;
-
- if (phys_addr < (end_pfn_map << PAGE_SHIFT))
- return __va(phys_addr);
-
- return NULL;
-}
-
-#else
-
-/*
- * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
- * to map the target physical address. The problem is that set_fixmap()
- * provides a single page, and it is possible that the page is not
- * sufficient.
- * By using this area, we can map up to MAX_IO_APICS pages temporarily,
- * i.e. until the next __va_range() call.
- *
- * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
- * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
- * count idx down while incrementing the phys address.
- */
-char *__acpi_map_table(unsigned long phys, unsigned long size)
-{
- unsigned long base, offset, mapped_size;
- int idx;
-
- if (phys + size < 8 * 1024 * 1024)
- return __va(phys);
-
- offset = phys & (PAGE_SIZE - 1);
- mapped_size = PAGE_SIZE - offset;
- set_fixmap(FIX_ACPI_END, phys);
- base = fix_to_virt(FIX_ACPI_END);
-
- /*
- * Most cases can be covered by the below.
- */
- idx = FIX_ACPI_END;
- while (mapped_size < size) {
- if (--idx < FIX_ACPI_BEGIN)
- return NULL; /* cannot handle this */
- phys += PAGE_SIZE;
- set_fixmap(idx, phys);
- mapped_size += PAGE_SIZE;
- }
-
- return ((unsigned char *)base + offset);
-}
-#endif
-
-#ifdef CONFIG_PCI_MMCONFIG
-/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
-struct acpi_table_mcfg_config *pci_mmcfg_config;
-int pci_mmcfg_config_num;
-
-int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
-{
- struct acpi_table_mcfg *mcfg;
- unsigned long i;
- int config_size;
-
- if (!phys_addr || !size)
- return -EINVAL;
-
- mcfg = (struct acpi_table_mcfg *)__acpi_map_table(phys_addr, size);
- if (!mcfg) {
- printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
- return -ENODEV;
- }
-
- /* how many config structures do we have */
- pci_mmcfg_config_num = 0;
- i = size - sizeof(struct acpi_table_mcfg);
- while (i >= sizeof(struct acpi_table_mcfg_config)) {
- ++pci_mmcfg_config_num;
- i -= sizeof(struct acpi_table_mcfg_config);
- };
- if (pci_mmcfg_config_num == 0) {
- printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
- return -ENODEV;
- }
-
- config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
- pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
- if (!pci_mmcfg_config) {
- printk(KERN_WARNING PREFIX
- "No memory for MCFG config tables\n");
- return -ENOMEM;
- }
-
- memcpy(pci_mmcfg_config, &mcfg->config, config_size);
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
- if (mcfg->config[i].base_reserved) {
- printk(KERN_ERR PREFIX
- "MMCONFIG not in low 4GB of memory\n");
- return -ENODEV;
- }
- }
-
- return 0;
-}
-#endif /* CONFIG_PCI_MMCONFIG */
-
-#ifdef CONFIG_X86_LOCAL_APIC
-static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
-{
- struct acpi_table_madt *madt = NULL;
-
- if (!phys_addr || !size)
- return -EINVAL;
-
- madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size);
- if (!madt) {
- printk(KERN_WARNING PREFIX "Unable to map MADT\n");
- return -ENODEV;
- }
-
- if (madt->lapic_address) {
- acpi_lapic_addr = (u64) madt->lapic_address;
-
- printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
- madt->lapic_address);
- }
-
- acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
-
- return 0;
-}
-
-static int __init
-acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end)
-{
- struct acpi_table_lapic *processor = NULL;
-
- processor = (struct acpi_table_lapic *)header;
-
- if (BAD_MADT_ENTRY(processor, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- /* no utility in registering a disabled processor */
- if (processor->flags.enabled == 0)
- return 0;
-
- x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
-
- mp_register_lapic(processor->id, /* APIC ID */
- processor->flags.enabled); /* Enabled? */
-
- return 0;
-}
-
-static int __init
-acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
- const unsigned long end)
-{
- struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
-
- lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr *)header;
-
- if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
- return -EINVAL;
-
- acpi_lapic_addr = lapic_addr_ovr->address;
-
- return 0;
-}
-
-static int __init
-acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
-{
- struct acpi_table_lapic_nmi *lapic_nmi = NULL;
-
- lapic_nmi = (struct acpi_table_lapic_nmi *)header;
-
- if (BAD_MADT_ENTRY(lapic_nmi, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- if (lapic_nmi->lint != 1)
- printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
-
- return 0;
-}
-
-#endif /*CONFIG_X86_LOCAL_APIC */
-
-#ifdef CONFIG_X86_IO_APIC
-
-static int __init
-acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end)
-{
- struct acpi_table_ioapic *ioapic = NULL;
-
- ioapic = (struct acpi_table_ioapic *)header;
-
- if (BAD_MADT_ENTRY(ioapic, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- mp_register_ioapic(ioapic->id,
- ioapic->address, ioapic->global_irq_base);
-
- return 0;
-}
-
-/*
- * Parse Interrupt Source Override for the ACPI SCI
- */
-static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
-{
- if (trigger == 0) /* compatible SCI trigger is level */
- trigger = 3;
-
- if (polarity == 0) /* compatible SCI polarity is low */
- polarity = 3;
-
- /* Command-line over-ride via acpi_sci= */
- if (acpi_sci_flags.trigger)
- trigger = acpi_sci_flags.trigger;
-
- if (acpi_sci_flags.polarity)
- polarity = acpi_sci_flags.polarity;
-
- /*
- * mp_config_acpi_legacy_irqs() already setup IRQs < 16
- * If GSI is < 16, this will update its flags,
- * else it will create a new mp_irqs[] entry.
- */
- mp_override_legacy_irq(gsi, polarity, trigger, gsi);
-
- /*
- * stash over-ride to indicate we've been here
- * and for later update of acpi_fadt
- */
- acpi_sci_override_gsi = gsi;
- return;
-}
-
-static int __init
-acpi_parse_int_src_ovr(acpi_table_entry_header * header,
- const unsigned long end)
-{
- struct acpi_table_int_src_ovr *intsrc = NULL;
-
- intsrc = (struct acpi_table_int_src_ovr *)header;
-
- if (BAD_MADT_ENTRY(intsrc, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- if (intsrc->bus_irq == acpi_fadt.sci_int) {
- acpi_sci_ioapic_setup(intsrc->global_irq,
- intsrc->flags.polarity,
- intsrc->flags.trigger);
- return 0;
- }
-
- if (acpi_skip_timer_override &&
- intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
- printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
- return 0;
- }
-
- mp_override_legacy_irq(intsrc->bus_irq,
- intsrc->flags.polarity,
- intsrc->flags.trigger, intsrc->global_irq);
-
- return 0;
-}
-
-static int __init
-acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
-{
- struct acpi_table_nmi_src *nmi_src = NULL;
-
- nmi_src = (struct acpi_table_nmi_src *)header;
-
- if (BAD_MADT_ENTRY(nmi_src, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- /* TBD: Support nimsrc entries? */
-
- return 0;
-}
-
-#endif /* CONFIG_X86_IO_APIC */
-
-/*
- * acpi_pic_sci_set_trigger()
- *
- * use ELCR to set PIC-mode trigger type for SCI
- *
- * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
- * it may require Edge Trigger -- use "acpi_sci=edge"
- *
- * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
- * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge.
- * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
- * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
- */
-
-void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
-{
- unsigned int mask = 1 << irq;
- unsigned int old, new;
-
- /* Real old ELCR mask */
- old = inb(0x4d0) | (inb(0x4d1) << 8);
-
- /*
- * If we use ACPI to set PCI irq's, then we should clear ELCR
- * since we will set it correctly as we enable the PCI irq
- * routing.
- */
- new = acpi_noirq ? old : 0;
-
- /*
- * Update SCI information in the ELCR, it isn't in the PCI
- * routing tables..
- */
- switch (trigger) {
- case 1: /* Edge - clear */
- new &= ~mask;
- break;
- case 3: /* Level - set */
- new |= mask;
- break;
- }
-
- if (old == new)
- return;
-
- printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
- outb(new, 0x4d0);
- outb(new >> 8, 0x4d1);
-}
-
-int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
-{
-#ifdef CONFIG_X86_IO_APIC
- if (use_pci_vector() && !platform_legacy_irq(gsi))
- *irq = IO_APIC_VECTOR(gsi);
- else
-#endif
- *irq = gsi;
- return 0;
-}
-
-/*
- * success: return IRQ number (>=0)
- * failure: return < 0
- */
-int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
-{
- unsigned int irq;
- unsigned int plat_gsi = gsi;
-
-#ifdef CONFIG_PCI
- /*
- * Make sure all (legacy) PCI IRQs are set as level-triggered.
- */
- if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
- extern void eisa_set_level_irq(unsigned int irq);
-
- if (edge_level == ACPI_LEVEL_SENSITIVE)
- eisa_set_level_irq(gsi);
- }
-#endif
-
-#ifdef CONFIG_X86_IO_APIC
- if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
- plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
- }
-#endif
- acpi_gsi_to_irq(plat_gsi, &irq);
- return irq;
-}
-
-EXPORT_SYMBOL(acpi_register_gsi);
-
-/*
- * ACPI based hotplug support for CPU
- */
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-int acpi_map_lsapic(acpi_handle handle, int *pcpu)
-{
- /* TBD */
- return -EINVAL;
-}
-
-EXPORT_SYMBOL(acpi_map_lsapic);
-
-int acpi_unmap_lsapic(int cpu)
-{
- /* TBD */
- return -EINVAL;
-}
-
-EXPORT_SYMBOL(acpi_unmap_lsapic);
-#endif /* CONFIG_ACPI_HOTPLUG_CPU */
-
-int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
-{
- /* TBD */
- return -EINVAL;
-}
-
-EXPORT_SYMBOL(acpi_register_ioapic);
-
-int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
-{
- /* TBD */
- return -EINVAL;
-}
-
-EXPORT_SYMBOL(acpi_unregister_ioapic);
-
-static unsigned long __init
-acpi_scan_rsdp(unsigned long start, unsigned long length)
-{
- unsigned long offset = 0;
- unsigned long sig_len = sizeof("RSD PTR ") - 1;
-
- /*
- * Scan all 16-byte boundaries of the physical memory region for the
- * RSDP signature.
- */
- for (offset = 0; offset < length; offset += 16) {
- if (strncmp((char *)(start + offset), "RSD PTR ", sig_len))
- continue;
- return (start + offset);
- }
-
- return 0;
-}
-
-static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
-{
- struct acpi_table_sbf *sb;
-
- if (!phys_addr || !size)
- return -EINVAL;
-
- sb = (struct acpi_table_sbf *)__acpi_map_table(phys_addr, size);
- if (!sb) {
- printk(KERN_WARNING PREFIX "Unable to map SBF\n");
- return -ENODEV;
- }
-
- sbf_port = sb->sbf_cmos; /* Save CMOS port */
-
- return 0;
-}
-
-#ifdef CONFIG_HPET_TIMER
-
-static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
-{
- struct acpi_table_hpet *hpet_tbl;
-
- if (!phys || !size)
- return -EINVAL;
-
- hpet_tbl = (struct acpi_table_hpet *)__acpi_map_table(phys, size);
- if (!hpet_tbl) {
- printk(KERN_WARNING PREFIX "Unable to map HPET\n");
- return -ENODEV;
- }
-
- if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
- printk(KERN_WARNING PREFIX "HPET timers must be located in "
- "memory.\n");
- return -1;
- }
-#ifdef CONFIG_X86_64
- vxtime.hpet_address = hpet_tbl->addr.addrl |
- ((long)hpet_tbl->addr.addrh << 32);
-
- printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
- hpet_tbl->id, vxtime.hpet_address);
-#else /* X86 */
- {
- extern unsigned long hpet_address;
-
- hpet_address = hpet_tbl->addr.addrl;
- printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
- hpet_tbl->id, hpet_address);
- }
-#endif /* X86 */
-
- return 0;
-}
-#else
-#define acpi_parse_hpet NULL
-#endif
-
-#ifdef CONFIG_X86_PM_TIMER
-extern u32 pmtmr_ioport;
-#endif
-
-static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
-{
- struct fadt_descriptor_rev2 *fadt = NULL;
-
- fadt = (struct fadt_descriptor_rev2 *)__acpi_map_table(phys, size);
- if (!fadt) {
- printk(KERN_WARNING PREFIX "Unable to map FADT\n");
- return 0;
- }
- /* initialize sci_int early for INT_SRC_OVR MADT parsing */
- acpi_fadt.sci_int = fadt->sci_int;
-
- /* initialize rev and apic_phys_dest_mode for x86_64 genapic */
- acpi_fadt.revision = fadt->revision;
- acpi_fadt.force_apic_physical_destination_mode =
- fadt->force_apic_physical_destination_mode;
-
-#ifdef CONFIG_X86_PM_TIMER
- /* detect the location of the ACPI PM Timer */
- if (fadt->revision >= FADT2_REVISION_ID) {
- /* FADT rev. 2 */
- if (fadt->xpm_tmr_blk.address_space_id !=
- ACPI_ADR_SPACE_SYSTEM_IO)
- return 0;
-
- pmtmr_ioport = fadt->xpm_tmr_blk.address;
- } else {
- /* FADT rev. 1 */
- pmtmr_ioport = fadt->V1_pm_tmr_blk;
- }
- if (pmtmr_ioport)
- printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
- pmtmr_ioport);
-#endif
- return 0;
-}
-
-unsigned long __init acpi_find_rsdp(void)
-{
- unsigned long rsdp_phys = 0;
-
- if (efi_enabled) {
- if (efi.acpi20)
- return __pa(efi.acpi20);
- else if (efi.acpi)
- return __pa(efi.acpi);
- }
- /*
- * Scan memory looking for the RSDP signature. First search EBDA (low
- * memory) paragraphs and then search upper memory (E0000-FFFFF).
- */
- rsdp_phys = acpi_scan_rsdp(0, 0x400);
- if (!rsdp_phys)
- rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000);
-
- return rsdp_phys;
-}
-
-#ifdef CONFIG_X86_LOCAL_APIC
-/*
- * Parse LAPIC entries in MADT
- * returns 0 on success, < 0 on error
- */
-static int __init acpi_parse_madt_lapic_entries(void)
-{
- int count;
-
- /*
- * Note that the LAPIC address is obtained from the MADT (32-bit value)
- * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
- */
-
- count =
- acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR,
- acpi_parse_lapic_addr_ovr, 0);
- if (count < 0) {
- printk(KERN_ERR PREFIX
- "Error parsing LAPIC address override entry\n");
- return count;
- }
-
- mp_register_lapic_address(acpi_lapic_addr);
-
- count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
- MAX_APICS);
- if (!count) {
- printk(KERN_ERR PREFIX "No LAPIC entries present\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return -ENODEV;
- } else if (count < 0) {
- printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return count;
- }
-
- count =
- acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
- if (count < 0) {
- printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return count;
- }
- return 0;
-}
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-#ifdef CONFIG_X86_IO_APIC
-/*
- * Parse IOAPIC related entries in MADT
- * returns 0 on success, < 0 on error
- */
-static int __init acpi_parse_madt_ioapic_entries(void)
-{
- int count;
-
- /*
- * ACPI interpreter is required to complete interrupt setup,
- * so if it is off, don't enumerate the io-apics with ACPI.
- * If MPS is present, it will handle them,
- * otherwise the system will stay in PIC mode
- */
- if (acpi_disabled || acpi_noirq) {
- return -ENODEV;
- }
-
- /*
- * if "noapic" boot option, don't look for IO-APICs
- */
- if (skip_ioapic_setup) {
- printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
- "due to 'noapic' option.\n");
- return -ENODEV;
- }
-
- count =
- acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic,
- MAX_IO_APICS);
- if (!count) {
- printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
- return -ENODEV;
- } else if (count < 0) {
- printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
- return count;
- }
-
- count =
- acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr,
- NR_IRQ_VECTORS);
- if (count < 0) {
- printk(KERN_ERR PREFIX
- "Error parsing interrupt source overrides entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return count;
- }
-
- /*
- * If BIOS did not supply an INT_SRC_OVR for the SCI
- * pretend we got one so we can set the SCI flags.
- */
- if (!acpi_sci_override_gsi)
- acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
-
- /* Fill in identity legacy mapings where no override */
- mp_config_acpi_legacy_irqs();
-
- count =
- acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src,
- NR_IRQ_VECTORS);
- if (count < 0) {
- printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return count;
- }
-
- return 0;
-}
-#else
-static inline int acpi_parse_madt_ioapic_entries(void)
-{
- return -1;
-}
-#endif /* !CONFIG_X86_IO_APIC */
-
-static void __init acpi_process_madt(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
- int count, error;
-
- count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
- if (count >= 1) {
-
- /*
- * Parse MADT LAPIC entries
- */
- error = acpi_parse_madt_lapic_entries();
- if (!error) {
- acpi_lapic = 1;
-
-#ifdef CONFIG_X86_GENERICARCH
- generic_bigsmp_probe();
-#endif
- /*
- * Parse MADT IO-APIC entries
- */
- error = acpi_parse_madt_ioapic_entries();
- if (!error) {
- acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
- acpi_irq_balance_set(NULL);
- acpi_ioapic = 1;
-
- smp_found_config = 1;
- clustered_apic_check();
- }
- }
- if (error == -EINVAL) {
- /*
- * Dell Precision Workstation 410, 610 come here.
- */
- printk(KERN_ERR PREFIX
- "Invalid BIOS MADT, disabling ACPI\n");
- disable_acpi();
- }
- }
-#endif
- return;
-}
-
-extern int acpi_force;
-
-#ifdef __i386__
-
-static int __init disable_acpi_irq(struct dmi_system_id *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
- d->ident);
- acpi_noirq_set();
- }
- return 0;
-}
-
-static int __init disable_acpi_pci(struct dmi_system_id *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
- d->ident);
- acpi_disable_pci();
- }
- return 0;
-}
-
-static int __init dmi_disable_acpi(struct dmi_system_id *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: acpi off\n", d->ident);
- disable_acpi();
- } else {
- printk(KERN_NOTICE
- "Warning: DMI blacklist says broken, but acpi forced\n");
- }
- return 0;
-}
-
-/*
- * Limit ACPI to CPU enumeration for HT
- */
-static int __init force_acpi_ht(struct dmi_system_id *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of acpi=ht\n",
- d->ident);
- disable_acpi();
- acpi_ht = 1;
- } else {
- printk(KERN_NOTICE
- "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
- }
- return 0;
-}
-
-/*
- * If your system is blacklisted here, but you find that acpi=force
- * works for you, please contact acpi-devel@sourceforge.net
- */
-static struct dmi_system_id __initdata acpi_dmi_table[] = {
- /*
- * Boxes that need ACPI disabled
- */
- {
- .callback = dmi_disable_acpi,
- .ident = "IBM Thinkpad",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
- DMI_MATCH(DMI_BOARD_NAME, "2629H1G"),
- },
- },
-
- /*
- * Boxes that need acpi=ht
- */
- {
- .callback = force_acpi_ht,
- .ident = "FSC Primergy T850",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
- },
- },
- {
- .callback = force_acpi_ht,
- .ident = "DELL GX240",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
- },
- },
- {
- .callback = force_acpi_ht,
- .ident = "HP VISUALIZE NT Workstation",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
- },
- },
- {
- .callback = force_acpi_ht,
- .ident = "Compaq Workstation W8000",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
- },
- },
- {
- .callback = force_acpi_ht,
- .ident = "ASUS P4B266",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
- },
- },
- {
- .callback = force_acpi_ht,
- .ident = "ASUS P2B-DS",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
- },
- },
- {
- .callback = force_acpi_ht,
- .ident = "ASUS CUR-DLS",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"),
- },
- },
- {
- .callback = force_acpi_ht,
- .ident = "ABIT i440BX-W83977",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
- DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
- },
- },
- {
- .callback = force_acpi_ht,
- .ident = "IBM Bladecenter",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
- DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
- },
- },
- {
- .callback = force_acpi_ht,
- .ident = "IBM eServer xSeries 360",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
- DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
- },
- },
- {
- .callback = force_acpi_ht,
- .ident = "IBM eserver xSeries 330",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
- DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
- },
- },
- {
- .callback = force_acpi_ht,
- .ident = "IBM eserver xSeries 440",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
- DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
- },
- },
-
- /*
- * Boxes that need ACPI PCI IRQ routing disabled
- */
- {
- .callback = disable_acpi_irq,
- .ident = "ASUS A7V",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
- DMI_MATCH(DMI_BOARD_NAME, "<A7V>"),
- /* newer BIOS, Revision 1011, does work */
- DMI_MATCH(DMI_BIOS_VERSION,
- "ASUS A7V ACPI BIOS Revision 1007"),
- },
- },
-
- /*
- * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
- */
- { /* _BBN 0 bug */
- .callback = disable_acpi_pci,
- .ident = "ASUS PR-DLS",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"),
- DMI_MATCH(DMI_BIOS_VERSION,
- "ASUS PR-DLS ACPI BIOS Revision 1010"),
- DMI_MATCH(DMI_BIOS_DATE, "03/21/2003")
- },
- },
- {
- .callback = disable_acpi_pci,
- .ident = "Acer TravelMate 36x Laptop",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
- DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
- },
- },
- {}
-};
-
-#endif /* __i386__ */
-
-/*
- * acpi_boot_table_init() and acpi_boot_init()
- * called from setup_arch(), always.
- * 1. checksums all tables
- * 2. enumerates lapics
- * 3. enumerates io-apics
- *
- * acpi_table_init() is separate to allow reading SRAT without
- * other side effects.
- *
- * side effects of acpi_boot_init:
- * acpi_lapic = 1 if LAPIC found
- * acpi_ioapic = 1 if IOAPIC found
- * if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
- * if acpi_blacklisted() acpi_disabled = 1;
- * acpi_irq_model=...
- * ...
- *
- * return value: (currently ignored)
- * 0: success
- * !0: failure
- */
-
-int __init acpi_boot_table_init(void)
-{
- int error;
-
-#ifdef __i386__
- dmi_check_system(acpi_dmi_table);
-#endif
-
- /*
- * If acpi_disabled, bail out
- * One exception: acpi=ht continues far enough to enumerate LAPICs
- */
- if (acpi_disabled && !acpi_ht)
- return 1;
-
- /*
- * Initialize the ACPI boot-time table parser.
- */
- error = acpi_table_init();
- if (error) {
- disable_acpi();
- return error;
- }
-#ifdef __i386__
- check_acpi_pci();
-#endif
-
- acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
-
- /*
- * blacklist may disable ACPI entirely
- */
- error = acpi_blacklisted();
- if (error) {
- if (acpi_force) {
- printk(KERN_WARNING PREFIX "acpi=force override\n");
- } else {
- printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
- disable_acpi();
- return error;
- }
- }
-
- return 0;
-}
-
-int __init acpi_boot_init(void)
-{
- /*
- * If acpi_disabled, bail out
- * One exception: acpi=ht continues far enough to enumerate LAPICs
- */
- if (acpi_disabled && !acpi_ht)
- return 1;
-
- acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
-
- /*
- * set sci_int and PM timer address
- */
- acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
-
- /*
- * Process the Multiple APIC Description Table (MADT), if present
- */
- acpi_process_madt();
-
- acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
-
- return 0;
-}
diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c
deleted file mode 100644
index 4c3036ba65d..00000000000
--- a/arch/i386/kernel/acpi/cstate.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * arch/i386/kernel/acpi/cstate.c
- *
- * Copyright (C) 2005 Intel Corporation
- * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- * - Added _PDC for SMP C-states on Intel CPUs
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/acpi.h>
-
-#include <acpi/processor.h>
-#include <asm/acpi.h>
-
-static void acpi_processor_power_init_intel_pdc(struct acpi_processor_power
- *pow)
-{
- struct acpi_object_list *obj_list;
- union acpi_object *obj;
- u32 *buf;
-
- /* allocate and initialize pdc. It will be used later. */
- obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL);
- if (!obj_list) {
- printk(KERN_ERR "Memory allocation error\n");
- return;
- }
-
- obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
- if (!obj) {
- printk(KERN_ERR "Memory allocation error\n");
- kfree(obj_list);
- return;
- }
-
- buf = kmalloc(12, GFP_KERNEL);
- if (!buf) {
- printk(KERN_ERR "Memory allocation error\n");
- kfree(obj);
- kfree(obj_list);
- return;
- }
-
- buf[0] = ACPI_PDC_REVISION_ID;
- buf[1] = 1;
- buf[2] = ACPI_PDC_C_CAPABILITY_SMP;
-
- obj->type = ACPI_TYPE_BUFFER;
- obj->buffer.length = 12;
- obj->buffer.pointer = (u8 *) buf;
- obj_list->count = 1;
- obj_list->pointer = obj;
- pow->pdc = obj_list;
-
- return;
-}
-
-/* Initialize _PDC data based on the CPU vendor */
-void acpi_processor_power_init_pdc(struct acpi_processor_power *pow,
- unsigned int cpu)
-{
- struct cpuinfo_x86 *c = cpu_data + cpu;
-
- pow->pdc = NULL;
- if (c->x86_vendor == X86_VENDOR_INTEL)
- acpi_processor_power_init_intel_pdc(pow);
-
- return;
-}
-
-EXPORT_SYMBOL(acpi_processor_power_init_pdc);
-
-/*
- * Initialize bm_flags based on the CPU cache properties
- * On SMP it depends on cache configuration
- * - When cache is not shared among all CPUs, we flush cache
- * before entering C3.
- * - When cache is shared among all CPUs, we use bm_check
- * mechanism as in UP case
- *
- * This routine is called only after all the CPUs are online
- */
-void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
- unsigned int cpu)
-{
- struct cpuinfo_x86 *c = cpu_data + cpu;
-
- flags->bm_check = 0;
- if (num_online_cpus() == 1)
- flags->bm_check = 1;
- else if (c->x86_vendor == X86_VENDOR_INTEL) {
- /*
- * Today all CPUs that support C3 share cache.
- * TBD: This needs to look at cache shared map, once
- * multi-core detection patch makes to the base.
- */
- flags->bm_check = 1;
- }
-}
-
-EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
deleted file mode 100644
index f1b9d2a46da..00000000000
--- a/arch/i386/kernel/acpi/earlyquirk.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Do early PCI probing for bug detection when the main PCI subsystem is
- * not up yet.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <asm/pci-direct.h>
-#include <asm/acpi.h>
-
-static int __init check_bridge(int vendor, int device)
-{
- /* According to Nvidia all timer overrides are bogus. Just ignore
- them all. */
- if (vendor == PCI_VENDOR_ID_NVIDIA) {
- acpi_skip_timer_override = 1;
- }
- return 0;
-}
-
-void __init check_acpi_pci(void)
-{
- int num, slot, func;
-
- /* Assume the machine supports type 1. If not it will
- always read ffffffff and should not have any side effect. */
-
- /* Poor man's PCI discovery */
- for (num = 0; num < 32; num++) {
- for (slot = 0; slot < 32; slot++) {
- for (func = 0; func < 8; func++) {
- u32 class;
- u32 vendor;
- class = read_pci_config(num, slot, func,
- PCI_CLASS_REVISION);
- if (class == 0xffffffff)
- break;
-
- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
- continue;
-
- vendor = read_pci_config(num, slot, func,
- PCI_VENDOR_ID);
-
- if (check_bridge(vendor & 0xffff, vendor >> 16))
- return;
- }
-
- }
- }
-}
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
deleted file mode 100644
index 1cb2b186a3a..00000000000
--- a/arch/i386/kernel/acpi/sleep.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * sleep.c - x86-specific ACPI sleep support.
- *
- * Copyright (C) 2001-2003 Patrick Mochel
- * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz>
- */
-
-#include <linux/acpi.h>
-#include <linux/bootmem.h>
-#include <linux/dmi.h>
-#include <asm/smp.h>
-#include <asm/tlbflush.h>
-
-/* address in low memory of the wakeup routine. */
-unsigned long acpi_wakeup_address = 0;
-unsigned long acpi_video_flags;
-extern char wakeup_start, wakeup_end;
-
-extern void zap_low_mappings(void);
-
-extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
-
-static void init_low_mapping(pgd_t * pgd, int pgd_limit)
-{
- int pgd_ofs = 0;
-
- while ((pgd_ofs < pgd_limit)
- && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) {
- set_pgd(pgd, *(pgd + USER_PTRS_PER_PGD));
- pgd_ofs++, pgd++;
- }
- flush_tlb_all();
-}
-
-/**
- * acpi_save_state_mem - save kernel state
- *
- * Create an identity mapped page table and copy the wakeup routine to
- * low memory.
- */
-int acpi_save_state_mem(void)
-{
- if (!acpi_wakeup_address)
- return 1;
- init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD);
- memcpy((void *)acpi_wakeup_address, &wakeup_start,
- &wakeup_end - &wakeup_start);
- acpi_copy_wakeup_routine(acpi_wakeup_address);
-
- return 0;
-}
-
-/*
- * acpi_restore_state - undo effects of acpi_save_state_mem
- */
-void acpi_restore_state_mem(void)
-{
- zap_low_mappings();
-}
-
-/**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
- *
- * We allocate a page from the first 1MB of memory for the wakeup
- * routine for when we come back from a sleep state. The
- * runtime allocator allows specification of <16MB pages, but not
- * <1MB pages.
- */
-void __init acpi_reserve_bootmem(void)
-{
- if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) {
- printk(KERN_ERR
- "ACPI: Wakeup code way too big, S3 disabled.\n");
- return;
- }
-
- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
- if (!acpi_wakeup_address)
- printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
-}
-
-static int __init acpi_sleep_setup(char *str)
-{
- while ((str != NULL) && (*str != '\0')) {
- if (strncmp(str, "s3_bios", 7) == 0)
- acpi_video_flags = 1;
- if (strncmp(str, "s3_mode", 7) == 0)
- acpi_video_flags |= 2;
- str = strchr(str, ',');
- if (str != NULL)
- str += strspn(str, ", \t");
- }
- return 1;
-}
-
-__setup("acpi_sleep=", acpi_sleep_setup);
-
-static __init int reset_videomode_after_s3(struct dmi_system_id *d)
-{
- acpi_video_flags |= 2;
- return 0;
-}
-
-static __initdata struct dmi_system_id acpisleep_dmi_table[] = {
- { /* Reset video mode after returning from ACPI S3 sleep */
- .callback = reset_videomode_after_s3,
- .ident = "Toshiba Satellite 4030cdt",
- .matches = {
- DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
- },
- },
- {}
-};
-
-static int __init acpisleep_dmi_init(void)
-{
- dmi_check_system(acpisleep_dmi_table);
- return 0;
-}
-
-core_initcall(acpisleep_dmi_init);
diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
deleted file mode 100644
index 7c74fe0dc93..00000000000
--- a/arch/i386/kernel/acpi/wakeup.S
+++ /dev/null
@@ -1,313 +0,0 @@
-.text
-#include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/page.h>
-
-#
-# wakeup_code runs in real mode, and at unknown address (determined at run-time).
-# Therefore it must only use relative jumps/calls.
-#
-# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled
-#
-# If physical address of wakeup_code is 0x12345, BIOS should call us with
-# cs = 0x1234, eip = 0x05
-#
-
-ALIGN
- .align 4096
-ENTRY(wakeup_start)
-wakeup_code:
- wakeup_code_start = .
- .code16
-
- movw $0xb800, %ax
- movw %ax,%fs
- movw $0x0e00 + 'L', %fs:(0x10)
-
- cli
- cld
-
- # setup data segment
- movw %cs, %ax
- movw %ax, %ds # Make ds:0 point to wakeup_start
- movw %ax, %ss
- mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board
- movw $0x0e00 + 'S', %fs:(0x12)
-
- pushl $0 # Kill any dangerous flags
- popfl
-
- movl real_magic - wakeup_code, %eax
- cmpl $0x12345678, %eax
- jne bogus_real_magic
-
- testl $1, video_flags - wakeup_code
- jz 1f
- lcall $0xc000,$3
- movw %cs, %ax
- movw %ax, %ds # Bios might have played with that
- movw %ax, %ss
-1:
-
- testl $2, video_flags - wakeup_code
- jz 1f
- mov video_mode - wakeup_code, %ax
- call mode_set
-1:
-
- # set up page table
- movl $swapper_pg_dir-__PAGE_OFFSET, %eax
- movl %eax, %cr3
-
- testl $1, real_efer_save_restore - wakeup_code
- jz 4f
- # restore efer setting
- movl real_save_efer_edx - wakeup_code, %edx
- movl real_save_efer_eax - wakeup_code, %eax
- mov $0xc0000080, %ecx
- wrmsr
-4:
- # make sure %cr4 is set correctly (features, etc)
- movl real_save_cr4 - wakeup_code, %eax
- movl %eax, %cr4
- movw $0xb800, %ax
- movw %ax,%fs
- movw $0x0e00 + 'i', %fs:(0x12)
-
- # need a gdt -- use lgdtl to force 32-bit operands, in case
- # the GDT is located past 16 megabytes.
- lgdtl real_save_gdt - wakeup_code
-
- movl real_save_cr0 - wakeup_code, %eax
- movl %eax, %cr0
- jmp 1f
-1:
- movw $0x0e00 + 'n', %fs:(0x14)
-
- movl real_magic - wakeup_code, %eax
- cmpl $0x12345678, %eax
- jne bogus_real_magic
-
- ljmpl $__KERNEL_CS,$wakeup_pmode_return
-
-real_save_gdt: .word 0
- .long 0
-real_save_cr0: .long 0
-real_save_cr3: .long 0
-real_save_cr4: .long 0
-real_magic: .long 0
-video_mode: .long 0
-video_flags: .long 0
-real_efer_save_restore: .long 0
-real_save_efer_edx: .long 0
-real_save_efer_eax: .long 0
-
-bogus_real_magic:
- movw $0x0e00 + 'B', %fs:(0x12)
- jmp bogus_real_magic
-
-/* This code uses an extended set of video mode numbers. These include:
- * Aliases for standard modes
- * NORMAL_VGA (-1)
- * EXTENDED_VGA (-2)
- * ASK_VGA (-3)
- * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
- * of compatibility when extending the table. These are between 0x00 and 0xff.
- */
-#define VIDEO_FIRST_MENU 0x0000
-
-/* Standard BIOS video modes (BIOS number + 0x0100) */
-#define VIDEO_FIRST_BIOS 0x0100
-
-/* VESA BIOS video modes (VESA number + 0x0200) */
-#define VIDEO_FIRST_VESA 0x0200
-
-/* Video7 special modes (BIOS number + 0x0900) */
-#define VIDEO_FIRST_V7 0x0900
-
-# Setting of user mode (AX=mode ID) => CF=success
-mode_set:
- movw %ax, %bx
-#if 0
- cmpb $0xff, %ah
- jz setalias
-
- testb $VIDEO_RECALC>>8, %ah
- jnz _setrec
-
- cmpb $VIDEO_FIRST_RESOLUTION>>8, %ah
- jnc setres
-
- cmpb $VIDEO_FIRST_SPECIAL>>8, %ah
- jz setspc
-
- cmpb $VIDEO_FIRST_V7>>8, %ah
- jz setv7
-#endif
-
- cmpb $VIDEO_FIRST_VESA>>8, %ah
- jnc check_vesa
-#if 0
- orb %ah, %ah
- jz setmenu
-#endif
-
- decb %ah
-# jz setbios Add bios modes later
-
-setbad: clc
- ret
-
-check_vesa:
- subb $VIDEO_FIRST_VESA>>8, %bh
- orw $0x4000, %bx # Use linear frame buffer
- movw $0x4f02, %ax # VESA BIOS mode set call
- int $0x10
- cmpw $0x004f, %ax # AL=4f if implemented
- jnz _setbad # AH=0 if OK
-
- stc
- ret
-
-_setbad: jmp setbad
-
- .code32
- ALIGN
-
-.org 0x800
-wakeup_stack_begin: # Stack grows down
-
-.org 0xff0 # Just below end of page
-wakeup_stack:
-ENTRY(wakeup_end)
-
-.org 0x1000
-
-wakeup_pmode_return:
- movw $__KERNEL_DS, %ax
- movw %ax, %ss
- movw %ax, %ds
- movw %ax, %es
- movw %ax, %fs
- movw %ax, %gs
- movw $0x0e00 + 'u', 0xb8016
-
- # reload the gdt, as we need the full 32 bit address
- lgdt saved_gdt
- lidt saved_idt
- lldt saved_ldt
- ljmp $(__KERNEL_CS),$1f
-1:
- movl %cr3, %eax
- movl %eax, %cr3
- wbinvd
-
- # and restore the stack ... but you need gdt for this to work
- movl saved_context_esp, %esp
-
- movl %cs:saved_magic, %eax
- cmpl $0x12345678, %eax
- jne bogus_magic
-
- # jump to place where we left off
- movl saved_eip,%eax
- jmp *%eax
-
-bogus_magic:
- movw $0x0e00 + 'B', 0xb8018
- jmp bogus_magic
-
-
-##
-# acpi_copy_wakeup_routine
-#
-# Copy the above routine to low memory.
-#
-# Parameters:
-# %eax: place to copy wakeup routine to
-#
-# Returned address is location of code in low memory (past data and stack)
-#
-ENTRY(acpi_copy_wakeup_routine)
-
- sgdt saved_gdt
- sidt saved_idt
- sldt saved_ldt
- str saved_tss
-
- movl nx_enabled, %edx
- movl %edx, real_efer_save_restore - wakeup_start (%eax)
- testl $1, real_efer_save_restore - wakeup_start (%eax)
- jz 2f
- # save efer setting
- pushl %eax
- movl %eax, %ebx
- mov $0xc0000080, %ecx
- rdmsr
- movl %edx, real_save_efer_edx - wakeup_start (%ebx)
- movl %eax, real_save_efer_eax - wakeup_start (%ebx)
- popl %eax
-2:
-
- movl %cr3, %edx
- movl %edx, real_save_cr3 - wakeup_start (%eax)
- movl %cr4, %edx
- movl %edx, real_save_cr4 - wakeup_start (%eax)
- movl %cr0, %edx
- movl %edx, real_save_cr0 - wakeup_start (%eax)
- sgdt real_save_gdt - wakeup_start (%eax)
-
- movl saved_videomode, %edx
- movl %edx, video_mode - wakeup_start (%eax)
- movl acpi_video_flags, %edx
- movl %edx, video_flags - wakeup_start (%eax)
- movl $0x12345678, real_magic - wakeup_start (%eax)
- movl $0x12345678, saved_magic
- ret
-
-.data
-ALIGN
-ENTRY(saved_magic) .long 0
-ENTRY(saved_eip) .long 0
-
-save_registers:
- leal 4(%esp), %eax
- movl %eax, saved_context_esp
- movl %ebx, saved_context_ebx
- movl %ebp, saved_context_ebp
- movl %esi, saved_context_esi
- movl %edi, saved_context_edi
- pushfl ; popl saved_context_eflags
-
- movl $ret_point, saved_eip
- ret
-
-
-restore_registers:
- movl saved_context_ebp, %ebp
- movl saved_context_ebx, %ebx
- movl saved_context_esi, %esi
- movl saved_context_edi, %edi
- pushl saved_context_eflags ; popfl
- ret
-
-ENTRY(do_suspend_lowlevel)
- call save_processor_state
- call save_registers
- pushl $3
- call acpi_enter_sleep_state
- addl $4, %esp
- ret
- .p2align 4,,7
-ret_point:
- call restore_registers
- call restore_processor_state
- ret
-
-ALIGN
-# saved registers
-saved_gdt: .long 0,0
-saved_idt: .long 0,0
-saved_ldt: .long 0
-saved_tss: .long 0
-
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
deleted file mode 100644
index 496a2c9909f..00000000000
--- a/arch/i386/kernel/apic.c
+++ /dev/null
@@ -1,1300 +0,0 @@
-/*
- * Local APIC handling, local APIC timers
- *
- * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
- * thanks to Eric Gilmore
- * and Rolf G. Tews
- * for testing these extensively.
- * Maciej W. Rozycki : Various updates and fixes.
- * Mikael Pettersson : Power Management for UP-APIC.
- * Pavel Machek and
- * Mikael Pettersson : PM converted to driver model.
- */
-
-#include <linux/config.h>
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/cpu.h>
-
-#include <asm/atomic.h>
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/desc.h>
-#include <asm/arch_hooks.h>
-#include <asm/hpet.h>
-#include <asm/i8253.h>
-
-#include <mach_apic.h>
-
-#include "io_ports.h"
-
-/*
- * Knob to control our willingness to enable the local APIC.
- */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
-
-/*
- * Debug level
- */
-int apic_verbosity;
-
-
-static void apic_pm_activate(void);
-
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
- printk("unexpected IRQ trap at vector %02x\n", irq);
- /*
- * Currently unexpected vectors happen only on SMP and APIC.
- * We _must_ ack these because every local APIC has only N
- * irq slots per priority level, and a 'hanging, unacked' IRQ
- * holds up an irq slot - in excessive cases (when multiple
- * unexpected vectors occur) that might lock up the APIC
- * completely.
- */
- ack_APIC_irq();
-}
-
-void __init apic_intr_init(void)
-{
-#ifdef CONFIG_SMP
- smp_intr_init();
-#endif
- /* self generated IPI for local APIC timer */
- set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
- /* IPI vectors for APIC spurious and error interrupts */
- set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
- set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-
- /* thermal monitor LVT interrupt */
-#ifdef CONFIG_X86_MCE_P4THERMAL
- set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-#endif
-}
-
-/* Using APIC to generate smp_local_timer_interrupt? */
-int using_apic_timer = 0;
-
-static DEFINE_PER_CPU(int, prof_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_counter) = 1;
-
-static int enabled_via_apicbase;
-
-void enable_NMI_through_LVT0 (void * dummy)
-{
- unsigned int v, ver;
-
- ver = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(ver);
- v = APIC_DM_NMI; /* unmask and set to NMI */
- if (!APIC_INTEGRATED(ver)) /* 82489DX */
- v |= APIC_LVT_LEVEL_TRIGGER;
- apic_write_around(APIC_LVT0, v);
-}
-
-int get_physical_broadcast(void)
-{
- unsigned int lvr, version;
- lvr = apic_read(APIC_LVR);
- version = GET_APIC_VERSION(lvr);
- if (!APIC_INTEGRATED(version) || version >= 0x14)
- return 0xff;
- else
- return 0xf;
-}
-
-int get_maxlvt(void)
-{
- unsigned int v, ver, maxlvt;
-
- v = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(v);
- /* 82489DXs do not report # of LVT entries. */
- maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
- return maxlvt;
-}
-
-void clear_local_APIC(void)
-{
- int maxlvt;
- unsigned long v;
-
- maxlvt = get_maxlvt();
-
- /*
- * Masking an LVT entry on a P6 can trigger a local APIC error
- * if the vector is zero. Mask LVTERR first to prevent this.
- */
- if (maxlvt >= 3) {
- v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
- apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
- }
- /*
- * Careful: we have to set masks only first to deassert
- * any level-triggered sources.
- */
- v = apic_read(APIC_LVTT);
- apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
- v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
- v = apic_read(APIC_LVT1);
- apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
- if (maxlvt >= 4) {
- v = apic_read(APIC_LVTPC);
- apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
- }
-
-/* lets not touch this if we didn't frob it */
-#ifdef CONFIG_X86_MCE_P4THERMAL
- if (maxlvt >= 5) {
- v = apic_read(APIC_LVTTHMR);
- apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
- }
-#endif
- /*
- * Clean APIC state for other OSs:
- */
- apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
- apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
- if (maxlvt >= 3)
- apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
- if (maxlvt >= 4)
- apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
- if (maxlvt >= 5)
- apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
-#endif
- v = GET_APIC_VERSION(apic_read(APIC_LVR));
- if (APIC_INTEGRATED(v)) { /* !82489DX */
- if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-}
-
-void __init connect_bsp_APIC(void)
-{
- if (pic_mode) {
- /*
- * Do not trust the local APIC being empty at bootup.
- */
- clear_local_APIC();
- /*
- * PIC mode, enable APIC mode in the IMCR, i.e.
- * connect BSP's local APIC to INT and NMI lines.
- */
- apic_printk(APIC_VERBOSE, "leaving PIC mode, "
- "enabling APIC mode.\n");
- outb(0x70, 0x22);
- outb(0x01, 0x23);
- }
- enable_apic_mode();
-}
-
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
- if (pic_mode) {
- /*
- * Put the board back into PIC mode (has an effect
- * only on certain older boards). Note that APIC
- * interrupts, including IPIs, won't work beyond
- * this point! The only exception are INIT IPIs.
- */
- apic_printk(APIC_VERBOSE, "disabling APIC mode, "
- "entering PIC mode.\n");
- outb(0x70, 0x22);
- outb(0x00, 0x23);
- }
- else {
- /* Go back to Virtual Wire compatibility mode */
- unsigned long value;
-
- /* For the spurious interrupt use vector F, and enable it */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- value |= APIC_SPIV_APIC_ENABLED;
- value |= 0xf;
- apic_write_around(APIC_SPIV, value);
-
- if (!virt_wire_setup) {
- /* For LVT0 make it edge triggered, active high, external and enabled */
- value = apic_read(APIC_LVT0);
- value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
- APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
- value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
- apic_write_around(APIC_LVT0, value);
- }
- else {
- /* Disable LVT0 */
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
- }
-
- /* For LVT1 make it edge triggered, active high, nmi and enabled */
- value = apic_read(APIC_LVT1);
- value &= ~(
- APIC_MODE_MASK | APIC_SEND_PENDING |
- APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
- value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
- apic_write_around(APIC_LVT1, value);
- }
-}
-
-void disable_local_APIC(void)
-{
- unsigned long value;
-
- clear_local_APIC();
-
- /*
- * Disable APIC (implies clearing of registers
- * for 82489DX!).
- */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_SPIV_APIC_ENABLED;
- apic_write_around(APIC_SPIV, value);
-
- if (enabled_via_apicbase) {
- unsigned int l, h;
- rdmsr(MSR_IA32_APICBASE, l, h);
- l &= ~MSR_IA32_APICBASE_ENABLE;
- wrmsr(MSR_IA32_APICBASE, l, h);
- }
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
- unsigned int reg0, reg1;
-
- /*
- * The version register is read-only in a real APIC.
- */
- reg0 = apic_read(APIC_LVR);
- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
- apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
- reg1 = apic_read(APIC_LVR);
- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
- /*
- * The two version reads above should print the same
- * numbers. If the second one is different, then we
- * poke at a non-APIC.
- */
- if (reg1 != reg0)
- return 0;
-
- /*
- * Check if the version looks reasonably.
- */
- reg1 = GET_APIC_VERSION(reg0);
- if (reg1 == 0x00 || reg1 == 0xff)
- return 0;
- reg1 = get_maxlvt();
- if (reg1 < 0x02 || reg1 == 0xff)
- return 0;
-
- /*
- * The ID register is read/write in a real APIC.
- */
- reg0 = apic_read(APIC_ID);
- apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-
- /*
- * The next two are just to see if we have sane values.
- * They're only really relevant if we're in Virtual Wire
- * compatibility mode, but most boxes are anymore.
- */
- reg0 = apic_read(APIC_LVT0);
- apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
- reg1 = apic_read(APIC_LVT1);
- apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
- return 1;
-}
-
-void __init sync_Arb_IDs(void)
-{
- /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
- unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- if (ver >= 0x14) /* P4 or higher */
- return;
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
- apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
- | APIC_DM_INIT);
-}
-
-extern void __error_in_apic_c (void);
-
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
- unsigned long value, ver;
-
- /*
- * Don't do the setup now if we have a SMP BIOS as the
- * through-I/O-APIC virtual wire mode might be active.
- */
- if (smp_found_config || !cpu_has_apic)
- return;
-
- value = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(value);
-
- /*
- * Do not trust the local APIC being empty at bootup.
- */
- clear_local_APIC();
-
- /*
- * Enable APIC.
- */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- value |= APIC_SPIV_APIC_ENABLED;
-
- /* This bit is reserved on P4/Xeon and should be cleared */
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15))
- value &= ~APIC_SPIV_FOCUS_DISABLED;
- else
- value |= APIC_SPIV_FOCUS_DISABLED;
- value |= SPURIOUS_APIC_VECTOR;
- apic_write_around(APIC_SPIV, value);
-
- /*
- * Set up the virtual wire mode.
- */
- apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
- value = APIC_DM_NMI;
- if (!APIC_INTEGRATED(ver)) /* 82489DX */
- value |= APIC_LVT_LEVEL_TRIGGER;
- apic_write_around(APIC_LVT1, value);
-}
-
-void __devinit setup_local_APIC(void)
-{
- unsigned long oldvalue, value, ver, maxlvt;
-
- /* Pound the ESR really hard over the head with a big hammer - mbligh */
- if (esr_disable) {
- apic_write(APIC_ESR, 0);
- apic_write(APIC_ESR, 0);
- apic_write(APIC_ESR, 0);
- apic_write(APIC_ESR, 0);
- }
-
- value = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(value);
-
- if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
- __error_in_apic_c();
-
- /*
- * Double-check whether this APIC is really registered.
- */
- if (!apic_id_registered())
- BUG();
-
- /*
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
- init_apic_ldr();
-
- /*
- * Set Task Priority to 'accept all'. We never change this
- * later on.
- */
- value = apic_read(APIC_TASKPRI);
- value &= ~APIC_TPRI_MASK;
- apic_write_around(APIC_TASKPRI, value);
-
- /*
- * Now that we are all set up, enable the APIC
- */
- value = apic_read(APIC_SPIV);
- value &= ~APIC_VECTOR_MASK;
- /*
- * Enable APIC
- */
- value |= APIC_SPIV_APIC_ENABLED;
-
- /*
- * Some unknown Intel IO/APIC (or APIC) errata is biting us with
- * certain networking cards. If high frequency interrupts are
- * happening on a particular IOAPIC pin, plus the IOAPIC routing
- * entry is masked/unmasked at a high rate as well then sooner or
- * later IOAPIC line gets 'stuck', no more interrupts are received
- * from the device. If focus CPU is disabled then the hang goes
- * away, oh well :-(
- *
- * [ This bug can be reproduced easily with a level-triggered
- * PCI Ne2000 networking cards and PII/PIII processors, dual
- * BX chipset. ]
- */
- /*
- * Actually disabling the focus CPU check just makes the hang less
- * frequent as it makes the interrupt distributon model be more
- * like LRU than MRU (the short-term load is more even across CPUs).
- * See also the comment in end_level_ioapic_irq(). --macro
- */
-#if 1
- /* Enable focus processor (bit==0) */
- value &= ~APIC_SPIV_FOCUS_DISABLED;
-#else
- /* Disable focus processor (bit==1) */
- value |= APIC_SPIV_FOCUS_DISABLED;
-#endif
- /*
- * Set spurious IRQ vector
- */
- value |= SPURIOUS_APIC_VECTOR;
- apic_write_around(APIC_SPIV, value);
-
- /*
- * Set up LVT0, LVT1:
- *
- * set up through-local-APIC on the BP's LINT0. This is not
- * strictly necessery in pure symmetric-IO mode, but sometimes
- * we delegate interrupts to the 8259A.
- */
- /*
- * TODO: set up through-local-APIC from through-I/O-APIC? --macro
- */
- value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
- if (!smp_processor_id() && (pic_mode || !value)) {
- value = APIC_DM_EXTINT;
- apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
- smp_processor_id());
- } else {
- value = APIC_DM_EXTINT | APIC_LVT_MASKED;
- apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
- smp_processor_id());
- }
- apic_write_around(APIC_LVT0, value);
-
- /*
- * only the BP should see the LINT1 NMI signal, obviously.
- */
- if (!smp_processor_id())
- value = APIC_DM_NMI;
- else
- value = APIC_DM_NMI | APIC_LVT_MASKED;
- if (!APIC_INTEGRATED(ver)) /* 82489DX */
- value |= APIC_LVT_LEVEL_TRIGGER;
- apic_write_around(APIC_LVT1, value);
-
- if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */
- maxlvt = get_maxlvt();
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- oldvalue = apic_read(APIC_ESR);
-
- value = ERROR_APIC_VECTOR; // enables sending errors
- apic_write_around(APIC_LVTERR, value);
- /*
- * spec says clear errors after enabling vector.
- */
- if (maxlvt > 3)
- apic_write(APIC_ESR, 0);
- value = apic_read(APIC_ESR);
- if (value != oldvalue)
- apic_printk(APIC_VERBOSE, "ESR value before enabling "
- "vector: 0x%08lx after: 0x%08lx\n",
- oldvalue, value);
- } else {
- if (esr_disable)
- /*
- * Something untraceble is creating bad interrupts on
- * secondary quads ... for the moment, just leave the
- * ESR disabled - we can't do anything useful with the
- * errors anyway - mbligh
- */
- printk("Leaving ESR disabled.\n");
- else
- printk("No ESR for 82489DX.\n");
- }
-
- if (nmi_watchdog == NMI_LOCAL_APIC)
- setup_apic_nmi_watchdog();
- apic_pm_activate();
-}
-
-/*
- * If Linux enabled the LAPIC against the BIOS default
- * disable it down before re-entering the BIOS on shutdown.
- * Otherwise the BIOS may get confused and not power-off.
- * Additionally clear all LVT entries before disable_local_APIC
- * for the case where Linux didn't enable the LAPIC.
- */
-void lapic_shutdown(void)
-{
- if (!cpu_has_apic)
- return;
-
- local_irq_disable();
- clear_local_APIC();
-
- if (enabled_via_apicbase)
- disable_local_APIC();
-
- local_irq_enable();
-}
-
-#ifdef CONFIG_PM
-
-static struct {
- int active;
- /* r/w apic fields */
- unsigned int apic_id;
- unsigned int apic_taskpri;
- unsigned int apic_ldr;
- unsigned int apic_dfr;
- unsigned int apic_spiv;
- unsigned int apic_lvtt;
- unsigned int apic_lvtpc;
- unsigned int apic_lvt0;
- unsigned int apic_lvt1;
- unsigned int apic_lvterr;
- unsigned int apic_tmict;
- unsigned int apic_tdcr;
- unsigned int apic_thmr;
-} apic_pm_state;
-
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
-{
- unsigned long flags;
-
- if (!apic_pm_state.active)
- return 0;
-
- apic_pm_state.apic_id = apic_read(APIC_ID);
- apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
- apic_pm_state.apic_ldr = apic_read(APIC_LDR);
- apic_pm_state.apic_dfr = apic_read(APIC_DFR);
- apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
- apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
- apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
- apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
- apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
- apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
- apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
- apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
- apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
-
- local_irq_save(flags);
- disable_local_APIC();
- local_irq_restore(flags);
- return 0;
-}
-
-static int lapic_resume(struct sys_device *dev)
-{
- unsigned int l, h;
- unsigned long flags;
-
- if (!apic_pm_state.active)
- return 0;
-
- local_irq_save(flags);
-
- /*
- * Make sure the APICBASE points to the right address
- *
- * FIXME! This will be wrong if we ever support suspend on
- * SMP! We'll need to do this as part of the CPU restore!
- */
- rdmsr(MSR_IA32_APICBASE, l, h);
- l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
- wrmsr(MSR_IA32_APICBASE, l, h);
-
- apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
- apic_write(APIC_ID, apic_pm_state.apic_id);
- apic_write(APIC_DFR, apic_pm_state.apic_dfr);
- apic_write(APIC_LDR, apic_pm_state.apic_ldr);
- apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
- apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
- apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
- apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
- apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
- apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
- apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
- apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
- apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- local_irq_restore(flags);
- return 0;
-}
-
-/*
- * This device has no shutdown method - fully functioning local APICs
- * are needed on every CPU up until machine_halt/restart/poweroff.
- */
-
-static struct sysdev_class lapic_sysclass = {
- set_kset_name("lapic"),
- .resume = lapic_resume,
- .suspend = lapic_suspend,
-};
-
-static struct sys_device device_lapic = {
- .id = 0,
- .cls = &lapic_sysclass,
-};
-
-static void __devinit apic_pm_activate(void)
-{
- apic_pm_state.active = 1;
-}
-
-static int __init init_lapic_sysfs(void)
-{
- int error;
-
- if (!cpu_has_apic)
- return 0;
- /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-
- error = sysdev_class_register(&lapic_sysclass);
- if (!error)
- error = sysdev_register(&device_lapic);
- return error;
-}
-device_initcall(init_lapic_sysfs);
-
-#else /* CONFIG_PM */
-
-static void apic_pm_activate(void) { }
-
-#endif /* CONFIG_PM */
-
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- */
-
-static int __init apic_set_verbosity(char *str)
-{
- if (strcmp("debug", str) == 0)
- apic_verbosity = APIC_DEBUG;
- else if (strcmp("verbose", str) == 0)
- apic_verbosity = APIC_VERBOSE;
- else
- printk(KERN_WARNING "APIC Verbosity level %s not recognised"
- " use apic=verbose or apic=debug", str);
-
- return 0;
-}
-
-__setup("apic=", apic_set_verbosity);
-
-static int __init detect_init_APIC (void)
-{
- u32 h, l, features;
-
- /* Disabled by kernel option? */
- if (enable_local_apic < 0)
- return -1;
-
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
- (boot_cpu_data.x86 == 15))
- break;
- goto no_apic;
- case X86_VENDOR_INTEL:
- if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
- (boot_cpu_data.x86 == 5 && cpu_has_apic))
- break;
- goto no_apic;
- default:
- goto no_apic;
- }
-
- if (!cpu_has_apic) {
- /*
- * Over-ride BIOS and try to enable the local
- * APIC only if "lapic" specified.
- */
- if (enable_local_apic <= 0) {
- printk("Local APIC disabled by BIOS -- "
- "you can enable it with \"lapic\"\n");
- return -1;
- }
- /*
- * Some BIOSes disable the local APIC in the
- * APIC_BASE MSR. This can only be done in
- * software for Intel P6 or later and AMD K7
- * (Model > 1) or later.
- */
- rdmsr(MSR_IA32_APICBASE, l, h);
- if (!(l & MSR_IA32_APICBASE_ENABLE)) {
- printk("Local APIC disabled by BIOS -- reenabling.\n");
- l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
- wrmsr(MSR_IA32_APICBASE, l, h);
- enabled_via_apicbase = 1;
- }
- }
- /*
- * The APIC feature bit should now be enabled
- * in `cpuid'
- */
- features = cpuid_edx(1);
- if (!(features & (1 << X86_FEATURE_APIC))) {
- printk("Could not enable APIC!\n");
- return -1;
- }
- set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
- /* The BIOS may have set up the APIC at some other address */
- rdmsr(MSR_IA32_APICBASE, l, h);
- if (l & MSR_IA32_APICBASE_ENABLE)
- mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
- if (nmi_watchdog != NMI_NONE)
- nmi_watchdog = NMI_LOCAL_APIC;
-
- printk("Found and enabled local APIC!\n");
-
- apic_pm_activate();
-
- return 0;
-
-no_apic:
- printk("No local APIC present or hardware disabled\n");
- return -1;
-}
-
-void __init init_apic_mappings(void)
-{
- unsigned long apic_phys;
-
- /*
- * If no local APIC can be found then set up a fake all
- * zeroes page to simulate the local APIC and another
- * one for the IO-APIC.
- */
- if (!smp_found_config && detect_init_APIC()) {
- apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
- apic_phys = __pa(apic_phys);
- } else
- apic_phys = mp_lapic_addr;
-
- set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
- printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
- apic_phys);
-
- /*
- * Fetch the APIC ID of the BSP in case we have a
- * default configuration (or the MP table is broken).
- */
- if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
-
-#ifdef CONFIG_X86_IO_APIC
- {
- unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
- int i;
-
- for (i = 0; i < nr_ioapics; i++) {
- if (smp_found_config) {
- ioapic_phys = mp_ioapics[i].mpc_apicaddr;
- if (!ioapic_phys) {
- printk(KERN_ERR
- "WARNING: bogus zero IO-APIC "
- "address found in MPTABLE, "
- "disabling IO/APIC support!\n");
- smp_found_config = 0;
- skip_ioapic_setup = 1;
- goto fake_ioapic_page;
- }
- } else {
-fake_ioapic_page:
- ioapic_phys = (unsigned long)
- alloc_bootmem_pages(PAGE_SIZE);
- ioapic_phys = __pa(ioapic_phys);
- }
- set_fixmap_nocache(idx, ioapic_phys);
- printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
- __fix_to_virt(idx), ioapic_phys);
- idx++;
- }
- }
-#endif
-}
-
-/*
- * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
- * per second. We assume that the caller has already set up the local
- * APIC.
- *
- * The APIC timer is not exactly sync with the external timer chip, it
- * closely follows bus clocks.
- */
-
-/*
- * The timer chip is already set up at HZ interrupts per second here,
- * but we do not accept timer interrupts yet. We only allow the BP
- * to calibrate.
- */
-static unsigned int __devinit get_8254_timer_count(void)
-{
- unsigned long flags;
-
- unsigned int count;
-
- spin_lock_irqsave(&i8253_lock, flags);
-
- outb_p(0x00, PIT_MODE);
- count = inb_p(PIT_CH0);
- count |= inb_p(PIT_CH0) << 8;
-
- spin_unlock_irqrestore(&i8253_lock, flags);
-
- return count;
-}
-
-/* next tick in 8254 can be caught by catching timer wraparound */
-static void __devinit wait_8254_wraparound(void)
-{
- unsigned int curr_count, prev_count;
-
- curr_count = get_8254_timer_count();
- do {
- prev_count = curr_count;
- curr_count = get_8254_timer_count();
-
- /* workaround for broken Mercury/Neptune */
- if (prev_count >= curr_count + 0x100)
- curr_count = get_8254_timer_count();
-
- } while (prev_count >= curr_count);
-}
-
-/*
- * Default initialization for 8254 timers. If we use other timers like HPET,
- * we override this later
- */
-void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound;
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-
-#define APIC_DIVISOR 16
-
-static void __setup_APIC_LVTT(unsigned int clocks)
-{
- unsigned int lvtt_value, tmp_value, ver;
-
- ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
- if (!APIC_INTEGRATED(ver))
- lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
- apic_write_around(APIC_LVTT, lvtt_value);
-
- /*
- * Divide PICLK by 16
- */
- tmp_value = apic_read(APIC_TDCR);
- apic_write_around(APIC_TDCR, (tmp_value
- & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
- | APIC_TDR_DIV_16);
-
- apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
-}
-
-static void __devinit setup_APIC_timer(unsigned int clocks)
-{
- unsigned long flags;
-
- local_irq_save(flags);
-
- /*
- * Wait for IRQ0's slice:
- */
- wait_timer_tick();
-
- __setup_APIC_LVTT(clocks);
-
- local_irq_restore(flags);
-}
-
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-static int __init calibrate_APIC_clock(void)
-{
- unsigned long long t1 = 0, t2 = 0;
- long tt1, tt2;
- long result;
- int i;
- const int LOOPS = HZ/10;
-
- apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n");
-
- /*
- * Put whatever arbitrary (but long enough) timeout
- * value into the APIC clock, we just want to get the
- * counter running for calibration.
- */
- __setup_APIC_LVTT(1000000000);
-
- /*
- * The timer chip counts down to zero. Let's wait
- * for a wraparound to start exact measurement:
- * (the current tick might have been already half done)
- */
-
- wait_timer_tick();
-
- /*
- * We wrapped around just now. Let's start:
- */
- if (cpu_has_tsc)
- rdtscll(t1);
- tt1 = apic_read(APIC_TMCCT);
-
- /*
- * Let's wait LOOPS wraprounds:
- */
- for (i = 0; i < LOOPS; i++)
- wait_timer_tick();
-
- tt2 = apic_read(APIC_TMCCT);
- if (cpu_has_tsc)
- rdtscll(t2);
-
- /*
- * The APIC bus clock counter is 32 bits only, it
- * might have overflown, but note that we use signed
- * longs, thus no extra care needed.
- *
- * underflown to be exact, as the timer counts down ;)
- */
-
- result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
-
- if (cpu_has_tsc)
- apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
- "%ld.%04ld MHz.\n",
- ((long)(t2-t1)/LOOPS)/(1000000/HZ),
- ((long)(t2-t1)/LOOPS)%(1000000/HZ));
-
- apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
- "%ld.%04ld MHz.\n",
- result/(1000000/HZ),
- result%(1000000/HZ));
-
- return result;
-}
-
-static unsigned int calibration_result;
-
-void __init setup_boot_APIC_clock(void)
-{
- unsigned long flags;
- apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
- using_apic_timer = 1;
-
- local_irq_save(flags);
-
- calibration_result = calibrate_APIC_clock();
- /*
- * Now set up the timer for real.
- */
- setup_APIC_timer(calibration_result);
-
- local_irq_restore(flags);
-}
-
-void __devinit setup_secondary_APIC_clock(void)
-{
- setup_APIC_timer(calibration_result);
-}
-
-void __devinit disable_APIC_timer(void)
-{
- if (using_apic_timer) {
- unsigned long v;
-
- v = apic_read(APIC_LVTT);
- apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
- }
-}
-
-void enable_APIC_timer(void)
-{
- if (using_apic_timer) {
- unsigned long v;
-
- v = apic_read(APIC_LVTT);
- apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
- }
-}
-
-/*
- * the frequency of the profiling timer can be changed
- * by writing a multiplier value into /proc/profile.
- */
-int setup_profiling_timer(unsigned int multiplier)
-{
- int i;
-
- /*
- * Sanity check. [at least 500 APIC cycles should be
- * between APIC interrupts as a rule of thumb, to avoid
- * irqs flooding us]
- */
- if ( (!multiplier) || (calibration_result/multiplier < 500))
- return -EINVAL;
-
- /*
- * Set the new multiplier for each CPU. CPUs don't start using the
- * new values until the next timer interrupt in which they do process
- * accounting. At that time they also adjust their APIC timers
- * accordingly.
- */
- for (i = 0; i < NR_CPUS; ++i)
- per_cpu(prof_multiplier, i) = multiplier;
-
- return 0;
-}
-
-#undef APIC_DIVISOR
-
-/*
- * Local timer interrupt handler. It does both profiling and
- * process statistics/rescheduling.
- *
- * We do profiling in every local tick, statistics/rescheduling
- * happen only every 'profiling multiplier' ticks. The default
- * multiplier is 1 and it can be changed by writing the new multiplier
- * value into /proc/profile.
- */
-
-inline void smp_local_timer_interrupt(struct pt_regs * regs)
-{
- int cpu = smp_processor_id();
-
- profile_tick(CPU_PROFILING, regs);
- if (--per_cpu(prof_counter, cpu) <= 0) {
- /*
- * The multiplier may have changed since the last time we got
- * to this point as a result of the user writing to
- * /proc/profile. In this case we need to adjust the APIC
- * timer accordingly.
- *
- * Interrupts are already masked off at this point.
- */
- per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
- if (per_cpu(prof_counter, cpu) !=
- per_cpu(prof_old_multiplier, cpu)) {
- __setup_APIC_LVTT(
- calibration_result/
- per_cpu(prof_counter, cpu));
- per_cpu(prof_old_multiplier, cpu) =
- per_cpu(prof_counter, cpu);
- }
-
-#ifdef CONFIG_SMP
- update_process_times(user_mode_vm(regs));
-#endif
- }
-
- /*
- * We take the 'long' return path, and there every subsystem
- * grabs the apropriate locks (kernel lock/ irq lock).
- *
- * we might want to decouple profiling from the 'long path',
- * and do the profiling totally in assembly.
- *
- * Currently this isn't too much of an issue (performance wise),
- * we can take more than 100K local irqs per second on a 100 MHz P5.
- */
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- * interrupt as well. Thus we cannot inline the local irq ... ]
- */
-
-fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
- int cpu = smp_processor_id();
-
- /*
- * the NMI deadlock-detector uses this.
- */
- per_cpu(irq_stat, cpu).apic_timer_irqs++;
-
- /*
- * NOTE! We'd better ACK the irq immediately,
- * because timer handling can be slow.
- */
- ack_APIC_irq();
- /*
- * update_process_times() expects us to have done irq_enter().
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
- irq_enter();
- smp_local_timer_interrupt(regs);
- irq_exit();
-}
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-fastcall void smp_spurious_interrupt(struct pt_regs *regs)
-{
- unsigned long v;
-
- irq_enter();
- /*
- * Check if this really is a spurious interrupt and ACK it
- * if it is a vectored one. Just in case...
- * Spurious interrupts should not be ACKed.
- */
- v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
- if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
- ack_APIC_irq();
-
- /* see sw-dev-man vol 3, chapter 7.4.13.5 */
- printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n",
- smp_processor_id());
- irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-
-fastcall void smp_error_interrupt(struct pt_regs *regs)
-{
- unsigned long v, v1;
-
- irq_enter();
- /* First tickle the hardware, only then report what went on. -- REW */
- v = apic_read(APIC_ESR);
- apic_write(APIC_ESR, 0);
- v1 = apic_read(APIC_ESR);
- ack_APIC_irq();
- atomic_inc(&irq_err_count);
-
- /* Here is what the APIC error bits mean:
- 0: Send CS error
- 1: Receive CS error
- 2: Send accept error
- 3: Receive accept error
- 4: Reserved
- 5: Send illegal vector
- 6: Received illegal vector
- 7: Illegal register address
- */
- printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
- smp_processor_id(), v , v1);
- irq_exit();
-}
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int __init APIC_init_uniprocessor (void)
-{
- if (enable_local_apic < 0)
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
-
- if (!smp_found_config && !cpu_has_apic)
- return -1;
-
- /*
- * Complain if the BIOS pretends there is one.
- */
- if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
- printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_physical_apicid);
- return -1;
- }
-
- verify_local_APIC();
-
- connect_bsp_APIC();
-
- phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
-
- setup_local_APIC();
-
-#ifdef CONFIG_X86_IO_APIC
- if (smp_found_config)
- if (!skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
-#endif
- setup_boot_APIC_clock();
-
- return 0;
-}
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
deleted file mode 100644
index 86e80c55147..00000000000
--- a/arch/i386/kernel/apm.c
+++ /dev/null
@@ -1,2423 +0,0 @@
-/* -*- linux-c -*-
- * APM BIOS driver for Linux
- * Copyright 1994-2001 Stephen Rothwell (sfr@canb.auug.org.au)
- *
- * Initial development of this driver was funded by NEC Australia P/L
- * and NEC Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * October 1995, Rik Faith (faith@cs.unc.edu):
- * Minor enhancements and updates (to the patch set) for 1.3.x
- * Documentation
- * January 1996, Rik Faith (faith@cs.unc.edu):
- * Make /proc/apm easy to format (bump driver version)
- * March 1996, Rik Faith (faith@cs.unc.edu):
- * Prohibit APM BIOS calls unless apm_enabled.
- * (Thanks to Ulrich Windl <Ulrich.Windl@rz.uni-regensburg.de>)
- * April 1996, Stephen Rothwell (sfr@canb.auug.org.au)
- * Version 1.0 and 1.1
- * May 1996, Version 1.2
- * Feb 1998, Version 1.3
- * Feb 1998, Version 1.4
- * Aug 1998, Version 1.5
- * Sep 1998, Version 1.6
- * Nov 1998, Version 1.7
- * Jan 1999, Version 1.8
- * Jan 1999, Version 1.9
- * Oct 1999, Version 1.10
- * Nov 1999, Version 1.11
- * Jan 2000, Version 1.12
- * Feb 2000, Version 1.13
- * Nov 2000, Version 1.14
- * Oct 2001, Version 1.15
- * Jan 2002, Version 1.16
- * Oct 2002, Version 1.16ac
- *
- * History:
- * 0.6b: first version in official kernel, Linux 1.3.46
- * 0.7: changed /proc/apm format, Linux 1.3.58
- * 0.8: fixed gcc 2.7.[12] compilation problems, Linux 1.3.59
- * 0.9: only call bios if bios is present, Linux 1.3.72
- * 1.0: use fixed device number, consolidate /proc/apm into this file,
- * Linux 1.3.85
- * 1.1: support user-space standby and suspend, power off after system
- * halted, Linux 1.3.98
- * 1.2: When resetting RTC after resume, take care so that the time
- * is only incorrect by 30-60mS (vs. 1S previously) (Gabor J. Toth
- * <jtoth@princeton.edu>); improve interaction between
- * screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4
- * 1.2a:Simple change to stop mysterious bug reports with SMP also added
- * levels to the printk calls. APM is not defined for SMP machines.
- * The new replacment for it is, but Linux doesn't yet support this.
- * Alan Cox Linux 2.1.55
- * 1.3: Set up a valid data descriptor 0x40 for buggy BIOS's
- * 1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by
- * Dean Gaudet <dgaudet@arctic.org>.
- * C. Scott Ananian <cananian@alumni.princeton.edu> Linux 2.1.87
- * 1.5: Fix segment register reloading (in case of bad segments saved
- * across BIOS call).
- * Stephen Rothwell
- * 1.6: Cope with complier/assembler differences.
- * Only try to turn off the first display device.
- * Fix OOPS at power off with no APM BIOS by Jan Echternach
- * <echter@informatik.uni-rostock.de>
- * Stephen Rothwell
- * 1.7: Modify driver's cached copy of the disabled/disengaged flags
- * to reflect current state of APM BIOS.
- * Chris Rankin <rankinc@bellsouth.net>
- * Reset interrupt 0 timer to 100Hz after suspend
- * Chad Miller <cmiller@surfsouth.com>
- * Add CONFIG_APM_IGNORE_SUSPEND_BOUNCE
- * Richard Gooch <rgooch@atnf.csiro.au>
- * Allow boot time disabling of APM
- * Make boot messages far less verbose by default
- * Make asm safer
- * Stephen Rothwell
- * 1.8: Add CONFIG_APM_RTC_IS_GMT
- * Richard Gooch <rgooch@atnf.csiro.au>
- * change APM_NOINTS to CONFIG_APM_ALLOW_INTS
- * remove dependency on CONFIG_PROC_FS
- * Stephen Rothwell
- * 1.9: Fix small typo. <laslo@wodip.opole.pl>
- * Try to cope with BIOS's that need to have all display
- * devices blanked and not just the first one.
- * Ross Paterson <ross@soi.city.ac.uk>
- * Fix segment limit setting it has always been wrong as
- * the segments needed to have byte granularity.
- * Mark a few things __init.
- * Add hack to allow power off of SMP systems by popular request.
- * Use CONFIG_SMP instead of __SMP__
- * Ignore BOUNCES for three seconds.
- * Stephen Rothwell
- * 1.10: Fix for Thinkpad return code.
- * Merge 2.2 and 2.3 drivers.
- * Remove APM dependencies in arch/i386/kernel/process.c
- * Remove APM dependencies in drivers/char/sysrq.c
- * Reset time across standby.
- * Allow more inititialisation on SMP.
- * Remove CONFIG_APM_POWER_OFF and make it boot time
- * configurable (default on).
- * Make debug only a boot time parameter (remove APM_DEBUG).
- * Try to blank all devices on any error.
- * 1.11: Remove APM dependencies in drivers/char/console.c
- * Check nr_running to detect if we are idle (from
- * Borislav Deianov <borislav@lix.polytechnique.fr>)
- * Fix for bioses that don't zero the top part of the
- * entrypoint offset (Mario Sitta <sitta@al.unipmn.it>)
- * (reported by Panos Katsaloulis <teras@writeme.com>).
- * Real mode power off patch (Walter Hofmann
- * <Walter.Hofmann@physik.stud.uni-erlangen.de>).
- * 1.12: Remove CONFIG_SMP as the compiler will optimize
- * the code away anyway (smp_num_cpus == 1 in UP)
- * noted by Artur Skawina <skawina@geocities.com>.
- * Make power off under SMP work again.
- * Fix thinko with initial engaging of BIOS.
- * Make sure power off only happens on CPU 0
- * (Paul "Rusty" Russell <rusty@rustcorp.com.au>).
- * Do error notification to user mode if BIOS calls fail.
- * Move entrypoint offset fix to ...boot/setup.S
- * where it belongs (Cosmos <gis88564@cis.nctu.edu.tw>).
- * Remove smp-power-off. SMP users must now specify
- * "apm=power-off" on the kernel command line. Suggested
- * by Jim Avera <jima@hal.com>, modified by Alan Cox
- * <alan@lxorguk.ukuu.org.uk>.
- * Register the /proc/apm entry even on SMP so that
- * scripts that check for it before doing power off
- * work (Jim Avera <jima@hal.com>).
- * 1.13: Changes for new pm_ interfaces (Andy Henroid
- * <andy_henroid@yahoo.com>).
- * Modularize the code.
- * Fix the Thinkpad (again) :-( (CONFIG_APM_IGNORE_MULTIPLE_SUSPENDS
- * is now the way life works).
- * Fix thinko in suspend() (wrong return).
- * Notify drivers on critical suspend.
- * Make kapmd absorb more idle time (Pavel Machek <pavel@suse.cz>
- * modified by sfr).
- * Disable interrupts while we are suspended (Andy Henroid
- * <andy_henroid@yahoo.com> fixed by sfr).
- * Make power off work on SMP again (Tony Hoyle
- * <tmh@magenta-logic.com> and <zlatko@iskon.hr>) modified by sfr.
- * Remove CONFIG_APM_SUSPEND_BOUNCE. The bounce ignore
- * interval is now configurable.
- * 1.14: Make connection version persist across module unload/load.
- * Enable and engage power management earlier.
- * Disengage power management on module unload.
- * Changed to use the sysrq-register hack for registering the
- * power off function called by magic sysrq based upon discussions
- * in irc://irc.openprojects.net/#kernelnewbies
- * (Crutcher Dunnavant <crutcher+kernel@datastacks.com>).
- * Make CONFIG_APM_REAL_MODE_POWER_OFF run time configurable.
- * (Arjan van de Ven <arjanv@redhat.com>) modified by sfr.
- * Work around byte swap bug in one of the Vaio's BIOS's
- * (Marc Boucher <marc@mbsi.ca>).
- * Exposed the disable flag to dmi so that we can handle known
- * broken APM (Alan Cox <alan@redhat.com>).
- * 1.14ac: If the BIOS says "I slowed the CPU down" then don't spin
- * calling it - instead idle. (Alan Cox <alan@redhat.com>)
- * If an APM idle fails log it and idle sensibly
- * 1.15: Don't queue events to clients who open the device O_WRONLY.
- * Don't expect replies from clients who open the device O_RDONLY.
- * (Idea from Thomas Hood)
- * Minor waitqueue cleanups. (John Fremlin <chief@bandits.org>)
- * 1.16: Fix idle calling. (Andreas Steinmetz <ast@domdv.de> et al.)
- * Notify listeners of standby or suspend events before notifying
- * drivers. Return EBUSY to ioctl() if suspend is rejected.
- * (Russell King <rmk@arm.linux.org.uk> and Thomas Hood)
- * Ignore first resume after we generate our own resume event
- * after a suspend (Thomas Hood)
- * Daemonize now gets rid of our controlling terminal (sfr).
- * CONFIG_APM_CPU_IDLE now just affects the default value of
- * idle_threshold (sfr).
- * Change name of kernel apm daemon (as it no longer idles) (sfr).
- * 1.16ac: Fix up SMP support somewhat. You can now force SMP on and we
- * make _all_ APM calls on the CPU#0. Fix unsafe sign bug.
- * TODO: determine if its "boot CPU" or "CPU0" we want to lock to.
- *
- * APM 1.1 Reference:
- *
- * Intel Corporation, Microsoft Corporation. Advanced Power Management
- * (APM) BIOS Interface Specification, Revision 1.1, September 1993.
- * Intel Order Number 241704-001. Microsoft Part Number 781-110-X01.
- *
- * [This document is available free from Intel by calling 800.628.8686 (fax
- * 916.356.6100) or 800.548.4725; or via anonymous ftp from
- * ftp://ftp.intel.com/pub/IAL/software_specs/apmv11.doc. It is also
- * available from Microsoft by calling 206.882.8080.]
- *
- * APM 1.2 Reference:
- * Intel Corporation, Microsoft Corporation. Advanced Power Management
- * (APM) BIOS Interface Specification, Revision 1.2, February 1996.
- *
- * [This document is available from Microsoft at:
- * http://www.microsoft.com/hwdev/busbios/amp_12.htm]
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/poll.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/timer.h>
-#include <linux/fcntl.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/proc_fs.h>
-#include <linux/miscdevice.h>
-#include <linux/apm_bios.h>
-#include <linux/init.h>
-#include <linux/time.h>
-#include <linux/sched.h>
-#include <linux/pm.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/dmi.h>
-#include <linux/suspend.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/desc.h>
-#include <asm/i8253.h>
-
-#include "io_ports.h"
-
-extern unsigned long get_cmos_time(void);
-extern void machine_real_restart(unsigned char *, int);
-
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-extern int (*console_blank_hook)(int);
-#endif
-
-/*
- * The apm_bios device is one of the misc char devices.
- * This is its minor number.
- */
-#define APM_MINOR_DEV 134
-
-/*
- * See Documentation/Config.help for the configuration options.
- *
- * Various options can be changed at boot time as follows:
- * (We allow underscores for compatibility with the modules code)
- * apm=on/off enable/disable APM
- * [no-]allow[-_]ints allow interrupts during BIOS calls
- * [no-]broken[-_]psr BIOS has a broken GetPowerStatus call
- * [no-]realmode[-_]power[-_]off switch to real mode before
- * powering off
- * [no-]debug log some debugging messages
- * [no-]power[-_]off power off on shutdown
- * [no-]smp Use apm even on an SMP box
- * bounce[-_]interval=<n> number of ticks to ignore suspend
- * bounces
- * idle[-_]threshold=<n> System idle percentage above which to
- * make APM BIOS idle calls. Set it to
- * 100 to disable.
- * idle[-_]period=<n> Period (in 1/100s of a second) over
- * which the idle percentage is
- * calculated.
- */
-
-/* KNOWN PROBLEM MACHINES:
- *
- * U: TI 4000M TravelMate: BIOS is *NOT* APM compliant
- * [Confirmed by TI representative]
- * ?: ACER 486DX4/75: uses dseg 0040, in violation of APM specification
- * [Confirmed by BIOS disassembly]
- * [This may work now ...]
- * P: Toshiba 1950S: battery life information only gets updated after resume
- * P: Midwest Micro Soundbook Elite DX2/66 monochrome: screen blanking
- * broken in BIOS [Reported by Garst R. Reese <reese@isn.net>]
- * ?: AcerNote-950: oops on reading /proc/apm - workaround is a WIP
- * Neale Banks <neale@lowendale.com.au> December 2000
- *
- * Legend: U = unusable with APM patches
- * P = partially usable with APM patches
- */
-
-/*
- * Define as 1 to make the driver always call the APM BIOS busy
- * routine even if the clock was not reported as slowed by the
- * idle routine. Otherwise, define as 0.
- */
-#define ALWAYS_CALL_BUSY 1
-
-/*
- * Define to make the APM BIOS calls zero all data segment registers (so
- * that an incorrect BIOS implementation will cause a kernel panic if it
- * tries to write to arbitrary memory).
- */
-#define APM_ZERO_SEGS
-
-#include "apm.h"
-
-/*
- * Define to make all _set_limit calls use 64k limits. The APM 1.1 BIOS is
- * supposed to provide limit information that it recognizes. Many machines
- * do this correctly, but many others do not restrict themselves to their
- * claimed limit. When this happens, they will cause a segmentation
- * violation in the kernel at boot time. Most BIOS's, however, will
- * respect a 64k limit, so we use that. If you want to be pedantic and
- * hold your BIOS to its claims, then undefine this.
- */
-#define APM_RELAX_SEGMENTS
-
-/*
- * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend.
- * This patched by Chad Miller <cmiller@surfsouth.com>, original code by
- * David Chen <chen@ctpa04.mit.edu>
- */
-#undef INIT_TIMER_AFTER_SUSPEND
-
-#ifdef INIT_TIMER_AFTER_SUSPEND
-#include <linux/timex.h>
-#include <asm/io.h>
-#include <linux/delay.h>
-#endif
-
-/*
- * Need to poll the APM BIOS every second
- */
-#define APM_CHECK_TIMEOUT (HZ)
-
-/*
- * Ignore suspend events for this amount of time after a resume
- */
-#define DEFAULT_BOUNCE_INTERVAL (3 * HZ)
-
-/*
- * Maximum number of events stored
- */
-#define APM_MAX_EVENTS 20
-
-/*
- * The per-file APM data
- */
-struct apm_user {
- int magic;
- struct apm_user * next;
- unsigned int suser: 1;
- unsigned int writer: 1;
- unsigned int reader: 1;
- unsigned int suspend_wait: 1;
- int suspend_result;
- int suspends_pending;
- int standbys_pending;
- int suspends_read;
- int standbys_read;
- int event_head;
- int event_tail;
- apm_event_t events[APM_MAX_EVENTS];
-};
-
-/*
- * The magic number in apm_user
- */
-#define APM_BIOS_MAGIC 0x4101
-
-/*
- * idle percentage above which bios idle calls are done
- */
-#ifdef CONFIG_APM_CPU_IDLE
-#define DEFAULT_IDLE_THRESHOLD 95
-#else
-#define DEFAULT_IDLE_THRESHOLD 100
-#endif
-#define DEFAULT_IDLE_PERIOD (100 / 3)
-
-/*
- * Local variables
- */
-static struct {
- unsigned long offset;
- unsigned short segment;
-} apm_bios_entry;
-static int clock_slowed;
-static int idle_threshold = DEFAULT_IDLE_THRESHOLD;
-static int idle_period = DEFAULT_IDLE_PERIOD;
-static int set_pm_idle;
-static int suspends_pending;
-static int standbys_pending;
-static int ignore_sys_suspend;
-static int ignore_normal_resume;
-static int bounce_interval = DEFAULT_BOUNCE_INTERVAL;
-
-#ifdef CONFIG_APM_RTC_IS_GMT
-# define clock_cmos_diff 0
-# define got_clock_diff 1
-#else
-static long clock_cmos_diff;
-static int got_clock_diff;
-#endif
-static int debug;
-static int smp;
-static int apm_disabled = -1;
-#ifdef CONFIG_SMP
-static int power_off;
-#else
-static int power_off = 1;
-#endif
-#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
-static int realmode_power_off = 1;
-#else
-static int realmode_power_off;
-#endif
-static int exit_kapmd;
-static int kapmd_running;
-#ifdef CONFIG_APM_ALLOW_INTS
-static int allow_ints = 1;
-#else
-static int allow_ints;
-#endif
-static int broken_psr;
-
-static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
-static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
-static struct apm_user * user_list;
-static DEFINE_SPINLOCK(user_list_lock);
-static struct desc_struct bad_bios_desc = { 0, 0x00409200 };
-
-static char driver_version[] = "1.16ac"; /* no spaces */
-
-/*
- * APM event names taken from the APM 1.2 specification. These are
- * the message codes that the BIOS uses to tell us about events
- */
-static char * apm_event_name[] = {
- "system standby",
- "system suspend",
- "normal resume",
- "critical resume",
- "low battery",
- "power status change",
- "update time",
- "critical suspend",
- "user standby",
- "user suspend",
- "system standby resume",
- "capabilities change"
-};
-#define NR_APM_EVENT_NAME ARRAY_SIZE(apm_event_name)
-
-typedef struct lookup_t {
- int key;
- char * msg;
-} lookup_t;
-
-/*
- * The BIOS returns a set of standard error codes in AX when the
- * carry flag is set.
- */
-
-static const lookup_t error_table[] = {
-/* N/A { APM_SUCCESS, "Operation succeeded" }, */
- { APM_DISABLED, "Power management disabled" },
- { APM_CONNECTED, "Real mode interface already connected" },
- { APM_NOT_CONNECTED, "Interface not connected" },
- { APM_16_CONNECTED, "16 bit interface already connected" },
-/* N/A { APM_16_UNSUPPORTED, "16 bit interface not supported" }, */
- { APM_32_CONNECTED, "32 bit interface already connected" },
- { APM_32_UNSUPPORTED, "32 bit interface not supported" },
- { APM_BAD_DEVICE, "Unrecognized device ID" },
- { APM_BAD_PARAM, "Parameter out of range" },
- { APM_NOT_ENGAGED, "Interface not engaged" },
- { APM_BAD_FUNCTION, "Function not supported" },
- { APM_RESUME_DISABLED, "Resume timer disabled" },
- { APM_BAD_STATE, "Unable to enter requested state" },
-/* N/A { APM_NO_EVENTS, "No events pending" }, */
- { APM_NO_ERROR, "BIOS did not set a return code" },
- { APM_NOT_PRESENT, "No APM present" }
-};
-#define ERROR_COUNT ARRAY_SIZE(error_table)
-
-/**
- * apm_error - display an APM error
- * @str: information string
- * @err: APM BIOS return code
- *
- * Write a meaningful log entry to the kernel log in the event of
- * an APM error.
- */
-
-static void apm_error(char *str, int err)
-{
- int i;
-
- for (i = 0; i < ERROR_COUNT; i++)
- if (error_table[i].key == err) break;
- if (i < ERROR_COUNT)
- printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
- else
- printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
- str, err);
-}
-
-/*
- * Lock APM functionality to physical CPU 0
- */
-
-#ifdef CONFIG_SMP
-
-static cpumask_t apm_save_cpus(void)
-{
- cpumask_t x = current->cpus_allowed;
- /* Some bioses don't like being called from CPU != 0 */
- set_cpus_allowed(current, cpumask_of_cpu(0));
- BUG_ON(smp_processor_id() != 0);
- return x;
-}
-
-static inline void apm_restore_cpus(cpumask_t mask)
-{
- set_cpus_allowed(current, mask);
-}
-
-#else
-
-/*
- * No CPU lockdown needed on a uniprocessor
- */
-
-#define apm_save_cpus() (current->cpus_allowed)
-#define apm_restore_cpus(x) (void)(x)
-
-#endif
-
-/*
- * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and
- * apm_info.allow_ints, we are being really paranoid here! Not only
- * are interrupts disabled, but all the segment registers (except SS)
- * are saved and zeroed this means that if the BIOS tries to reference
- * any data without explicitly loading the segment registers, the kernel
- * will fault immediately rather than have some unforeseen circumstances
- * for the rest of the kernel. And it will be very obvious! :-) Doing
- * this depends on CS referring to the same physical memory as DS so that
- * DS can be zeroed before the call. Unfortunately, we can't do anything
- * about the stack segment/pointer. Also, we tell the compiler that
- * everything could change.
- *
- * Also, we KNOW that for the non error case of apm_bios_call, there
- * is no useful data returned in the low order 8 bits of eax.
- */
-#define APM_DO_CLI \
- if (apm_info.allow_ints) \
- local_irq_enable(); \
- else \
- local_irq_disable();
-
-#ifdef APM_ZERO_SEGS
-# define APM_DECL_SEGS \
- unsigned int saved_fs; unsigned int saved_gs;
-# define APM_DO_SAVE_SEGS \
- savesegment(fs, saved_fs); savesegment(gs, saved_gs)
-# define APM_DO_RESTORE_SEGS \
- loadsegment(fs, saved_fs); loadsegment(gs, saved_gs)
-#else
-# define APM_DECL_SEGS
-# define APM_DO_SAVE_SEGS
-# define APM_DO_RESTORE_SEGS
-#endif
-
-/**
- * apm_bios_call - Make an APM BIOS 32bit call
- * @func: APM function to execute
- * @ebx_in: EBX register for call entry
- * @ecx_in: ECX register for call entry
- * @eax: EAX register return
- * @ebx: EBX register return
- * @ecx: ECX register return
- * @edx: EDX register return
- * @esi: ESI register return
- *
- * Make an APM call using the 32bit protected mode interface. The
- * caller is responsible for knowing if APM BIOS is configured and
- * enabled. This call can disable interrupts for a long period of
- * time on some laptops. The return value is in AH and the carry
- * flag is loaded into AL. If there is an error, then the error
- * code is returned in AH (bits 8-15 of eax) and this function
- * returns non-zero.
- */
-
-static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
- u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi)
-{
- APM_DECL_SEGS
- unsigned long flags;
- cpumask_t cpus;
- int cpu;
- struct desc_struct save_desc_40;
- struct desc_struct *gdt;
-
- cpus = apm_save_cpus();
-
- cpu = get_cpu();
- gdt = get_cpu_gdt_table(cpu);
- save_desc_40 = gdt[0x40 / 8];
- gdt[0x40 / 8] = bad_bios_desc;
-
- local_save_flags(flags);
- APM_DO_CLI;
- APM_DO_SAVE_SEGS;
- apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi);
- APM_DO_RESTORE_SEGS;
- local_irq_restore(flags);
- gdt[0x40 / 8] = save_desc_40;
- put_cpu();
- apm_restore_cpus(cpus);
-
- return *eax & 0xff;
-}
-
-/**
- * apm_bios_call_simple - make a simple APM BIOS 32bit call
- * @func: APM function to invoke
- * @ebx_in: EBX register value for BIOS call
- * @ecx_in: ECX register value for BIOS call
- * @eax: EAX register on return from the BIOS call
- *
- * Make a BIOS call that does only returns one value, or just status.
- * If there is an error, then the error code is returned in AH
- * (bits 8-15 of eax) and this function returns non-zero. This is
- * used for simpler BIOS operations. This call may hold interrupts
- * off for a long time on some laptops.
- */
-
-static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
-{
- u8 error;
- APM_DECL_SEGS
- unsigned long flags;
- cpumask_t cpus;
- int cpu;
- struct desc_struct save_desc_40;
- struct desc_struct *gdt;
-
- cpus = apm_save_cpus();
-
- cpu = get_cpu();
- gdt = get_cpu_gdt_table(cpu);
- save_desc_40 = gdt[0x40 / 8];
- gdt[0x40 / 8] = bad_bios_desc;
-
- local_save_flags(flags);
- APM_DO_CLI;
- APM_DO_SAVE_SEGS;
- error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax);
- APM_DO_RESTORE_SEGS;
- local_irq_restore(flags);
- gdt[0x40 / 8] = save_desc_40;
- put_cpu();
- apm_restore_cpus(cpus);
- return error;
-}
-
-/**
- * apm_driver_version - APM driver version
- * @val: loaded with the APM version on return
- *
- * Retrieve the APM version supported by the BIOS. This is only
- * supported for APM 1.1 or higher. An error indicates APM 1.0 is
- * probably present.
- *
- * On entry val should point to a value indicating the APM driver
- * version with the high byte being the major and the low byte the
- * minor number both in BCD
- *
- * On return it will hold the BIOS revision supported in the
- * same format.
- */
-
-static int apm_driver_version(u_short *val)
-{
- u32 eax;
-
- if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax))
- return (eax >> 8) & 0xff;
- *val = eax;
- return APM_SUCCESS;
-}
-
-/**
- * apm_get_event - get an APM event from the BIOS
- * @event: pointer to the event
- * @info: point to the event information
- *
- * The APM BIOS provides a polled information for event
- * reporting. The BIOS expects to be polled at least every second
- * when events are pending. When a message is found the caller should
- * poll until no more messages are present. However, this causes
- * problems on some laptops where a suspend event notification is
- * not cleared until it is acknowledged.
- *
- * Additional information is returned in the info pointer, providing
- * that APM 1.2 is in use. If no messges are pending the value 0x80
- * is returned (No power management events pending).
- */
-
-static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
-{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 dummy;
-
- if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx,
- &dummy, &dummy))
- return (eax >> 8) & 0xff;
- *event = ebx;
- if (apm_info.connection_version < 0x0102)
- *info = ~0; /* indicate info not valid */
- else
- *info = ecx;
- return APM_SUCCESS;
-}
-
-/**
- * set_power_state - set the power management state
- * @what: which items to transition
- * @state: state to transition to
- *
- * Request an APM change of state for one or more system devices. The
- * processor state must be transitioned last of all. what holds the
- * class of device in the upper byte and the device number (0xFF for
- * all) for the object to be transitioned.
- *
- * The state holds the state to transition to, which may in fact
- * be an acceptance of a BIOS requested state change.
- */
-
-static int set_power_state(u_short what, u_short state)
-{
- u32 eax;
-
- if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax))
- return (eax >> 8) & 0xff;
- return APM_SUCCESS;
-}
-
-/**
- * set_system_power_state - set system wide power state
- * @state: which state to enter
- *
- * Transition the entire system into a new APM power state.
- */
-
-static int set_system_power_state(u_short state)
-{
- return set_power_state(APM_DEVICE_ALL, state);
-}
-
-/**
- * apm_do_idle - perform power saving
- *
- * This function notifies the BIOS that the processor is (in the view
- * of the OS) idle. It returns -1 in the event that the BIOS refuses
- * to handle the idle request. On a success the function returns 1
- * if the BIOS did clock slowing or 0 otherwise.
- */
-
-static int apm_do_idle(void)
-{
- u32 eax;
-
- if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) {
- static unsigned long t;
-
- /* This always fails on some SMP boards running UP kernels.
- * Only report the failure the first 5 times.
- */
- if (++t < 5)
- {
- printk(KERN_DEBUG "apm_do_idle failed (%d)\n",
- (eax >> 8) & 0xff);
- t = jiffies;
- }
- return -1;
- }
- clock_slowed = (apm_info.bios.flags & APM_IDLE_SLOWS_CLOCK) != 0;
- return clock_slowed;
-}
-
-/**
- * apm_do_busy - inform the BIOS the CPU is busy
- *
- * Request that the BIOS brings the CPU back to full performance.
- */
-
-static void apm_do_busy(void)
-{
- u32 dummy;
-
- if (clock_slowed || ALWAYS_CALL_BUSY) {
- (void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy);
- clock_slowed = 0;
- }
-}
-
-/*
- * If no process has really been interested in
- * the CPU for some time, we want to call BIOS
- * power management - we probably want
- * to conserve power.
- */
-#define IDLE_CALC_LIMIT (HZ * 100)
-#define IDLE_LEAKY_MAX 16
-
-static void (*original_pm_idle)(void);
-
-extern void default_idle(void);
-
-/**
- * apm_cpu_idle - cpu idling for APM capable Linux
- *
- * This is the idling function the kernel executes when APM is available. It
- * tries to do BIOS powermanagement based on the average system idle time.
- * Furthermore it calls the system default idle routine.
- */
-
-static void apm_cpu_idle(void)
-{
- static int use_apm_idle; /* = 0 */
- static unsigned int last_jiffies; /* = 0 */
- static unsigned int last_stime; /* = 0 */
-
- int apm_idle_done = 0;
- unsigned int jiffies_since_last_check = jiffies - last_jiffies;
- unsigned int bucket;
-
-recalc:
- if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
- use_apm_idle = 0;
- last_jiffies = jiffies;
- last_stime = current->stime;
- } else if (jiffies_since_last_check > idle_period) {
- unsigned int idle_percentage;
-
- idle_percentage = current->stime - last_stime;
- idle_percentage *= 100;
- idle_percentage /= jiffies_since_last_check;
- use_apm_idle = (idle_percentage > idle_threshold);
- if (apm_info.forbid_idle)
- use_apm_idle = 0;
- last_jiffies = jiffies;
- last_stime = current->stime;
- }
-
- bucket = IDLE_LEAKY_MAX;
-
- while (!need_resched()) {
- if (use_apm_idle) {
- unsigned int t;
-
- t = jiffies;
- switch (apm_do_idle()) {
- case 0: apm_idle_done = 1;
- if (t != jiffies) {
- if (bucket) {
- bucket = IDLE_LEAKY_MAX;
- continue;
- }
- } else if (bucket) {
- bucket--;
- continue;
- }
- break;
- case 1: apm_idle_done = 1;
- break;
- default: /* BIOS refused */
- break;
- }
- }
- if (original_pm_idle)
- original_pm_idle();
- else
- default_idle();
- jiffies_since_last_check = jiffies - last_jiffies;
- if (jiffies_since_last_check > idle_period)
- goto recalc;
- }
-
- if (apm_idle_done)
- apm_do_busy();
-}
-
-/**
- * apm_power_off - ask the BIOS to power off
- *
- * Handle the power off sequence. This is the one piece of code we
- * will execute even on SMP machines. In order to deal with BIOS
- * bugs we support real mode APM BIOS power off calls. We also make
- * the SMP call on CPU0 as some systems will only honour this call
- * on their first cpu.
- */
-
-static void apm_power_off(void)
-{
- unsigned char po_bios_call[] = {
- 0xb8, 0x00, 0x10, /* movw $0x1000,ax */
- 0x8e, 0xd0, /* movw ax,ss */
- 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */
- 0xb8, 0x07, 0x53, /* movw $0x5307,ax */
- 0xbb, 0x01, 0x00, /* movw $0x0001,bx */
- 0xb9, 0x03, 0x00, /* movw $0x0003,cx */
- 0xcd, 0x15 /* int $0x15 */
- };
-
- /* Some bioses don't like being called from CPU != 0 */
- if (apm_info.realmode_power_off)
- {
- (void)apm_save_cpus();
- machine_real_restart(po_bios_call, sizeof(po_bios_call));
- }
- else
- (void) set_system_power_state(APM_STATE_OFF);
-}
-
-#ifdef CONFIG_APM_DO_ENABLE
-
-/**
- * apm_enable_power_management - enable BIOS APM power management
- * @enable: enable yes/no
- *
- * Enable or disable the APM BIOS power services.
- */
-
-static int apm_enable_power_management(int enable)
-{
- u32 eax;
-
- if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED))
- return APM_NOT_ENGAGED;
- if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL,
- enable, &eax))
- return (eax >> 8) & 0xff;
- if (enable)
- apm_info.bios.flags &= ~APM_BIOS_DISABLED;
- else
- apm_info.bios.flags |= APM_BIOS_DISABLED;
- return APM_SUCCESS;
-}
-#endif
-
-/**
- * apm_get_power_status - get current power state
- * @status: returned status
- * @bat: battery info
- * @life: estimated life
- *
- * Obtain the current power status from the APM BIOS. We return a
- * status which gives the rough battery status, and current power
- * source. The bat value returned give an estimate as a percentage
- * of life and a status value for the battery. The estimated life
- * if reported is a lifetime in secodnds/minutes at current powwer
- * consumption.
- */
-
-static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
-{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 dummy;
-
- if (apm_info.get_power_status_broken)
- return APM_32_UNSUPPORTED;
- if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0,
- &eax, &ebx, &ecx, &edx, &dummy))
- return (eax >> 8) & 0xff;
- *status = ebx;
- *bat = ecx;
- if (apm_info.get_power_status_swabinminutes) {
- *life = swab16((u16)edx);
- *life |= 0x8000;
- } else
- *life = edx;
- return APM_SUCCESS;
-}
-
-#if 0
-static int apm_get_battery_status(u_short which, u_short *status,
- u_short *bat, u_short *life, u_short *nbat)
-{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 esi;
-
- if (apm_info.connection_version < 0x0102) {
- /* pretend we only have one battery. */
- if (which != 1)
- return APM_BAD_DEVICE;
- *nbat = 1;
- return apm_get_power_status(status, bat, life);
- }
-
- if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax,
- &ebx, &ecx, &edx, &esi))
- return (eax >> 8) & 0xff;
- *status = ebx;
- *bat = ecx;
- *life = edx;
- *nbat = esi;
- return APM_SUCCESS;
-}
-#endif
-
-/**
- * apm_engage_power_management - enable PM on a device
- * @device: identity of device
- * @enable: on/off
- *
- * Activate or deactive power management on either a specific device
- * or the entire system (%APM_DEVICE_ALL).
- */
-
-static int apm_engage_power_management(u_short device, int enable)
-{
- u32 eax;
-
- if ((enable == 0) && (device == APM_DEVICE_ALL)
- && (apm_info.bios.flags & APM_BIOS_DISABLED))
- return APM_DISABLED;
- if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax))
- return (eax >> 8) & 0xff;
- if (device == APM_DEVICE_ALL) {
- if (enable)
- apm_info.bios.flags &= ~APM_BIOS_DISENGAGED;
- else
- apm_info.bios.flags |= APM_BIOS_DISENGAGED;
- }
- return APM_SUCCESS;
-}
-
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-
-/**
- * apm_console_blank - blank the display
- * @blank: on/off
- *
- * Attempt to blank the console, firstly by blanking just video device
- * zero, and if that fails (some BIOSes don't support it) then it blanks
- * all video devices. Typically the BIOS will do laptop backlight and
- * monitor powerdown for us.
- */
-
-static int apm_console_blank(int blank)
-{
- int error;
- u_short state;
-
- state = blank ? APM_STATE_STANDBY : APM_STATE_READY;
- /* Blank the first display device */
- error = set_power_state(0x100, state);
- if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) {
- /* try to blank them all instead */
- error = set_power_state(0x1ff, state);
- if ((error != APM_SUCCESS) && (error != APM_NO_ERROR))
- /* try to blank device one instead */
- error = set_power_state(0x101, state);
- }
- if ((error == APM_SUCCESS) || (error == APM_NO_ERROR))
- return 1;
- if (error == APM_NOT_ENGAGED) {
- static int tried;
- int eng_error;
- if (tried++ == 0) {
- eng_error = apm_engage_power_management(APM_DEVICE_ALL, 1);
- if (eng_error) {
- apm_error("set display", error);
- apm_error("engage interface", eng_error);
- return 0;
- } else
- return apm_console_blank(blank);
- }
- }
- apm_error("set display", error);
- return 0;
-}
-#endif
-
-static int queue_empty(struct apm_user *as)
-{
- return as->event_head == as->event_tail;
-}
-
-static apm_event_t get_queued_event(struct apm_user *as)
-{
- as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
- return as->events[as->event_tail];
-}
-
-static void queue_event(apm_event_t event, struct apm_user *sender)
-{
- struct apm_user * as;
-
- spin_lock(&user_list_lock);
- if (user_list == NULL)
- goto out;
- for (as = user_list; as != NULL; as = as->next) {
- if ((as == sender) || (!as->reader))
- continue;
- as->event_head = (as->event_head + 1) % APM_MAX_EVENTS;
- if (as->event_head == as->event_tail) {
- static int notified;
-
- if (notified++ == 0)
- printk(KERN_ERR "apm: an event queue overflowed\n");
- as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
- }
- as->events[as->event_head] = event;
- if ((!as->suser) || (!as->writer))
- continue;
- switch (event) {
- case APM_SYS_SUSPEND:
- case APM_USER_SUSPEND:
- as->suspends_pending++;
- suspends_pending++;
- break;
-
- case APM_SYS_STANDBY:
- case APM_USER_STANDBY:
- as->standbys_pending++;
- standbys_pending++;
- break;
- }
- }
- wake_up_interruptible(&apm_waitqueue);
-out:
- spin_unlock(&user_list_lock);
-}
-
-static void set_time(void)
-{
- if (got_clock_diff) { /* Must know time zone in order to set clock */
- xtime.tv_sec = get_cmos_time() + clock_cmos_diff;
- xtime.tv_nsec = 0;
- }
-}
-
-static void get_time_diff(void)
-{
-#ifndef CONFIG_APM_RTC_IS_GMT
- /*
- * Estimate time zone so that set_time can update the clock
- */
- clock_cmos_diff = -get_cmos_time();
- clock_cmos_diff += get_seconds();
- got_clock_diff = 1;
-#endif
-}
-
-static void reinit_timer(void)
-{
-#ifdef INIT_TIMER_AFTER_SUSPEND
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
- /* set the clock to 100 Hz */
- outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
- udelay(10);
- outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
- udelay(10);
- outb(LATCH >> 8, PIT_CH0); /* MSB */
- udelay(10);
- spin_unlock_irqrestore(&i8253_lock, flags);
-#endif
-}
-
-static int suspend(int vetoable)
-{
- int err;
- struct apm_user *as;
-
- if (pm_send_all(PM_SUSPEND, (void *)3)) {
- /* Vetoed */
- if (vetoable) {
- if (apm_info.connection_version > 0x100)
- set_system_power_state(APM_STATE_REJECT);
- err = -EBUSY;
- ignore_sys_suspend = 0;
- printk(KERN_WARNING "apm: suspend was vetoed.\n");
- goto out;
- }
- printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n");
- }
-
- device_suspend(PMSG_SUSPEND);
- local_irq_disable();
- device_power_down(PMSG_SUSPEND);
-
- /* serialize with the timer interrupt */
- write_seqlock(&xtime_lock);
-
- /* protect against access to timer chip registers */
- spin_lock(&i8253_lock);
-
- get_time_diff();
- /*
- * Irq spinlock must be dropped around set_system_power_state.
- * We'll undo any timer changes due to interrupts below.
- */
- spin_unlock(&i8253_lock);
- write_sequnlock(&xtime_lock);
- local_irq_enable();
-
- save_processor_state();
- err = set_system_power_state(APM_STATE_SUSPEND);
- ignore_normal_resume = 1;
- restore_processor_state();
-
- local_irq_disable();
- write_seqlock(&xtime_lock);
- spin_lock(&i8253_lock);
- reinit_timer();
- set_time();
-
- spin_unlock(&i8253_lock);
- write_sequnlock(&xtime_lock);
-
- if (err == APM_NO_ERROR)
- err = APM_SUCCESS;
- if (err != APM_SUCCESS)
- apm_error("suspend", err);
- err = (err == APM_SUCCESS) ? 0 : -EIO;
- device_power_up();
- local_irq_enable();
- device_resume();
- pm_send_all(PM_RESUME, (void *)0);
- queue_event(APM_NORMAL_RESUME, NULL);
- out:
- spin_lock(&user_list_lock);
- for (as = user_list; as != NULL; as = as->next) {
- as->suspend_wait = 0;
- as->suspend_result = err;
- }
- spin_unlock(&user_list_lock);
- wake_up_interruptible(&apm_suspend_waitqueue);
- return err;
-}
-
-static void standby(void)
-{
- int err;
-
- local_irq_disable();
- device_power_down(PMSG_SUSPEND);
- /* serialize with the timer interrupt */
- write_seqlock(&xtime_lock);
- /* If needed, notify drivers here */
- get_time_diff();
- write_sequnlock(&xtime_lock);
- local_irq_enable();
-
- err = set_system_power_state(APM_STATE_STANDBY);
- if ((err != APM_SUCCESS) && (err != APM_NO_ERROR))
- apm_error("standby", err);
-
- local_irq_disable();
- device_power_up();
- local_irq_enable();
-}
-
-static apm_event_t get_event(void)
-{
- int error;
- apm_event_t event;
- apm_eventinfo_t info;
-
- static int notified;
-
- /* we don't use the eventinfo */
- error = apm_get_event(&event, &info);
- if (error == APM_SUCCESS)
- return event;
-
- if ((error != APM_NO_EVENTS) && (notified++ == 0))
- apm_error("get_event", error);
-
- return 0;
-}
-
-static void check_events(void)
-{
- apm_event_t event;
- static unsigned long last_resume;
- static int ignore_bounce;
-
- while ((event = get_event()) != 0) {
- if (debug) {
- if (event <= NR_APM_EVENT_NAME)
- printk(KERN_DEBUG "apm: received %s notify\n",
- apm_event_name[event - 1]);
- else
- printk(KERN_DEBUG "apm: received unknown "
- "event 0x%02x\n", event);
- }
- if (ignore_bounce
- && ((jiffies - last_resume) > bounce_interval))
- ignore_bounce = 0;
-
- switch (event) {
- case APM_SYS_STANDBY:
- case APM_USER_STANDBY:
- queue_event(event, NULL);
- if (standbys_pending <= 0)
- standby();
- break;
-
- case APM_USER_SUSPEND:
-#ifdef CONFIG_APM_IGNORE_USER_SUSPEND
- if (apm_info.connection_version > 0x100)
- set_system_power_state(APM_STATE_REJECT);
- break;
-#endif
- case APM_SYS_SUSPEND:
- if (ignore_bounce) {
- if (apm_info.connection_version > 0x100)
- set_system_power_state(APM_STATE_REJECT);
- break;
- }
- /*
- * If we are already processing a SUSPEND,
- * then further SUSPEND events from the BIOS
- * will be ignored. We also return here to
- * cope with the fact that the Thinkpads keep
- * sending a SUSPEND event until something else
- * happens!
- */
- if (ignore_sys_suspend)
- return;
- ignore_sys_suspend = 1;
- queue_event(event, NULL);
- if (suspends_pending <= 0)
- (void) suspend(1);
- break;
-
- case APM_NORMAL_RESUME:
- case APM_CRITICAL_RESUME:
- case APM_STANDBY_RESUME:
- ignore_sys_suspend = 0;
- last_resume = jiffies;
- ignore_bounce = 1;
- if ((event != APM_NORMAL_RESUME)
- || (ignore_normal_resume == 0)) {
- write_seqlock_irq(&xtime_lock);
- set_time();
- write_sequnlock_irq(&xtime_lock);
- device_resume();
- pm_send_all(PM_RESUME, (void *)0);
- queue_event(event, NULL);
- }
- ignore_normal_resume = 0;
- break;
-
- case APM_CAPABILITY_CHANGE:
- case APM_LOW_BATTERY:
- case APM_POWER_STATUS_CHANGE:
- queue_event(event, NULL);
- /* If needed, notify drivers here */
- break;
-
- case APM_UPDATE_TIME:
- write_seqlock_irq(&xtime_lock);
- set_time();
- write_sequnlock_irq(&xtime_lock);
- break;
-
- case APM_CRITICAL_SUSPEND:
- /*
- * We are not allowed to reject a critical suspend.
- */
- (void) suspend(0);
- break;
- }
- }
-}
-
-static void apm_event_handler(void)
-{
- static int pending_count = 4;
- int err;
-
- if ((standbys_pending > 0) || (suspends_pending > 0)) {
- if ((apm_info.connection_version > 0x100) &&
- (pending_count-- <= 0)) {
- pending_count = 4;
- if (debug)
- printk(KERN_DEBUG "apm: setting state busy\n");
- err = set_system_power_state(APM_STATE_BUSY);
- if (err)
- apm_error("busy", err);
- }
- } else
- pending_count = 4;
- check_events();
-}
-
-/*
- * This is the APM thread main loop.
- */
-
-static void apm_mainloop(void)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(&apm_waitqueue, &wait);
- set_current_state(TASK_INTERRUPTIBLE);
- for (;;) {
- schedule_timeout(APM_CHECK_TIMEOUT);
- if (exit_kapmd)
- break;
- /*
- * Ok, check all events, check for idle (and mark us sleeping
- * so as not to count towards the load average)..
- */
- set_current_state(TASK_INTERRUPTIBLE);
- apm_event_handler();
- }
- remove_wait_queue(&apm_waitqueue, &wait);
-}
-
-static int check_apm_user(struct apm_user *as, const char *func)
-{
- if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) {
- printk(KERN_ERR "apm: %s passed bad filp\n", func);
- return 1;
- }
- return 0;
-}
-
-static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos)
-{
- struct apm_user * as;
- int i;
- apm_event_t event;
-
- as = fp->private_data;
- if (check_apm_user(as, "read"))
- return -EIO;
- if ((int)count < sizeof(apm_event_t))
- return -EINVAL;
- if ((queue_empty(as)) && (fp->f_flags & O_NONBLOCK))
- return -EAGAIN;
- wait_event_interruptible(apm_waitqueue, !queue_empty(as));
- i = count;
- while ((i >= sizeof(event)) && !queue_empty(as)) {
- event = get_queued_event(as);
- if (copy_to_user(buf, &event, sizeof(event))) {
- if (i < count)
- break;
- return -EFAULT;
- }
- switch (event) {
- case APM_SYS_SUSPEND:
- case APM_USER_SUSPEND:
- as->suspends_read++;
- break;
-
- case APM_SYS_STANDBY:
- case APM_USER_STANDBY:
- as->standbys_read++;
- break;
- }
- buf += sizeof(event);
- i -= sizeof(event);
- }
- if (i < count)
- return count - i;
- if (signal_pending(current))
- return -ERESTARTSYS;
- return 0;
-}
-
-static unsigned int do_poll(struct file *fp, poll_table * wait)
-{
- struct apm_user * as;
-
- as = fp->private_data;
- if (check_apm_user(as, "poll"))
- return 0;
- poll_wait(fp, &apm_waitqueue, wait);
- if (!queue_empty(as))
- return POLLIN | POLLRDNORM;
- return 0;
-}
-
-static int do_ioctl(struct inode * inode, struct file *filp,
- u_int cmd, u_long arg)
-{
- struct apm_user * as;
-
- as = filp->private_data;
- if (check_apm_user(as, "ioctl"))
- return -EIO;
- if ((!as->suser) || (!as->writer))
- return -EPERM;
- switch (cmd) {
- case APM_IOC_STANDBY:
- if (as->standbys_read > 0) {
- as->standbys_read--;
- as->standbys_pending--;
- standbys_pending--;
- } else
- queue_event(APM_USER_STANDBY, as);
- if (standbys_pending <= 0)
- standby();
- break;
- case APM_IOC_SUSPEND:
- if (as->suspends_read > 0) {
- as->suspends_read--;
- as->suspends_pending--;
- suspends_pending--;
- } else
- queue_event(APM_USER_SUSPEND, as);
- if (suspends_pending <= 0) {
- return suspend(1);
- } else {
- as->suspend_wait = 1;
- wait_event_interruptible(apm_suspend_waitqueue,
- as->suspend_wait == 0);
- return as->suspend_result;
- }
- break;
- default:
- return -EINVAL;
- }
- return 0;
-}
-
-static int do_release(struct inode * inode, struct file * filp)
-{
- struct apm_user * as;
-
- as = filp->private_data;
- if (check_apm_user(as, "release"))
- return 0;
- filp->private_data = NULL;
- if (as->standbys_pending > 0) {
- standbys_pending -= as->standbys_pending;
- if (standbys_pending <= 0)
- standby();
- }
- if (as->suspends_pending > 0) {
- suspends_pending -= as->suspends_pending;
- if (suspends_pending <= 0)
- (void) suspend(1);
- }
- spin_lock(&user_list_lock);
- if (user_list == as)
- user_list = as->next;
- else {
- struct apm_user * as1;
-
- for (as1 = user_list;
- (as1 != NULL) && (as1->next != as);
- as1 = as1->next)
- ;
- if (as1 == NULL)
- printk(KERN_ERR "apm: filp not in user list\n");
- else
- as1->next = as->next;
- }
- spin_unlock(&user_list_lock);
- kfree(as);
- return 0;
-}
-
-static int do_open(struct inode * inode, struct file * filp)
-{
- struct apm_user * as;
-
- as = (struct apm_user *)kmalloc(sizeof(*as), GFP_KERNEL);
- if (as == NULL) {
- printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
- sizeof(*as));
- return -ENOMEM;
- }
- as->magic = APM_BIOS_MAGIC;
- as->event_tail = as->event_head = 0;
- as->suspends_pending = as->standbys_pending = 0;
- as->suspends_read = as->standbys_read = 0;
- /*
- * XXX - this is a tiny bit broken, when we consider BSD
- * process accounting. If the device is opened by root, we
- * instantly flag that we used superuser privs. Who knows,
- * we might close the device immediately without doing a
- * privileged operation -- cevans
- */
- as->suser = capable(CAP_SYS_ADMIN);
- as->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE;
- as->reader = (filp->f_mode & FMODE_READ) == FMODE_READ;
- spin_lock(&user_list_lock);
- as->next = user_list;
- user_list = as;
- spin_unlock(&user_list_lock);
- filp->private_data = as;
- return 0;
-}
-
-static int apm_get_info(char *buf, char **start, off_t fpos, int length)
-{
- char * p;
- unsigned short bx;
- unsigned short cx;
- unsigned short dx;
- int error;
- unsigned short ac_line_status = 0xff;
- unsigned short battery_status = 0xff;
- unsigned short battery_flag = 0xff;
- int percentage = -1;
- int time_units = -1;
- char *units = "?";
-
- p = buf;
-
- if ((num_online_cpus() == 1) &&
- !(error = apm_get_power_status(&bx, &cx, &dx))) {
- ac_line_status = (bx >> 8) & 0xff;
- battery_status = bx & 0xff;
- if ((cx & 0xff) != 0xff)
- percentage = cx & 0xff;
-
- if (apm_info.connection_version > 0x100) {
- battery_flag = (cx >> 8) & 0xff;
- if (dx != 0xffff) {
- units = (dx & 0x8000) ? "min" : "sec";
- time_units = dx & 0x7fff;
- }
- }
- }
- /* Arguments, with symbols from linux/apm_bios.h. Information is
- from the Get Power Status (0x0a) call unless otherwise noted.
-
- 0) Linux driver version (this will change if format changes)
- 1) APM BIOS Version. Usually 1.0, 1.1 or 1.2.
- 2) APM flags from APM Installation Check (0x00):
- bit 0: APM_16_BIT_SUPPORT
- bit 1: APM_32_BIT_SUPPORT
- bit 2: APM_IDLE_SLOWS_CLOCK
- bit 3: APM_BIOS_DISABLED
- bit 4: APM_BIOS_DISENGAGED
- 3) AC line status
- 0x00: Off-line
- 0x01: On-line
- 0x02: On backup power (BIOS >= 1.1 only)
- 0xff: Unknown
- 4) Battery status
- 0x00: High
- 0x01: Low
- 0x02: Critical
- 0x03: Charging
- 0x04: Selected battery not present (BIOS >= 1.2 only)
- 0xff: Unknown
- 5) Battery flag
- bit 0: High
- bit 1: Low
- bit 2: Critical
- bit 3: Charging
- bit 7: No system battery
- 0xff: Unknown
- 6) Remaining battery life (percentage of charge):
- 0-100: valid
- -1: Unknown
- 7) Remaining battery life (time units):
- Number of remaining minutes or seconds
- -1: Unknown
- 8) min = minutes; sec = seconds */
-
- p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
- driver_version,
- (apm_info.bios.version >> 8) & 0xff,
- apm_info.bios.version & 0xff,
- apm_info.bios.flags,
- ac_line_status,
- battery_status,
- battery_flag,
- percentage,
- time_units,
- units);
-
- return p - buf;
-}
-
-static int apm(void *unused)
-{
- unsigned short bx;
- unsigned short cx;
- unsigned short dx;
- int error;
- char * power_stat;
- char * bat_stat;
-
- kapmd_running = 1;
-
- daemonize("kapmd");
-
- current->flags |= PF_NOFREEZE;
-
-#ifdef CONFIG_SMP
- /* 2002/08/01 - WT
- * This is to avoid random crashes at boot time during initialization
- * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D.
- * Some bioses don't like being called from CPU != 0.
- * Method suggested by Ingo Molnar.
- */
- set_cpus_allowed(current, cpumask_of_cpu(0));
- BUG_ON(smp_processor_id() != 0);
-#endif
-
- if (apm_info.connection_version == 0) {
- apm_info.connection_version = apm_info.bios.version;
- if (apm_info.connection_version > 0x100) {
- /*
- * We only support BIOSs up to version 1.2
- */
- if (apm_info.connection_version > 0x0102)
- apm_info.connection_version = 0x0102;
- error = apm_driver_version(&apm_info.connection_version);
- if (error != APM_SUCCESS) {
- apm_error("driver version", error);
- /* Fall back to an APM 1.0 connection. */
- apm_info.connection_version = 0x100;
- }
- }
- }
-
- if (debug)
- printk(KERN_INFO "apm: Connection version %d.%d\n",
- (apm_info.connection_version >> 8) & 0xff,
- apm_info.connection_version & 0xff);
-
-#ifdef CONFIG_APM_DO_ENABLE
- if (apm_info.bios.flags & APM_BIOS_DISABLED) {
- /*
- * This call causes my NEC UltraLite Versa 33/C to hang if it
- * is booted with PM disabled but not in the docking station.
- * Unfortunate ...
- */
- error = apm_enable_power_management(1);
- if (error) {
- apm_error("enable power management", error);
- return -1;
- }
- }
-#endif
-
- if ((apm_info.bios.flags & APM_BIOS_DISENGAGED)
- && (apm_info.connection_version > 0x0100)) {
- error = apm_engage_power_management(APM_DEVICE_ALL, 1);
- if (error) {
- apm_error("engage power management", error);
- return -1;
- }
- }
-
- if (debug && (num_online_cpus() == 1 || smp )) {
- error = apm_get_power_status(&bx, &cx, &dx);
- if (error)
- printk(KERN_INFO "apm: power status not available\n");
- else {
- switch ((bx >> 8) & 0xff) {
- case 0: power_stat = "off line"; break;
- case 1: power_stat = "on line"; break;
- case 2: power_stat = "on backup power"; break;
- default: power_stat = "unknown"; break;
- }
- switch (bx & 0xff) {
- case 0: bat_stat = "high"; break;
- case 1: bat_stat = "low"; break;
- case 2: bat_stat = "critical"; break;
- case 3: bat_stat = "charging"; break;
- default: bat_stat = "unknown"; break;
- }
- printk(KERN_INFO
- "apm: AC %s, battery status %s, battery life ",
- power_stat, bat_stat);
- if ((cx & 0xff) == 0xff)
- printk("unknown\n");
- else
- printk("%d%%\n", cx & 0xff);
- if (apm_info.connection_version > 0x100) {
- printk(KERN_INFO
- "apm: battery flag 0x%02x, battery life ",
- (cx >> 8) & 0xff);
- if (dx == 0xffff)
- printk("unknown\n");
- else
- printk("%d %s\n", dx & 0x7fff,
- (dx & 0x8000) ?
- "minutes" : "seconds");
- }
- }
- }
-
- /* Install our power off handler.. */
- if (power_off)
- pm_power_off = apm_power_off;
-
- if (num_online_cpus() == 1 || smp) {
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
- console_blank_hook = apm_console_blank;
-#endif
- apm_mainloop();
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
- console_blank_hook = NULL;
-#endif
- }
- kapmd_running = 0;
-
- return 0;
-}
-
-#ifndef MODULE
-static int __init apm_setup(char *str)
-{
- int invert;
-
- while ((str != NULL) && (*str != '\0')) {
- if (strncmp(str, "off", 3) == 0)
- apm_disabled = 1;
- if (strncmp(str, "on", 2) == 0)
- apm_disabled = 0;
- if ((strncmp(str, "bounce-interval=", 16) == 0) ||
- (strncmp(str, "bounce_interval=", 16) == 0))
- bounce_interval = simple_strtol(str + 16, NULL, 0);
- if ((strncmp(str, "idle-threshold=", 15) == 0) ||
- (strncmp(str, "idle_threshold=", 15) == 0))
- idle_threshold = simple_strtol(str + 15, NULL, 0);
- if ((strncmp(str, "idle-period=", 12) == 0) ||
- (strncmp(str, "idle_period=", 12) == 0))
- idle_period = simple_strtol(str + 12, NULL, 0);
- invert = (strncmp(str, "no-", 3) == 0) ||
- (strncmp(str, "no_", 3) == 0);
- if (invert)
- str += 3;
- if (strncmp(str, "debug", 5) == 0)
- debug = !invert;
- if ((strncmp(str, "power-off", 9) == 0) ||
- (strncmp(str, "power_off", 9) == 0))
- power_off = !invert;
- if (strncmp(str, "smp", 3) == 0)
- {
- smp = !invert;
- idle_threshold = 100;
- }
- if ((strncmp(str, "allow-ints", 10) == 0) ||
- (strncmp(str, "allow_ints", 10) == 0))
- apm_info.allow_ints = !invert;
- if ((strncmp(str, "broken-psr", 10) == 0) ||
- (strncmp(str, "broken_psr", 10) == 0))
- apm_info.get_power_status_broken = !invert;
- if ((strncmp(str, "realmode-power-off", 18) == 0) ||
- (strncmp(str, "realmode_power_off", 18) == 0))
- apm_info.realmode_power_off = !invert;
- str = strchr(str, ',');
- if (str != NULL)
- str += strspn(str, ", \t");
- }
- return 1;
-}
-
-__setup("apm=", apm_setup);
-#endif
-
-static struct file_operations apm_bios_fops = {
- .owner = THIS_MODULE,
- .read = do_read,
- .poll = do_poll,
- .ioctl = do_ioctl,
- .open = do_open,
- .release = do_release,
-};
-
-static struct miscdevice apm_device = {
- APM_MINOR_DEV,
- "apm_bios",
- &apm_bios_fops
-};
-
-
-/* Simple "print if true" callback */
-static int __init print_if_true(struct dmi_system_id *d)
-{
- printk("%s\n", d->ident);
- return 0;
-}
-
-/*
- * Some Bioses enable the PS/2 mouse (touchpad) at resume, even if it was
- * disabled before the suspend. Linux used to get terribly confused by that.
- */
-static int __init broken_ps2_resume(struct dmi_system_id *d)
-{
- printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident);
- return 0;
-}
-
-/* Some bioses have a broken protected mode poweroff and need to use realmode */
-static int __init set_realmode_power_off(struct dmi_system_id *d)
-{
- if (apm_info.realmode_power_off == 0) {
- apm_info.realmode_power_off = 1;
- printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident);
- }
- return 0;
-}
-
-/* Some laptops require interrupts to be enabled during APM calls */
-static int __init set_apm_ints(struct dmi_system_id *d)
-{
- if (apm_info.allow_ints == 0) {
- apm_info.allow_ints = 1;
- printk(KERN_INFO "%s machine detected. Enabling interrupts during APM calls.\n", d->ident);
- }
- return 0;
-}
-
-/* Some APM bioses corrupt memory or just plain do not work */
-static int __init apm_is_horked(struct dmi_system_id *d)
-{
- if (apm_info.disabled == 0) {
- apm_info.disabled = 1;
- printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
- }
- return 0;
-}
-
-static int __init apm_is_horked_d850md(struct dmi_system_id *d)
-{
- if (apm_info.disabled == 0) {
- apm_info.disabled = 1;
- printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
- printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n");
- printk(KERN_INFO "download from support.intel.com \n");
- }
- return 0;
-}
-
-/* Some APM bioses hang on APM idle calls */
-static int __init apm_likes_to_melt(struct dmi_system_id *d)
-{
- if (apm_info.forbid_idle == 0) {
- apm_info.forbid_idle = 1;
- printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident);
- }
- return 0;
-}
-
-/*
- * Check for clue free BIOS implementations who use
- * the following QA technique
- *
- * [ Write BIOS Code ]<------
- * | ^
- * < Does it Compile >----N--
- * |Y ^
- * < Does it Boot Win98 >-N--
- * |Y
- * [Ship It]
- *
- * Phoenix A04 08/24/2000 is known bad (Dell Inspiron 5000e)
- * Phoenix A07 09/29/2000 is known good (Dell Inspiron 5000)
- */
-static int __init broken_apm_power(struct dmi_system_id *d)
-{
- apm_info.get_power_status_broken = 1;
- printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n");
- return 0;
-}
-
-/*
- * This bios swaps the APM minute reporting bytes over (Many sony laptops
- * have this problem).
- */
-static int __init swab_apm_power_in_minutes(struct dmi_system_id *d)
-{
- apm_info.get_power_status_swabinminutes = 1;
- printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes and wrong byte order.\n");
- return 0;
-}
-
-static struct dmi_system_id __initdata apm_dmi_table[] = {
- {
- print_if_true,
- KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.",
- { DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
- DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), },
- },
- { /* Handle problems with APM on the C600 */
- broken_ps2_resume, "Dell Latitude C600",
- { DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), },
- },
- { /* Allow interrupts during suspend on Dell Latitude laptops*/
- set_apm_ints, "Dell Latitude",
- { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), }
- },
- { /* APM crashes */
- apm_is_horked, "Dell Inspiron 2500",
- { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
- DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
- },
- { /* Allow interrupts during suspend on Dell Inspiron laptops*/
- set_apm_ints, "Dell Inspiron", {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), },
- },
- { /* Handle problems with APM on Inspiron 5000e */
- broken_apm_power, "Dell Inspiron 5000e",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "A04"),
- DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), },
- },
- { /* Handle problems with APM on Inspiron 2500 */
- broken_apm_power, "Dell Inspiron 2500",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "A12"),
- DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), },
- },
- { /* APM crashes */
- apm_is_horked, "Dell Dimension 4100",
- { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"),
- DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."),
- DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
- },
- { /* Allow interrupts during suspend on Compaq Laptops*/
- set_apm_ints, "Compaq 12XL125",
- { DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"),
- DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION,"4.06"), },
- },
- { /* Allow interrupts during APM or the clock goes slow */
- set_apm_ints, "ASUSTeK",
- { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), },
- },
- { /* APM blows on shutdown */
- apm_is_horked, "ABIT KX7-333[R]",
- { DMI_MATCH(DMI_BOARD_VENDOR, "ABIT"),
- DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), },
- },
- { /* APM crashes */
- apm_is_horked, "Trigem Delhi3",
- { DMI_MATCH(DMI_SYS_VENDOR, "TriGem Computer, Inc"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), },
- },
- { /* APM crashes */
- apm_is_horked, "Fujitsu-Siemens",
- { DMI_MATCH(DMI_BIOS_VENDOR, "hoenix/FUJITSU SIEMENS"),
- DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), },
- },
- { /* APM crashes */
- apm_is_horked_d850md, "Intel D850MD",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
- DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), },
- },
- { /* APM crashes */
- apm_is_horked, "Intel D810EMO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
- DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), },
- },
- { /* APM crashes */
- apm_is_horked, "Dell XPS-Z",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
- DMI_MATCH(DMI_BIOS_VERSION, "A11"),
- DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), },
- },
- { /* APM crashes */
- apm_is_horked, "Sharp PC-PJ/AX",
- { DMI_MATCH(DMI_SYS_VENDOR, "SHARP"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"),
- DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"),
- DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), },
- },
- { /* APM crashes */
- apm_is_horked, "Dell Inspiron 2500",
- { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
- DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
- },
- { /* APM idle hangs */
- apm_likes_to_melt, "Jabil AMD",
- { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
- DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), },
- },
- { /* APM idle hangs */
- apm_likes_to_melt, "AMI Bios",
- { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
- DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-N505X(DE) */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0206H"),
- DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-N505VX */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "W2K06H0"),
- DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-XG29 */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0117A0"),
- DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z600NE */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0121Z1"),
- DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z600NE */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "WME01Z1"),
- DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z600LEK(DE) */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0206Z3"),
- DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z505LS */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0203D0"),
- DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z505LS */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0203Z3"),
- DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z505LS (with updated BIOS) */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0209Z3"),
- DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-F104K */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0204K2"),
- DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), },
- },
-
- { /* Handle problems with APM on Sony Vaio PCG-C1VN/C1VE */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0208P1"),
- DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-C1VE */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0204P1"),
- DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-C1VE */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "WXPO1Z3"),
- DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), },
- },
- { /* broken PM poweroff bios */
- set_realmode_power_off, "Award Software v4.60 PGMA",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."),
- DMI_MATCH(DMI_BIOS_VERSION, "4.60 PGMA"),
- DMI_MATCH(DMI_BIOS_DATE, "134526184"), },
- },
-
- /* Generic per vendor APM settings */
-
- { /* Allow interrupts during suspend on IBM laptops */
- set_apm_ints, "IBM",
- { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), },
- },
-
- { }
-};
-
-/*
- * Just start the APM thread. We do NOT want to do APM BIOS
- * calls from anything but the APM thread, if for no other reason
- * than the fact that we don't trust the APM BIOS. This way,
- * most common APM BIOS problems that lead to protection errors
- * etc will have at least some level of being contained...
- *
- * In short, if something bad happens, at least we have a choice
- * of just killing the apm thread..
- */
-static int __init apm_init(void)
-{
- struct proc_dir_entry *apm_proc;
- int ret;
- int i;
-
- dmi_check_system(apm_dmi_table);
-
- if (apm_info.bios.version == 0) {
- printk(KERN_INFO "apm: BIOS not found.\n");
- return -ENODEV;
- }
- printk(KERN_INFO
- "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n",
- ((apm_info.bios.version >> 8) & 0xff),
- (apm_info.bios.version & 0xff),
- apm_info.bios.flags,
- driver_version);
- if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) {
- printk(KERN_INFO "apm: no 32 bit BIOS support\n");
- return -ENODEV;
- }
-
- if (allow_ints)
- apm_info.allow_ints = 1;
- if (broken_psr)
- apm_info.get_power_status_broken = 1;
- if (realmode_power_off)
- apm_info.realmode_power_off = 1;
- /* User can override, but default is to trust DMI */
- if (apm_disabled != -1)
- apm_info.disabled = apm_disabled;
-
- /*
- * Fix for the Compaq Contura 3/25c which reports BIOS version 0.1
- * but is reportedly a 1.0 BIOS.
- */
- if (apm_info.bios.version == 0x001)
- apm_info.bios.version = 0x100;
-
- /* BIOS < 1.2 doesn't set cseg_16_len */
- if (apm_info.bios.version < 0x102)
- apm_info.bios.cseg_16_len = 0; /* 64k */
-
- if (debug) {
- printk(KERN_INFO "apm: entry %x:%lx cseg16 %x dseg %x",
- apm_info.bios.cseg, apm_info.bios.offset,
- apm_info.bios.cseg_16, apm_info.bios.dseg);
- if (apm_info.bios.version > 0x100)
- printk(" cseg len %x, dseg len %x",
- apm_info.bios.cseg_len,
- apm_info.bios.dseg_len);
- if (apm_info.bios.version > 0x101)
- printk(" cseg16 len %x", apm_info.bios.cseg_16_len);
- printk("\n");
- }
-
- if (apm_info.disabled) {
- printk(KERN_NOTICE "apm: disabled on user request.\n");
- return -ENODEV;
- }
- if ((num_online_cpus() > 1) && !power_off && !smp) {
- printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
- apm_info.disabled = 1;
- return -ENODEV;
- }
- if (PM_IS_ACTIVE()) {
- printk(KERN_NOTICE "apm: overridden by ACPI.\n");
- apm_info.disabled = 1;
- return -ENODEV;
- }
- pm_active = 1;
-
- /*
- * Set up a segment that references the real mode segment 0x40
- * that extends up to the end of page zero (that we have reserved).
- * This is for buggy BIOS's that refer to (real mode) segment 0x40
- * even though they are called in protected mode.
- */
- set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
- _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
-
- apm_bios_entry.offset = apm_info.bios.offset;
- apm_bios_entry.segment = APM_CS;
-
- for (i = 0; i < NR_CPUS; i++) {
- struct desc_struct *gdt = get_cpu_gdt_table(i);
- set_base(gdt[APM_CS >> 3],
- __va((unsigned long)apm_info.bios.cseg << 4));
- set_base(gdt[APM_CS_16 >> 3],
- __va((unsigned long)apm_info.bios.cseg_16 << 4));
- set_base(gdt[APM_DS >> 3],
- __va((unsigned long)apm_info.bios.dseg << 4));
-#ifndef APM_RELAX_SEGMENTS
- if (apm_info.bios.version == 0x100) {
-#endif
- /* For ASUS motherboard, Award BIOS rev 110 (and others?) */
- _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 - 1);
- /* For some unknown machine. */
- _set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1);
- /* For the DEC Hinote Ultra CT475 (and others?) */
- _set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1);
-#ifndef APM_RELAX_SEGMENTS
- } else {
- _set_limit((char *)&gdt[APM_CS >> 3],
- (apm_info.bios.cseg_len - 1) & 0xffff);
- _set_limit((char *)&gdt[APM_CS_16 >> 3],
- (apm_info.bios.cseg_16_len - 1) & 0xffff);
- _set_limit((char *)&gdt[APM_DS >> 3],
- (apm_info.bios.dseg_len - 1) & 0xffff);
- /* workaround for broken BIOSes */
- if (apm_info.bios.cseg_len <= apm_info.bios.offset)
- _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 -1);
- if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */
- /* for the BIOS that assumes granularity = 1 */
- gdt[APM_DS >> 3].b |= 0x800000;
- printk(KERN_NOTICE "apm: we set the granularity of dseg.\n");
- }
- }
-#endif
- }
-
- apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info);
- if (apm_proc)
- apm_proc->owner = THIS_MODULE;
-
- ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD);
- if (ret < 0) {
- printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n");
- return -ENOMEM;
- }
-
- if (num_online_cpus() > 1 && !smp ) {
- printk(KERN_NOTICE
- "apm: disabled - APM is not SMP safe (power off active).\n");
- return 0;
- }
-
- misc_register(&apm_device);
-
- if (HZ != 100)
- idle_period = (idle_period * HZ) / 100;
- if (idle_threshold < 100) {
- original_pm_idle = pm_idle;
- pm_idle = apm_cpu_idle;
- set_pm_idle = 1;
- }
-
- return 0;
-}
-
-static void __exit apm_exit(void)
-{
- int error;
-
- if (set_pm_idle) {
- pm_idle = original_pm_idle;
- /*
- * We are about to unload the current idle thread pm callback
- * (pm_idle), Wait for all processors to update cached/local
- * copies of pm_idle before proceeding.
- */
- cpu_idle_wait();
- }
- if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
- && (apm_info.connection_version > 0x0100)) {
- error = apm_engage_power_management(APM_DEVICE_ALL, 0);
- if (error)
- apm_error("disengage power management", error);
- }
- misc_deregister(&apm_device);
- remove_proc_entry("apm", NULL);
- if (power_off)
- pm_power_off = NULL;
- exit_kapmd = 1;
- while (kapmd_running)
- schedule();
- pm_active = 0;
-}
-
-module_init(apm_init);
-module_exit(apm_exit);
-
-MODULE_AUTHOR("Stephen Rothwell");
-MODULE_DESCRIPTION("Advanced Power Management");
-MODULE_LICENSE("GPL");
-module_param(debug, bool, 0644);
-MODULE_PARM_DESC(debug, "Enable debug mode");
-module_param(power_off, bool, 0444);
-MODULE_PARM_DESC(power_off, "Enable power off");
-module_param(bounce_interval, int, 0444);
-MODULE_PARM_DESC(bounce_interval,
- "Set the number of ticks to ignore suspend bounces");
-module_param(allow_ints, bool, 0444);
-MODULE_PARM_DESC(allow_ints, "Allow interrupts during BIOS calls");
-module_param(broken_psr, bool, 0444);
-MODULE_PARM_DESC(broken_psr, "BIOS has a broken GetPowerStatus call");
-module_param(realmode_power_off, bool, 0444);
-MODULE_PARM_DESC(realmode_power_off,
- "Switch to real mode before powering off");
-module_param(idle_threshold, int, 0444);
-MODULE_PARM_DESC(idle_threshold,
- "System idle percentage above which to make APM BIOS idle calls");
-module_param(idle_period, int, 0444);
-MODULE_PARM_DESC(idle_period,
- "Period (in sec/100) over which to caculate the idle percentage");
-module_param(smp, bool, 0444);
-MODULE_PARM_DESC(smp,
- "Set this to enable APM use on an SMP platform. Use with caution on older systems");
-MODULE_ALIAS_MISCDEV(APM_MINOR_DEV);
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
deleted file mode 100644
index 36d66e2077d..00000000000
--- a/arch/i386/kernel/asm-offsets.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- */
-
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/personality.h>
-#include <linux/suspend.h>
-#include <asm/ucontext.h>
-#include "sigframe.h"
-#include <asm/fixmap.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-#define OFFSET(sym, str, mem) \
- DEFINE(sym, offsetof(struct str, mem));
-
-void foo(void)
-{
- OFFSET(SIGCONTEXT_eax, sigcontext, eax);
- OFFSET(SIGCONTEXT_ebx, sigcontext, ebx);
- OFFSET(SIGCONTEXT_ecx, sigcontext, ecx);
- OFFSET(SIGCONTEXT_edx, sigcontext, edx);
- OFFSET(SIGCONTEXT_esi, sigcontext, esi);
- OFFSET(SIGCONTEXT_edi, sigcontext, edi);
- OFFSET(SIGCONTEXT_ebp, sigcontext, ebp);
- OFFSET(SIGCONTEXT_esp, sigcontext, esp);
- OFFSET(SIGCONTEXT_eip, sigcontext, eip);
- BLANK();
-
- OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
- OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
- OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
- OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
- OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math);
- OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
- OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
- OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
- BLANK();
-
- OFFSET(TI_task, thread_info, task);
- OFFSET(TI_exec_domain, thread_info, exec_domain);
- OFFSET(TI_flags, thread_info, flags);
- OFFSET(TI_status, thread_info, status);
- OFFSET(TI_cpu, thread_info, cpu);
- OFFSET(TI_preempt_count, thread_info, preempt_count);
- OFFSET(TI_addr_limit, thread_info, addr_limit);
- OFFSET(TI_restart_block, thread_info, restart_block);
- BLANK();
-
- OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
- OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
- BLANK();
-
- OFFSET(pbe_address, pbe, address);
- OFFSET(pbe_orig_address, pbe, orig_address);
- OFFSET(pbe_next, pbe, next);
-
- /* Offset from the sysenter stack to tss.esp0 */
- DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
- sizeof(struct tss_struct));
-
- DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
- DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
-}
diff --git a/arch/i386/kernel/bootflag.c b/arch/i386/kernel/bootflag.c
deleted file mode 100644
index 4c30ed01f4e..00000000000
--- a/arch/i386/kernel/bootflag.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Implement 'Simple Boot Flag Specification 2.0'
- */
-
-
-#include <linux/config.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/acpi.h>
-#include <asm/io.h>
-
-#include <linux/mc146818rtc.h>
-
-
-#define SBF_RESERVED (0x78)
-#define SBF_PNPOS (1<<0)
-#define SBF_BOOTING (1<<1)
-#define SBF_DIAG (1<<2)
-#define SBF_PARITY (1<<7)
-
-
-int sbf_port __initdata = -1; /* set via acpi_boot_init() */
-
-
-static int __init parity(u8 v)
-{
- int x = 0;
- int i;
-
- for(i=0;i<8;i++)
- {
- x^=(v&1);
- v>>=1;
- }
- return x;
-}
-
-static void __init sbf_write(u8 v)
-{
- unsigned long flags;
- if(sbf_port != -1)
- {
- v &= ~SBF_PARITY;
- if(!parity(v))
- v|=SBF_PARITY;
-
- printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", sbf_port, v);
-
- spin_lock_irqsave(&rtc_lock, flags);
- CMOS_WRITE(v, sbf_port);
- spin_unlock_irqrestore(&rtc_lock, flags);
- }
-}
-
-static u8 __init sbf_read(void)
-{
- u8 v;
- unsigned long flags;
- if(sbf_port == -1)
- return 0;
- spin_lock_irqsave(&rtc_lock, flags);
- v = CMOS_READ(sbf_port);
- spin_unlock_irqrestore(&rtc_lock, flags);
- return v;
-}
-
-static int __init sbf_value_valid(u8 v)
-{
- if(v&SBF_RESERVED) /* Reserved bits */
- return 0;
- if(!parity(v))
- return 0;
- return 1;
-}
-
-static int __init sbf_init(void)
-{
- u8 v;
- if(sbf_port == -1)
- return 0;
- v = sbf_read();
- if(!sbf_value_valid(v))
- printk(KERN_WARNING "Simple Boot Flag value 0x%x read from CMOS RAM was invalid\n",v);
-
- v &= ~SBF_RESERVED;
- v &= ~SBF_BOOTING;
- v &= ~SBF_DIAG;
-#if defined(CONFIG_ISAPNP)
- v |= SBF_PNPOS;
-#endif
- sbf_write(v);
- return 0;
-}
-
-module_init(sbf_init);
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile
deleted file mode 100644
index 010aecfffbc..00000000000
--- a/arch/i386/kernel/cpu/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-#
-# Makefile for x86-compatible CPU details and quirks
-#
-
-obj-y := common.o proc.o
-
-obj-y += amd.o
-obj-y += cyrix.o
-obj-y += centaur.o
-obj-y += transmeta.o
-obj-y += intel.o intel_cacheinfo.o
-obj-y += rise.o
-obj-y += nexgen.o
-obj-y += umc.o
-
-obj-$(CONFIG_X86_MCE) += mcheck/
-
-obj-$(CONFIG_MTRR) += mtrr/
-obj-$(CONFIG_CPU_FREQ) += cpufreq/
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
deleted file mode 100644
index 53a1681cd96..00000000000
--- a/arch/i386/kernel/cpu/amd.c
+++ /dev/null
@@ -1,272 +0,0 @@
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/mm.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-
-#include "cpu.h"
-
-/*
- * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
- * misexecution of code under Linux. Owners of such processors should
- * contact AMD for precise details and a CPU swap.
- *
- * See http://www.multimania.com/poulot/k6bug.html
- * http://www.amd.com/K6/k6docs/revgd.html
- *
- * The following test is erm.. interesting. AMD neglected to up
- * the chip setting when fixing the bug but they also tweaked some
- * performance at the same time..
- */
-
-extern void vide(void);
-__asm__(".align 4\nvide: ret");
-
-static void __init init_amd(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- int mbytes = num_physpages >> (20-PAGE_SHIFT);
- int r;
-
-#ifdef CONFIG_SMP
- unsigned long long value;
-
- /* Disable TLB flush filter by setting HWCR.FFDIS on K8
- * bit 6 of msr C001_0015
- *
- * Errata 63 for SH-B3 steppings
- * Errata 122 for all steppings (F+ have it disabled by default)
- */
- if (c->x86 == 15) {
- rdmsrl(MSR_K7_HWCR, value);
- value |= 1 << 6;
- wrmsrl(MSR_K7_HWCR, value);
- }
-#endif
-
- /*
- * FIXME: We should handle the K5 here. Set up the write
- * range and also turn on MSR 83 bits 4 and 31 (write alloc,
- * no bus pipeline)
- */
-
- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, c->x86_capability);
-
- r = get_model_name(c);
-
- switch(c->x86)
- {
- case 4:
- /*
- * General Systems BIOSen alias the cpu frequency registers
- * of the Elan at 0x000df000. Unfortuantly, one of the Linux
- * drivers subsequently pokes it, and changes the CPU speed.
- * Workaround : Remove the unneeded alias.
- */
-#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
-#define CBAR_ENB (0x80000000)
-#define CBAR_KEY (0X000000CB)
- if (c->x86_model==9 || c->x86_model == 10) {
- if (inl (CBAR) & CBAR_ENB)
- outl (0 | CBAR_KEY, CBAR);
- }
- break;
- case 5:
- if( c->x86_model < 6 )
- {
- /* Based on AMD doc 20734R - June 2000 */
- if ( c->x86_model == 0 ) {
- clear_bit(X86_FEATURE_APIC, c->x86_capability);
- set_bit(X86_FEATURE_PGE, c->x86_capability);
- }
- break;
- }
-
- if ( c->x86_model == 6 && c->x86_mask == 1 ) {
- const int K6_BUG_LOOP = 1000000;
- int n;
- void (*f_vide)(void);
- unsigned long d, d2;
-
- printk(KERN_INFO "AMD K6 stepping B detected - ");
-
- /*
- * It looks like AMD fixed the 2.6.2 bug and improved indirect
- * calls at the same time.
- */
-
- n = K6_BUG_LOOP;
- f_vide = vide;
- rdtscl(d);
- while (n--)
- f_vide();
- rdtscl(d2);
- d = d2-d;
-
- /* Knock these two lines out if it debugs out ok */
- printk(KERN_INFO "AMD K6 stepping B detected - ");
- /* -- cut here -- */
- if (d > 20*K6_BUG_LOOP)
- printk("system stability may be impaired when more than 32 MB are used.\n");
- else
- printk("probably OK (after B9730xxxx).\n");
- printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
- }
-
- /* K6 with old style WHCR */
- if (c->x86_model < 8 ||
- (c->x86_model== 8 && c->x86_mask < 8)) {
- /* We can only write allocate on the low 508Mb */
- if(mbytes>508)
- mbytes=508;
-
- rdmsr(MSR_K6_WHCR, l, h);
- if ((l&0x0000FFFF)==0) {
- unsigned long flags;
- l=(1<<0)|((mbytes/4)<<1);
- local_irq_save(flags);
- wbinvd();
- wrmsr(MSR_K6_WHCR, l, h);
- local_irq_restore(flags);
- printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
- mbytes);
- }
- break;
- }
-
- if ((c->x86_model == 8 && c->x86_mask >7) ||
- c->x86_model == 9 || c->x86_model == 13) {
- /* The more serious chips .. */
-
- if(mbytes>4092)
- mbytes=4092;
-
- rdmsr(MSR_K6_WHCR, l, h);
- if ((l&0xFFFF0000)==0) {
- unsigned long flags;
- l=((mbytes>>2)<<22)|(1<<16);
- local_irq_save(flags);
- wbinvd();
- wrmsr(MSR_K6_WHCR, l, h);
- local_irq_restore(flags);
- printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
- mbytes);
- }
-
- /* Set MTRR capability flag if appropriate */
- if (c->x86_model == 13 || c->x86_model == 9 ||
- (c->x86_model == 8 && c->x86_mask >= 8))
- set_bit(X86_FEATURE_K6_MTRR, c->x86_capability);
- break;
- }
- break;
-
- case 6: /* An Athlon/Duron */
-
- /* Bit 15 of Athlon specific MSR 15, needs to be 0
- * to enable SSE on Palomino/Morgan/Barton CPU's.
- * If the BIOS didn't enable it already, enable it here.
- */
- if (c->x86_model >= 6 && c->x86_model <= 10) {
- if (!cpu_has(c, X86_FEATURE_XMM)) {
- printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
- rdmsr(MSR_K7_HWCR, l, h);
- l &= ~0x00008000;
- wrmsr(MSR_K7_HWCR, l, h);
- set_bit(X86_FEATURE_XMM, c->x86_capability);
- }
- }
-
- /* It's been determined by AMD that Athlons since model 8 stepping 1
- * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
- * As per AMD technical note 27212 0.2
- */
- if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) {
- rdmsr(MSR_K7_CLK_CTL, l, h);
- if ((l & 0xfff00000) != 0x20000000) {
- printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
- ((l & 0x000fffff)|0x20000000));
- wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
- }
- }
- break;
- }
-
- switch (c->x86) {
- case 15:
- set_bit(X86_FEATURE_K8, c->x86_capability);
- break;
- case 6:
- set_bit(X86_FEATURE_K7, c->x86_capability);
- break;
- }
-
- display_cacheinfo(c);
-
- if (cpuid_eax(0x80000000) >= 0x80000008) {
- c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
- if (c->x86_num_cores & (c->x86_num_cores - 1))
- c->x86_num_cores = 1;
- }
-
-#ifdef CONFIG_X86_HT
- /*
- * On a AMD dual core setup the lower bits of the APIC id
- * distingush the cores. Assumes number of cores is a power
- * of two.
- */
- if (c->x86_num_cores > 1) {
- int cpu = smp_processor_id();
- unsigned bits = 0;
- while ((1 << bits) < c->x86_num_cores)
- bits++;
- cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
- phys_proc_id[cpu] >>= bits;
- printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
- cpu, c->x86_num_cores, cpu_core_id[cpu]);
- }
-#endif
-}
-
-static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
-{
- /* AMD errata T13 (order #21922) */
- if ((c->x86 == 6)) {
- if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */
- size = 64;
- if (c->x86_model == 4 &&
- (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */
- size = 256;
- }
- return size;
-}
-
-static struct cpu_dev amd_cpu_dev __initdata = {
- .c_vendor = "AMD",
- .c_ident = { "AuthenticAMD" },
- .c_models = {
- { .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
- {
- [3] = "486 DX/2",
- [7] = "486 DX/2-WB",
- [8] = "486 DX/4",
- [9] = "486 DX/4-WB",
- [14] = "Am5x86-WT",
- [15] = "Am5x86-WB"
- }
- },
- },
- .c_init = init_amd,
- .c_identify = generic_identify,
- .c_size_cache = amd_size_cache,
-};
-
-int __init amd_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
- return 0;
-}
-
-//early_arch_initcall(amd_init_cpu);
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c
deleted file mode 100644
index 394814e5767..00000000000
--- a/arch/i386/kernel/cpu/centaur.c
+++ /dev/null
@@ -1,476 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/e820.h>
-#include "cpu.h"
-
-#ifdef CONFIG_X86_OOSTORE
-
-static u32 __init power2(u32 x)
-{
- u32 s=1;
- while(s<=x)
- s<<=1;
- return s>>=1;
-}
-
-
-/*
- * Set up an actual MCR
- */
-
-static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key)
-{
- u32 lo, hi;
-
- hi = base & ~0xFFF;
- lo = ~(size-1); /* Size is a power of 2 so this makes a mask */
- lo &= ~0xFFF; /* Remove the ctrl value bits */
- lo |= key; /* Attribute we wish to set */
- wrmsr(reg+MSR_IDT_MCR0, lo, hi);
- mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */
-}
-
-/*
- * Figure what we can cover with MCR's
- *
- * Shortcut: We know you can't put 4Gig of RAM on a winchip
- */
-
-static u32 __init ramtop(void) /* 16388 */
-{
- int i;
- u32 top = 0;
- u32 clip = 0xFFFFFFFFUL;
-
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long start, end;
-
- if (e820.map[i].addr > 0xFFFFFFFFUL)
- continue;
- /*
- * Don't MCR over reserved space. Ignore the ISA hole
- * we frob around that catastrophy already
- */
-
- if (e820.map[i].type == E820_RESERVED)
- {
- if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip)
- clip = e820.map[i].addr;
- continue;
- }
- start = e820.map[i].addr;
- end = e820.map[i].addr + e820.map[i].size;
- if (start >= end)
- continue;
- if (end > top)
- top = end;
- }
- /* Everything below 'top' should be RAM except for the ISA hole.
- Because of the limited MCR's we want to map NV/ACPI into our
- MCR range for gunk in RAM
-
- Clip might cause us to MCR insufficient RAM but that is an
- acceptable failure mode and should only bite obscure boxes with
- a VESA hole at 15Mb
-
- The second case Clip sometimes kicks in is when the EBDA is marked
- as reserved. Again we fail safe with reasonable results
- */
-
- if(top>clip)
- top=clip;
-
- return top;
-}
-
-/*
- * Compute a set of MCR's to give maximum coverage
- */
-
-static int __init centaur_mcr_compute(int nr, int key)
-{
- u32 mem = ramtop();
- u32 root = power2(mem);
- u32 base = root;
- u32 top = root;
- u32 floor = 0;
- int ct = 0;
-
- while(ct<nr)
- {
- u32 fspace = 0;
-
- /*
- * Find the largest block we will fill going upwards
- */
-
- u32 high = power2(mem-top);
-
- /*
- * Find the largest block we will fill going downwards
- */
-
- u32 low = base/2;
-
- /*
- * Don't fill below 1Mb going downwards as there
- * is an ISA hole in the way.
- */
-
- if(base <= 1024*1024)
- low = 0;
-
- /*
- * See how much space we could cover by filling below
- * the ISA hole
- */
-
- if(floor == 0)
- fspace = 512*1024;
- else if(floor ==512*1024)
- fspace = 128*1024;
-
- /* And forget ROM space */
-
- /*
- * Now install the largest coverage we get
- */
-
- if(fspace > high && fspace > low)
- {
- centaur_mcr_insert(ct, floor, fspace, key);
- floor += fspace;
- }
- else if(high > low)
- {
- centaur_mcr_insert(ct, top, high, key);
- top += high;
- }
- else if(low > 0)
- {
- base -= low;
- centaur_mcr_insert(ct, base, low, key);
- }
- else break;
- ct++;
- }
- /*
- * We loaded ct values. We now need to set the mask. The caller
- * must do this bit.
- */
-
- return ct;
-}
-
-static void __init centaur_create_optimal_mcr(void)
-{
- int i;
- /*
- * Allocate up to 6 mcrs to mark as much of ram as possible
- * as write combining and weak write ordered.
- *
- * To experiment with: Linux never uses stack operations for
- * mmio spaces so we could globally enable stack operation wc
- *
- * Load the registers with type 31 - full write combining, all
- * writes weakly ordered.
- */
- int used = centaur_mcr_compute(6, 31);
-
- /*
- * Wipe unused MCRs
- */
-
- for(i=used;i<8;i++)
- wrmsr(MSR_IDT_MCR0+i, 0, 0);
-}
-
-static void __init winchip2_create_optimal_mcr(void)
-{
- u32 lo, hi;
- int i;
-
- /*
- * Allocate up to 6 mcrs to mark as much of ram as possible
- * as write combining, weak store ordered.
- *
- * Load the registers with type 25
- * 8 - weak write ordering
- * 16 - weak read ordering
- * 1 - write combining
- */
-
- int used = centaur_mcr_compute(6, 25);
-
- /*
- * Mark the registers we are using.
- */
-
- rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- for(i=0;i<used;i++)
- lo|=1<<(9+i);
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-
- /*
- * Wipe unused MCRs
- */
-
- for(i=used;i<8;i++)
- wrmsr(MSR_IDT_MCR0+i, 0, 0);
-}
-
-/*
- * Handle the MCR key on the Winchip 2.
- */
-
-static void __init winchip2_unprotect_mcr(void)
-{
- u32 lo, hi;
- u32 key;
-
- rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- lo&=~0x1C0; /* blank bits 8-6 */
- key = (lo>>17) & 7;
- lo |= key<<6; /* replace with unlock key */
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-}
-
-static void __init winchip2_protect_mcr(void)
-{
- u32 lo, hi;
-
- rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- lo&=~0x1C0; /* blank bits 8-6 */
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-}
-#endif /* CONFIG_X86_OOSTORE */
-
-#define ACE_PRESENT (1 << 6)
-#define ACE_ENABLED (1 << 7)
-#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */
-
-#define RNG_PRESENT (1 << 2)
-#define RNG_ENABLED (1 << 3)
-#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
-
-static void __init init_c3(struct cpuinfo_x86 *c)
-{
- u32 lo, hi;
-
- /* Test for Centaur Extended Feature Flags presence */
- if (cpuid_eax(0xC0000000) >= 0xC0000001) {
- u32 tmp = cpuid_edx(0xC0000001);
-
- /* enable ACE unit, if present and disabled */
- if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
- rdmsr (MSR_VIA_FCR, lo, hi);
- lo |= ACE_FCR; /* enable ACE unit */
- wrmsr (MSR_VIA_FCR, lo, hi);
- printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
- }
-
- /* enable RNG unit, if present and disabled */
- if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
- rdmsr (MSR_VIA_RNG, lo, hi);
- lo |= RNG_ENABLE; /* enable RNG unit */
- wrmsr (MSR_VIA_RNG, lo, hi);
- printk(KERN_INFO "CPU: Enabled h/w RNG\n");
- }
-
- /* store Centaur Extended Feature Flags as
- * word 5 of the CPU capability bit array
- */
- c->x86_capability[5] = cpuid_edx(0xC0000001);
- }
-
- /* Cyrix III family needs CX8 & PGE explicity enabled. */
- if (c->x86_model >=6 && c->x86_model <= 9) {
- rdmsr (MSR_VIA_FCR, lo, hi);
- lo |= (1<<1 | 1<<7);
- wrmsr (MSR_VIA_FCR, lo, hi);
- set_bit(X86_FEATURE_CX8, c->x86_capability);
- }
-
- /* Before Nehemiah, the C3's had 3dNOW! */
- if (c->x86_model >=6 && c->x86_model <9)
- set_bit(X86_FEATURE_3DNOW, c->x86_capability);
-
- get_model_name(c);
- display_cacheinfo(c);
-}
-
-static void __init init_centaur(struct cpuinfo_x86 *c)
-{
- enum {
- ECX8=1<<1,
- EIERRINT=1<<2,
- DPM=1<<3,
- DMCE=1<<4,
- DSTPCLK=1<<5,
- ELINEAR=1<<6,
- DSMC=1<<7,
- DTLOCK=1<<8,
- EDCTLB=1<<8,
- EMMX=1<<9,
- DPDC=1<<11,
- EBRPRED=1<<12,
- DIC=1<<13,
- DDC=1<<14,
- DNA=1<<15,
- ERETSTK=1<<16,
- E2MMX=1<<19,
- EAMD3D=1<<20,
- };
-
- char *name;
- u32 fcr_set=0;
- u32 fcr_clr=0;
- u32 lo,hi,newlo;
- u32 aa,bb,cc,dd;
-
- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, c->x86_capability);
-
- switch (c->x86) {
-
- case 5:
- switch(c->x86_model) {
- case 4:
- name="C6";
- fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
- fcr_clr=DPDC;
- printk(KERN_NOTICE "Disabling bugged TSC.\n");
- clear_bit(X86_FEATURE_TSC, c->x86_capability);
-#ifdef CONFIG_X86_OOSTORE
- centaur_create_optimal_mcr();
- /* Enable
- write combining on non-stack, non-string
- write combining on string, all types
- weak write ordering
-
- The C6 original lacks weak read order
-
- Note 0x120 is write only on Winchip 1 */
-
- wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
-#endif
- break;
- case 8:
- switch(c->x86_mask) {
- default:
- name="2";
- break;
- case 7 ... 9:
- name="2A";
- break;
- case 10 ... 15:
- name="2B";
- break;
- }
- fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
- fcr_clr=DPDC;
-#ifdef CONFIG_X86_OOSTORE
- winchip2_unprotect_mcr();
- winchip2_create_optimal_mcr();
- rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- /* Enable
- write combining on non-stack, non-string
- write combining on string, all types
- weak write ordering
- */
- lo|=31;
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
- winchip2_protect_mcr();
-#endif
- break;
- case 9:
- name="3";
- fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
- fcr_clr=DPDC;
-#ifdef CONFIG_X86_OOSTORE
- winchip2_unprotect_mcr();
- winchip2_create_optimal_mcr();
- rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- /* Enable
- write combining on non-stack, non-string
- write combining on string, all types
- weak write ordering
- */
- lo|=31;
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
- winchip2_protect_mcr();
-#endif
- break;
- case 10:
- name="4";
- /* no info on the WC4 yet */
- break;
- default:
- name="??";
- }
-
- rdmsr(MSR_IDT_FCR1, lo, hi);
- newlo=(lo|fcr_set) & (~fcr_clr);
-
- if (newlo!=lo) {
- printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo );
- wrmsr(MSR_IDT_FCR1, newlo, hi );
- } else {
- printk(KERN_INFO "Centaur FCR is 0x%X\n",lo);
- }
- /* Emulate MTRRs using Centaur's MCR. */
- set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability);
- /* Report CX8 */
- set_bit(X86_FEATURE_CX8, c->x86_capability);
- /* Set 3DNow! on Winchip 2 and above. */
- if (c->x86_model >=8)
- set_bit(X86_FEATURE_3DNOW, c->x86_capability);
- /* See if we can find out some more. */
- if ( cpuid_eax(0x80000000) >= 0x80000005 ) {
- /* Yes, we can. */
- cpuid(0x80000005,&aa,&bb,&cc,&dd);
- /* Add L1 data and code cache sizes. */
- c->x86_cache_size = (cc>>24)+(dd>>24);
- }
- sprintf( c->x86_model_id, "WinChip %s", name );
- break;
-
- case 6:
- init_c3(c);
- break;
- }
-}
-
-static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
-{
- /* VIA C3 CPUs (670-68F) need further shifting. */
- if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
- size >>= 8;
-
- /* VIA also screwed up Nehemiah stepping 1, and made
- it return '65KB' instead of '64KB'
- - Note, it seems this may only be in engineering samples. */
- if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65))
- size -=1;
-
- return size;
-}
-
-static struct cpu_dev centaur_cpu_dev __initdata = {
- .c_vendor = "Centaur",
- .c_ident = { "CentaurHauls" },
- .c_init = init_centaur,
- .c_size_cache = centaur_size_cache,
-};
-
-int __init centaur_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev;
- return 0;
-}
-
-//early_arch_initcall(centaur_init_cpu);
diff --git a/arch/i386/kernel/cpu/changelog b/arch/i386/kernel/cpu/changelog
deleted file mode 100644
index cef76b80a71..00000000000
--- a/arch/i386/kernel/cpu/changelog
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Enhanced CPU type detection by Mike Jagdis, Patrick St. Jean
- * and Martin Mares, November 1997.
- *
- * Force Cyrix 6x86(MX) and M II processors to report MTRR capability
- * and Cyrix "coma bug" recognition by
- * Zoltán Böszörményi <zboszor@mail.externet.hu> February 1999.
- *
- * Force Centaur C6 processors to report MTRR capability.
- * Bart Hartgers <bart@etpmod.phys.tue.nl>, May 1999.
- *
- * Intel Mobile Pentium II detection fix. Sean Gilley, June 1999.
- *
- * IDT Winchip tweaks, misc clean ups.
- * Dave Jones <davej@suse.de>, August 1999
- *
- * Better detection of Centaur/IDT WinChip models.
- * Bart Hartgers <bart@etpmod.phys.tue.nl>, August 1999.
- *
- * Cleaned up cache-detection code
- * Dave Jones <davej@suse.de>, October 1999
- *
- * Added proper L2 cache detection for Coppermine
- * Dragan Stancevic <visitor@valinux.com>, October 1999
- *
- * Added the original array for capability flags but forgot to credit
- * myself :) (~1998) Fixed/cleaned up some cpu_model_info and other stuff
- * Jauder Ho <jauderho@carumba.com>, January 2000
- *
- * Detection for Celeron coppermine, identify_cpu() overhauled,
- * and a few other clean ups.
- * Dave Jones <davej@suse.de>, April 2000
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- *
- * Added proper Cascades CPU and L2 cache detection for Cascades
- * and 8-way type cache happy bunch from Intel:^)
- * Dragan Stancevic <visitor@valinux.com>, May 2000
- *
- * Forward port AMD Duron errata T13 from 2.2.17pre
- * Dave Jones <davej@suse.de>, August 2000
- *
- * Forward port lots of fixes/improvements from 2.2.18pre
- * Cyrix III, Pentium IV support.
- * Dave Jones <davej@suse.de>, October 2000
- *
- * Massive cleanup of CPU detection and bug handling;
- * Transmeta CPU detection,
- * H. Peter Anvin <hpa@zytor.com>, November 2000
- *
- * VIA C3 Support.
- * Dave Jones <davej@suse.de>, March 2001
- *
- * AMD Athlon/Duron/Thunderbird bluesmoke support.
- * Dave Jones <davej@suse.de>, April 2001.
- *
- * CacheSize bug workaround updates for AMD, Intel & VIA Cyrix.
- * Dave Jones <davej@suse.de>, September, October 2001.
- *
- */
-
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
deleted file mode 100644
index c145fb30002..00000000000
--- a/arch/i386/kernel/cpu/common.c
+++ /dev/null
@@ -1,660 +0,0 @@
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/delay.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/percpu.h>
-#include <asm/semaphore.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/msr.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#ifdef CONFIG_X86_LOCAL_APIC
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#include <mach_apic.h>
-#endif
-
-#include "cpu.h"
-
-DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
-EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
-
-DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
-EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
-
-static int cachesize_override __devinitdata = -1;
-static int disable_x86_fxsr __devinitdata = 0;
-static int disable_x86_serial_nr __devinitdata = 1;
-
-struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
-
-extern int disable_pse;
-
-static void default_init(struct cpuinfo_x86 * c)
-{
- /* Not much we can do here... */
- /* Check if at least it has cpuid */
- if (c->cpuid_level == -1) {
- /* No cpuid. It must be an ancient CPU */
- if (c->x86 == 4)
- strcpy(c->x86_model_id, "486");
- else if (c->x86 == 3)
- strcpy(c->x86_model_id, "386");
- }
-}
-
-static struct cpu_dev default_cpu = {
- .c_init = default_init,
-};
-static struct cpu_dev * this_cpu = &default_cpu;
-
-static int __init cachesize_setup(char *str)
-{
- get_option (&str, &cachesize_override);
- return 1;
-}
-__setup("cachesize=", cachesize_setup);
-
-int __devinit get_model_name(struct cpuinfo_x86 *c)
-{
- unsigned int *v;
- char *p, *q;
-
- if (cpuid_eax(0x80000000) < 0x80000004)
- return 0;
-
- v = (unsigned int *) c->x86_model_id;
- cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
- cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
- cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
- c->x86_model_id[48] = 0;
-
- /* Intel chips right-justify this string for some dumb reason;
- undo that brain damage */
- p = q = &c->x86_model_id[0];
- while ( *p == ' ' )
- p++;
- if ( p != q ) {
- while ( *p )
- *q++ = *p++;
- while ( q <= &c->x86_model_id[48] )
- *q++ = '\0'; /* Zero-pad the rest */
- }
-
- return 1;
-}
-
-
-void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
-{
- unsigned int n, dummy, ecx, edx, l2size;
-
- n = cpuid_eax(0x80000000);
-
- if (n >= 0x80000005) {
- cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
- c->x86_cache_size=(ecx>>24)+(edx>>24);
- }
-
- if (n < 0x80000006) /* Some chips just has a large L1. */
- return;
-
- ecx = cpuid_ecx(0x80000006);
- l2size = ecx >> 16;
-
- /* do processor-specific cache resizing */
- if (this_cpu->c_size_cache)
- l2size = this_cpu->c_size_cache(c,l2size);
-
- /* Allow user to override all this if necessary. */
- if (cachesize_override != -1)
- l2size = cachesize_override;
-
- if ( l2size == 0 )
- return; /* Again, no L2 cache is possible */
-
- c->x86_cache_size = l2size;
-
- printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
- l2size, ecx & 0xFF);
-}
-
-/* Naming convention should be: <Name> [(<Codename>)] */
-/* This table only is used unless init_<vendor>() below doesn't set it; */
-/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
-
-/* Look up CPU names by table lookup. */
-static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
-{
- struct cpu_model_info *info;
-
- if ( c->x86_model >= 16 )
- return NULL; /* Range check */
-
- if (!this_cpu)
- return NULL;
-
- info = this_cpu->c_models;
-
- while (info && info->family) {
- if (info->family == c->x86)
- return info->model_names[c->x86_model];
- info++;
- }
- return NULL; /* Not found */
-}
-
-
-static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
-{
- char *v = c->x86_vendor_id;
- int i;
-
- for (i = 0; i < X86_VENDOR_NUM; i++) {
- if (cpu_devs[i]) {
- if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
- (cpu_devs[i]->c_ident[1] &&
- !strcmp(v,cpu_devs[i]->c_ident[1]))) {
- c->x86_vendor = i;
- if (!early)
- this_cpu = cpu_devs[i];
- break;
- }
- }
- }
-}
-
-
-static int __init x86_fxsr_setup(char * s)
-{
- disable_x86_fxsr = 1;
- return 1;
-}
-__setup("nofxsr", x86_fxsr_setup);
-
-
-/* Standard macro to see if a specific flag is changeable */
-static inline int flag_is_changeable_p(u32 flag)
-{
- u32 f1, f2;
-
- asm("pushfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "movl %0,%1\n\t"
- "xorl %2,%0\n\t"
- "pushl %0\n\t"
- "popfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "popfl\n\t"
- : "=&r" (f1), "=&r" (f2)
- : "ir" (flag));
-
- return ((f1^f2) & flag) != 0;
-}
-
-
-/* Probe for the CPUID instruction */
-static int __devinit have_cpuid_p(void)
-{
- return flag_is_changeable_p(X86_EFLAGS_ID);
-}
-
-/* Do minimum CPU detection early.
- Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
- The others are not touched to avoid unwanted side effects. */
-static void __init early_cpu_detect(void)
-{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- c->x86_cache_alignment = 32;
-
- if (!have_cpuid_p())
- return;
-
- /* Get vendor name */
- cpuid(0x00000000, &c->cpuid_level,
- (int *)&c->x86_vendor_id[0],
- (int *)&c->x86_vendor_id[8],
- (int *)&c->x86_vendor_id[4]);
-
- get_cpu_vendor(c, 1);
-
- c->x86 = 4;
- if (c->cpuid_level >= 0x00000001) {
- u32 junk, tfms, cap0, misc;
- cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
- c->x86 = (tfms >> 8) & 15;
- c->x86_model = (tfms >> 4) & 15;
- if (c->x86 == 0xf) {
- c->x86 += (tfms >> 20) & 0xff;
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
- }
- c->x86_mask = tfms & 15;
- if (cap0 & (1<<19))
- c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
- }
-
- early_intel_workaround(c);
-
-#ifdef CONFIG_X86_HT
- phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
-#endif
-}
-
-void __devinit generic_identify(struct cpuinfo_x86 * c)
-{
- u32 tfms, xlvl;
- int junk;
-
- if (have_cpuid_p()) {
- /* Get vendor name */
- cpuid(0x00000000, &c->cpuid_level,
- (int *)&c->x86_vendor_id[0],
- (int *)&c->x86_vendor_id[8],
- (int *)&c->x86_vendor_id[4]);
-
- get_cpu_vendor(c, 0);
- /* Initialize the standard set of capabilities */
- /* Note that the vendor-specific code below might override */
-
- /* Intel-defined flags: level 0x00000001 */
- if ( c->cpuid_level >= 0x00000001 ) {
- u32 capability, excap;
- cpuid(0x00000001, &tfms, &junk, &excap, &capability);
- c->x86_capability[0] = capability;
- c->x86_capability[4] = excap;
- c->x86 = (tfms >> 8) & 15;
- c->x86_model = (tfms >> 4) & 15;
- if (c->x86 == 0xf) {
- c->x86 += (tfms >> 20) & 0xff;
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
- }
- c->x86_mask = tfms & 15;
- } else {
- /* Have CPUID level 0 only - unheard of */
- c->x86 = 4;
- }
-
- /* AMD-defined flags: level 0x80000001 */
- xlvl = cpuid_eax(0x80000000);
- if ( (xlvl & 0xffff0000) == 0x80000000 ) {
- if ( xlvl >= 0x80000001 ) {
- c->x86_capability[1] = cpuid_edx(0x80000001);
- c->x86_capability[6] = cpuid_ecx(0x80000001);
- }
- if ( xlvl >= 0x80000004 )
- get_model_name(c); /* Default name */
- }
- }
-}
-
-static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
-{
- if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
- /* Disable processor serial number */
- unsigned long lo,hi;
- rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
- lo |= 0x200000;
- wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
- printk(KERN_NOTICE "CPU serial number disabled.\n");
- clear_bit(X86_FEATURE_PN, c->x86_capability);
-
- /* Disabling the serial number may affect the cpuid level */
- c->cpuid_level = cpuid_eax(0);
- }
-}
-
-static int __init x86_serial_nr_setup(char *s)
-{
- disable_x86_serial_nr = 0;
- return 1;
-}
-__setup("serialnumber", x86_serial_nr_setup);
-
-
-
-/*
- * This does the hard work of actually picking apart the CPU stuff...
- */
-void __devinit identify_cpu(struct cpuinfo_x86 *c)
-{
- int i;
-
- c->loops_per_jiffy = loops_per_jiffy;
- c->x86_cache_size = -1;
- c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->cpuid_level = -1; /* CPUID not detected */
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
- c->x86_vendor_id[0] = '\0'; /* Unset */
- c->x86_model_id[0] = '\0'; /* Unset */
- c->x86_num_cores = 1;
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
-
- if (!have_cpuid_p()) {
- /* First of all, decide if this is a 486 or higher */
- /* It's a 486 if we can modify the AC flag */
- if ( flag_is_changeable_p(X86_EFLAGS_AC) )
- c->x86 = 4;
- else
- c->x86 = 3;
- }
-
- generic_identify(c);
-
- printk(KERN_DEBUG "CPU: After generic identify, caps:");
- for (i = 0; i < NCAPINTS; i++)
- printk(" %08lx", c->x86_capability[i]);
- printk("\n");
-
- if (this_cpu->c_identify) {
- this_cpu->c_identify(c);
-
- printk(KERN_DEBUG "CPU: After vendor identify, caps:");
- for (i = 0; i < NCAPINTS; i++)
- printk(" %08lx", c->x86_capability[i]);
- printk("\n");
- }
-
- /*
- * Vendor-specific initialization. In this section we
- * canonicalize the feature flags, meaning if there are
- * features a certain CPU supports which CPUID doesn't
- * tell us, CPUID claiming incorrect flags, or other bugs,
- * we handle them here.
- *
- * At the end of this section, c->x86_capability better
- * indicate the features this CPU genuinely supports!
- */
- if (this_cpu->c_init)
- this_cpu->c_init(c);
-
- /* Disable the PN if appropriate */
- squash_the_stupid_serial_number(c);
-
- /*
- * The vendor-specific functions might have changed features. Now
- * we do "generic changes."
- */
-
- /* TSC disabled? */
- if ( tsc_disable )
- clear_bit(X86_FEATURE_TSC, c->x86_capability);
-
- /* FXSR disabled? */
- if (disable_x86_fxsr) {
- clear_bit(X86_FEATURE_FXSR, c->x86_capability);
- clear_bit(X86_FEATURE_XMM, c->x86_capability);
- }
-
- if (disable_pse)
- clear_bit(X86_FEATURE_PSE, c->x86_capability);
-
- /* If the model name is still unset, do table lookup. */
- if ( !c->x86_model_id[0] ) {
- char *p;
- p = table_lookup_model(c);
- if ( p )
- strcpy(c->x86_model_id, p);
- else
- /* Last resort... */
- sprintf(c->x86_model_id, "%02x/%02x",
- c->x86_vendor, c->x86_model);
- }
-
- /* Now the feature flags better reflect actual CPU features! */
-
- printk(KERN_DEBUG "CPU: After all inits, caps:");
- for (i = 0; i < NCAPINTS; i++)
- printk(" %08lx", c->x86_capability[i]);
- printk("\n");
-
- /*
- * On SMP, boot_cpu_data holds the common feature set between
- * all CPUs; so make sure that we indicate which features are
- * common between the CPUs. The first time this routine gets
- * executed, c == &boot_cpu_data.
- */
- if ( c != &boot_cpu_data ) {
- /* AND the already accumulated flags with these */
- for ( i = 0 ; i < NCAPINTS ; i++ )
- boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
- }
-
- /* Init Machine Check Exception if available. */
- mcheck_init(c);
-
- if (c == &boot_cpu_data)
- sysenter_setup();
- enable_sep_cpu();
-
- if (c == &boot_cpu_data)
- mtrr_bp_init();
- else
- mtrr_ap_init();
-}
-
-#ifdef CONFIG_X86_HT
-void __devinit detect_ht(struct cpuinfo_x86 *c)
-{
- u32 eax, ebx, ecx, edx;
- int index_msb, tmp;
- int cpu = smp_processor_id();
-
- if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
- return;
-
- cpuid(1, &eax, &ebx, &ecx, &edx);
- smp_num_siblings = (ebx & 0xff0000) >> 16;
-
- if (smp_num_siblings == 1) {
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
- } else if (smp_num_siblings > 1 ) {
- index_msb = 31;
-
- if (smp_num_siblings > NR_CPUS) {
- printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
- smp_num_siblings = 1;
- return;
- }
- tmp = smp_num_siblings;
- while ((tmp & 0x80000000 ) == 0) {
- tmp <<=1 ;
- index_msb--;
- }
- if (smp_num_siblings & (smp_num_siblings - 1))
- index_msb++;
- phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
-
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- phys_proc_id[cpu]);
-
- smp_num_siblings = smp_num_siblings / c->x86_num_cores;
-
- tmp = smp_num_siblings;
- index_msb = 31;
- while ((tmp & 0x80000000) == 0) {
- tmp <<=1 ;
- index_msb--;
- }
-
- if (smp_num_siblings & (smp_num_siblings - 1))
- index_msb++;
-
- cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
-
- if (c->x86_num_cores > 1)
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- cpu_core_id[cpu]);
- }
-}
-#endif
-
-void __devinit print_cpu_info(struct cpuinfo_x86 *c)
-{
- char *vendor = NULL;
-
- if (c->x86_vendor < X86_VENDOR_NUM)
- vendor = this_cpu->c_vendor;
- else if (c->cpuid_level >= 0)
- vendor = c->x86_vendor_id;
-
- if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
- printk("%s ", vendor);
-
- if (!c->x86_model_id[0])
- printk("%d86", c->x86);
- else
- printk("%s", c->x86_model_id);
-
- if (c->x86_mask || c->cpuid_level >= 0)
- printk(" stepping %02x\n", c->x86_mask);
- else
- printk("\n");
-}
-
-cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE;
-
-/* This is hacky. :)
- * We're emulating future behavior.
- * In the future, the cpu-specific init functions will be called implicitly
- * via the magic of initcalls.
- * They will insert themselves into the cpu_devs structure.
- * Then, when cpu_init() is called, we can just iterate over that array.
- */
-
-extern int intel_cpu_init(void);
-extern int cyrix_init_cpu(void);
-extern int nsc_init_cpu(void);
-extern int amd_init_cpu(void);
-extern int centaur_init_cpu(void);
-extern int transmeta_init_cpu(void);
-extern int rise_init_cpu(void);
-extern int nexgen_init_cpu(void);
-extern int umc_init_cpu(void);
-
-void __init early_cpu_init(void)
-{
- intel_cpu_init();
- cyrix_init_cpu();
- nsc_init_cpu();
- amd_init_cpu();
- centaur_init_cpu();
- transmeta_init_cpu();
- rise_init_cpu();
- nexgen_init_cpu();
- umc_init_cpu();
- early_cpu_detect();
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
- /* pse is not compatible with on-the-fly unmapping,
- * disable it even if the cpus claim to support it.
- */
- clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
- disable_pse = 1;
-#endif
-}
-/*
- * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
- */
-void __devinit cpu_init(void)
-{
- int cpu = smp_processor_id();
- struct tss_struct * t = &per_cpu(init_tss, cpu);
- struct thread_struct *thread = &current->thread;
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
- __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
-
- if (cpu_test_and_set(cpu, cpu_initialized)) {
- printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
- for (;;) local_irq_enable();
- }
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
-
- if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
- if (tsc_disable && cpu_has_tsc) {
- printk(KERN_NOTICE "Disabling TSC...\n");
- /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
- clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
- set_in_cr4(X86_CR4_TSD);
- }
-
- /*
- * Initialize the per-CPU GDT with the boot GDT,
- * and set up the GDT descriptor:
- */
- memcpy(gdt, cpu_gdt_table, GDT_SIZE);
-
- /* Set up GDT entry for 16bit stack */
- *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
- ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
- ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
- (CPU_16BIT_STACK_SIZE - 1);
-
- cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
- cpu_gdt_descr[cpu].address = (unsigned long)gdt;
-
- load_gdt(&cpu_gdt_descr[cpu]);
- load_idt(&idt_descr);
-
- /*
- * Delete NT
- */
- __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
-
- /*
- * Set up and load the per-CPU TSS and LDT
- */
- atomic_inc(&init_mm.mm_count);
- current->active_mm = &init_mm;
- if (current->mm)
- BUG();
- enter_lazy_tlb(&init_mm, current);
-
- load_esp0(t, thread);
- set_tss_desc(cpu,t);
- load_TR_desc();
- load_LDT(&init_mm.context);
-
- /* Set up doublefault TSS pointer in the GDT */
- __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
-
- /* Clear %fs and %gs. */
- asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
-
- /* Clear all 6 debug registers: */
- set_debugreg(0, 0);
- set_debugreg(0, 1);
- set_debugreg(0, 2);
- set_debugreg(0, 3);
- set_debugreg(0, 6);
- set_debugreg(0, 7);
-
- /*
- * Force FPU initialization:
- */
- current_thread_info()->status = 0;
- clear_used_math();
- mxcsr_feature_mask_init();
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-void __devinit cpu_uninit(void)
-{
- int cpu = raw_smp_processor_id();
- cpu_clear(cpu, cpu_initialized);
-
- /* lazy TLB state */
- per_cpu(cpu_tlbstate, cpu).state = 0;
- per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
-}
-#endif
diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h
deleted file mode 100644
index 5a1d4f163e8..00000000000
--- a/arch/i386/kernel/cpu/cpu.h
+++ /dev/null
@@ -1,30 +0,0 @@
-
-struct cpu_model_info {
- int vendor;
- int family;
- char *model_names[16];
-};
-
-/* attempt to consolidate cpu attributes */
-struct cpu_dev {
- char * c_vendor;
-
- /* some have two possibilities for cpuid string */
- char * c_ident[2];
-
- struct cpu_model_info c_models[4];
-
- void (*c_init)(struct cpuinfo_x86 * c);
- void (*c_identify)(struct cpuinfo_x86 * c);
- unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
-};
-
-extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
-
-extern int get_model_name(struct cpuinfo_x86 *c);
-extern void display_cacheinfo(struct cpuinfo_x86 *c);
-
-extern void generic_identify(struct cpuinfo_x86 * c);
-
-extern void early_intel_workaround(struct cpuinfo_x86 *c);
-
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig
deleted file mode 100644
index 0f1eb507233..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/Kconfig
+++ /dev/null
@@ -1,243 +0,0 @@
-#
-# CPU Frequency scaling
-#
-
-menu "CPU Frequency scaling"
-
-source "drivers/cpufreq/Kconfig"
-
-if CPU_FREQ
-
-comment "CPUFreq processor drivers"
-
-config X86_ACPI_CPUFREQ
- tristate "ACPI Processor P-States driver"
- select CPU_FREQ_TABLE
- depends on ACPI_PROCESSOR
- help
- This driver adds a CPUFreq driver which utilizes the ACPI
- Processor Performance States.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config ELAN_CPUFREQ
- tristate "AMD Elan SC400 and SC410"
- select CPU_FREQ_TABLE
- depends on X86_ELAN
- ---help---
- This adds the CPUFreq driver for AMD Elan SC400 and SC410
- processors.
-
- You need to specify the processor maximum speed as boot
- parameter: elanfreq=maxspeed (in kHz) or as module
- parameter "max_freq".
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config SC520_CPUFREQ
- tristate "AMD Elan SC520"
- select CPU_FREQ_TABLE
- depends on X86_ELAN
- ---help---
- This adds the CPUFreq driver for AMD Elan SC520 processor.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-
-config X86_POWERNOW_K6
- tristate "AMD Mobile K6-2/K6-3 PowerNow!"
- select CPU_FREQ_TABLE
- help
- This adds the CPUFreq driver for mobile AMD K6-2+ and mobile
- AMD K6-3+ processors.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_POWERNOW_K7
- tristate "AMD Mobile Athlon/Duron PowerNow!"
- select CPU_FREQ_TABLE
- help
- This adds the CPUFreq driver for mobile AMD K7 mobile processors.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_POWERNOW_K7_ACPI
- bool
- depends on X86_POWERNOW_K7 && ACPI_PROCESSOR
- depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m)
- default y
-
-config X86_POWERNOW_K8
- tristate "AMD Opteron/Athlon64 PowerNow!"
- select CPU_FREQ_TABLE
- depends on EXPERIMENTAL
- help
- This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_POWERNOW_K8_ACPI
- bool
- depends on X86_POWERNOW_K8 && ACPI_PROCESSOR
- depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m)
- default y
-
-config X86_GX_SUSPMOD
- tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
- help
- This add the CPUFreq driver for NatSemi Geode processors which
- support suspend modulation.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_SPEEDSTEP_CENTRINO
- tristate "Intel Enhanced SpeedStep"
- select CPU_FREQ_TABLE
- select X86_SPEEDSTEP_CENTRINO_TABLE if (!X86_SPEEDSTEP_CENTRINO_ACPI)
- help
- This adds the CPUFreq driver for Enhanced SpeedStep enabled
- mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However,
- you also need to say Y to "Use ACPI tables to decode..." below
- [which might imply enabling ACPI] if you want to use this driver
- on non-Banias CPUs.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_SPEEDSTEP_CENTRINO_ACPI
- bool "Use ACPI tables to decode valid frequency/voltage pairs"
- depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR
- depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m)
- default y
- help
- Use primarily the information provided in the BIOS ACPI tables
- to determine valid CPU frequency and voltage pairings. It is
- required for the driver to work on non-Banias CPUs.
-
- If in doubt, say Y.
-
-config X86_SPEEDSTEP_CENTRINO_TABLE
- bool "Built-in tables for Banias CPUs"
- depends on X86_SPEEDSTEP_CENTRINO
- default y
- help
- Use built-in tables for Banias CPUs if ACPI encoding
- is not available.
-
- If in doubt, say N.
-
-config X86_SPEEDSTEP_ICH
- tristate "Intel Speedstep on ICH-M chipsets (ioport interface)"
- select CPU_FREQ_TABLE
- help
- This adds the CPUFreq driver for certain mobile Intel Pentium III
- (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all
- mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2,
- ICH3 or ICH4 southbridge.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_SPEEDSTEP_SMI
- tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)"
- select CPU_FREQ_TABLE
- depends on EXPERIMENTAL
- help
- This adds the CPUFreq driver for certain mobile Intel Pentium III
- (Coppermine), all mobile Intel Pentium III-M (Tualatin)
- on systems which have an Intel 440BX/ZX/MX southbridge.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_P4_CLOCKMOD
- tristate "Intel Pentium 4 clock modulation"
- select CPU_FREQ_TABLE
- help
- This adds the CPUFreq driver for Intel Pentium 4 / XEON
- processors.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_CPUFREQ_NFORCE2
- tristate "nVidia nForce2 FSB changing"
- depends on EXPERIMENTAL
- help
- This adds the CPUFreq driver for FSB changing on nVidia nForce2
- platforms.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_LONGRUN
- tristate "Transmeta LongRun"
- help
- This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors
- which support LongRun.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_LONGHAUL
- tristate "VIA Cyrix III Longhaul"
- select CPU_FREQ_TABLE
- help
- This adds the CPUFreq driver for VIA Samuel/CyrixIII,
- VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T
- processors.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-comment "shared options"
-
-config X86_ACPI_CPUFREQ_PROC_INTF
- bool "/proc/acpi/processor/../performance interface (deprecated)"
- depends on PROC_FS
- depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI
- help
- This enables the deprecated /proc/acpi/processor/../performance
- interface. While it is helpful for debugging, the generic,
- cross-architecture cpufreq interfaces should be used.
-
- If in doubt, say N.
-
-config X86_SPEEDSTEP_LIB
- tristate
- default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD
-
-config X86_SPEEDSTEP_RELAXED_CAP_CHECK
- bool "Relaxed speedstep capability checks"
- depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH)
- help
- Don't perform all checks for a speedstep capable system which would
- normally be done. Some ancient or strange systems, though speedstep
- capable, don't always indicate that they are speedstep capable. This
- option lets the probing code bypass some of those checks if the
- parameter "relaxed_check=1" is passed to the module.
-
-endif # CPU_FREQ
-
-endmenu
diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile
deleted file mode 100644
index 2e894f1c891..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
-obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
-obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
-obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
-obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o
-obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o
-obj-$(CONFIG_X86_LONGRUN) += longrun.o
-obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o
-obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o
-obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
-obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o
-obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o
-obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
-obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
-obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
deleted file mode 100644
index caa9f771134..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ /dev/null
@@ -1,560 +0,0 @@
-/*
- * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.3 $)
- *
- * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
- * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/compiler.h>
-#include <linux/sched.h> /* current */
-#include <asm/io.h>
-#include <asm/delay.h>
-#include <asm/uaccess.h>
-
-#include <linux/acpi.h>
-#include <acpi/processor.h>
-
-#include "speedstep-est-common.h"
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
-
-MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
-MODULE_DESCRIPTION("ACPI Processor P-States Driver");
-MODULE_LICENSE("GPL");
-
-
-struct cpufreq_acpi_io {
- struct acpi_processor_performance acpi_data;
- struct cpufreq_frequency_table *freq_table;
- unsigned int resume;
-};
-
-static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS];
-
-static struct cpufreq_driver acpi_cpufreq_driver;
-
-static unsigned int acpi_pstate_strict;
-
-static int
-acpi_processor_write_port(
- u16 port,
- u8 bit_width,
- u32 value)
-{
- if (bit_width <= 8) {
- outb(value, port);
- } else if (bit_width <= 16) {
- outw(value, port);
- } else if (bit_width <= 32) {
- outl(value, port);
- } else {
- return -ENODEV;
- }
- return 0;
-}
-
-static int
-acpi_processor_read_port(
- u16 port,
- u8 bit_width,
- u32 *ret)
-{
- *ret = 0;
- if (bit_width <= 8) {
- *ret = inb(port);
- } else if (bit_width <= 16) {
- *ret = inw(port);
- } else if (bit_width <= 32) {
- *ret = inl(port);
- } else {
- return -ENODEV;
- }
- return 0;
-}
-
-static int
-acpi_processor_set_performance (
- struct cpufreq_acpi_io *data,
- unsigned int cpu,
- int state)
-{
- u16 port = 0;
- u8 bit_width = 0;
- int ret = 0;
- u32 value = 0;
- int i = 0;
- struct cpufreq_freqs cpufreq_freqs;
- cpumask_t saved_mask;
- int retval;
-
- dprintk("acpi_processor_set_performance\n");
-
- /*
- * TBD: Use something other than set_cpus_allowed.
- * As set_cpus_allowed is a bit racy,
- * with any other set_cpus_allowed for this process.
- */
- saved_mask = current->cpus_allowed;
- set_cpus_allowed(current, cpumask_of_cpu(cpu));
- if (smp_processor_id() != cpu) {
- return (-EAGAIN);
- }
-
- if (state == data->acpi_data.state) {
- if (unlikely(data->resume)) {
- dprintk("Called after resume, resetting to P%d\n", state);
- data->resume = 0;
- } else {
- dprintk("Already at target state (P%d)\n", state);
- retval = 0;
- goto migrate_end;
- }
- }
-
- dprintk("Transitioning from P%d to P%d\n",
- data->acpi_data.state, state);
-
- /* cpufreq frequency struct */
- cpufreq_freqs.cpu = cpu;
- cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency;
- cpufreq_freqs.new = data->freq_table[state].frequency;
-
- /* notify cpufreq */
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
-
- /*
- * First we write the target state's 'control' value to the
- * control_register.
- */
-
- port = data->acpi_data.control_register.address;
- bit_width = data->acpi_data.control_register.bit_width;
- value = (u32) data->acpi_data.states[state].control;
-
- dprintk("Writing 0x%08x to port 0x%04x\n", value, port);
-
- ret = acpi_processor_write_port(port, bit_width, value);
- if (ret) {
- dprintk("Invalid port width 0x%04x\n", bit_width);
- retval = ret;
- goto migrate_end;
- }
-
- /*
- * Assume the write went through when acpi_pstate_strict is not used.
- * As read status_register is an expensive operation and there
- * are no specific error cases where an IO port write will fail.
- */
- if (acpi_pstate_strict) {
- /* Then we read the 'status_register' and compare the value
- * with the target state's 'status' to make sure the
- * transition was successful.
- * Note that we'll poll for up to 1ms (100 cycles of 10us)
- * before giving up.
- */
-
- port = data->acpi_data.status_register.address;
- bit_width = data->acpi_data.status_register.bit_width;
-
- dprintk("Looking for 0x%08x from port 0x%04x\n",
- (u32) data->acpi_data.states[state].status, port);
-
- for (i=0; i<100; i++) {
- ret = acpi_processor_read_port(port, bit_width, &value);
- if (ret) {
- dprintk("Invalid port width 0x%04x\n", bit_width);
- retval = ret;
- goto migrate_end;
- }
- if (value == (u32) data->acpi_data.states[state].status)
- break;
- udelay(10);
- }
- } else {
- i = 0;
- value = (u32) data->acpi_data.states[state].status;
- }
-
- /* notify cpufreq */
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
-
- if (unlikely(value != (u32) data->acpi_data.states[state].status)) {
- unsigned int tmp = cpufreq_freqs.new;
- cpufreq_freqs.new = cpufreq_freqs.old;
- cpufreq_freqs.old = tmp;
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
- printk(KERN_WARNING "acpi-cpufreq: Transition failed\n");
- retval = -ENODEV;
- goto migrate_end;
- }
-
- dprintk("Transition successful after %d microseconds\n", i * 10);
-
- data->acpi_data.state = state;
-
- retval = 0;
-migrate_end:
- set_cpus_allowed(current, saved_mask);
- return (retval);
-}
-
-
-static int
-acpi_cpufreq_target (
- struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
-{
- struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
- unsigned int next_state = 0;
- unsigned int result = 0;
-
- dprintk("acpi_cpufreq_setpolicy\n");
-
- result = cpufreq_frequency_table_target(policy,
- data->freq_table,
- target_freq,
- relation,
- &next_state);
- if (result)
- return (result);
-
- result = acpi_processor_set_performance (data, policy->cpu, next_state);
-
- return (result);
-}
-
-
-static int
-acpi_cpufreq_verify (
- struct cpufreq_policy *policy)
-{
- unsigned int result = 0;
- struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-
- dprintk("acpi_cpufreq_verify\n");
-
- result = cpufreq_frequency_table_verify(policy,
- data->freq_table);
-
- return (result);
-}
-
-
-static unsigned long
-acpi_cpufreq_guess_freq (
- struct cpufreq_acpi_io *data,
- unsigned int cpu)
-{
- if (cpu_khz) {
- /* search the closest match to cpu_khz */
- unsigned int i;
- unsigned long freq;
- unsigned long freqn = data->acpi_data.states[0].core_frequency * 1000;
-
- for (i=0; i < (data->acpi_data.state_count - 1); i++) {
- freq = freqn;
- freqn = data->acpi_data.states[i+1].core_frequency * 1000;
- if ((2 * cpu_khz) > (freqn + freq)) {
- data->acpi_data.state = i;
- return (freq);
- }
- }
- data->acpi_data.state = data->acpi_data.state_count - 1;
- return (freqn);
- } else
- /* assume CPU is at P0... */
- data->acpi_data.state = 0;
- return data->acpi_data.states[0].core_frequency * 1000;
-
-}
-
-
-/*
- * acpi_processor_cpu_init_pdc_est - let BIOS know about the SMP capabilities
- * of this driver
- * @perf: processor-specific acpi_io_data struct
- * @cpu: CPU being initialized
- *
- * To avoid issues with legacy OSes, some BIOSes require to be informed of
- * the SMP capabilities of OS P-state driver. Here we set the bits in _PDC
- * accordingly, for Enhanced Speedstep. Actual call to _PDC is done in
- * driver/acpi/processor.c
- */
-static void
-acpi_processor_cpu_init_pdc_est(
- struct acpi_processor_performance *perf,
- unsigned int cpu,
- struct acpi_object_list *obj_list
- )
-{
- union acpi_object *obj;
- u32 *buf;
- struct cpuinfo_x86 *c = cpu_data + cpu;
- dprintk("acpi_processor_cpu_init_pdc_est\n");
-
- if (!cpu_has(c, X86_FEATURE_EST))
- return;
-
- /* Initialize pdc. It will be used later. */
- if (!obj_list)
- return;
-
- if (!(obj_list->count && obj_list->pointer))
- return;
-
- obj = obj_list->pointer;
- if ((obj->buffer.length == 12) && obj->buffer.pointer) {
- buf = (u32 *)obj->buffer.pointer;
- buf[0] = ACPI_PDC_REVISION_ID;
- buf[1] = 1;
- buf[2] = ACPI_PDC_EST_CAPABILITY_SMP;
- perf->pdc = obj_list;
- }
- return;
-}
-
-
-/* CPU specific PDC initialization */
-static void
-acpi_processor_cpu_init_pdc(
- struct acpi_processor_performance *perf,
- unsigned int cpu,
- struct acpi_object_list *obj_list
- )
-{
- struct cpuinfo_x86 *c = cpu_data + cpu;
- dprintk("acpi_processor_cpu_init_pdc\n");
- perf->pdc = NULL;
- if (cpu_has(c, X86_FEATURE_EST))
- acpi_processor_cpu_init_pdc_est(perf, cpu, obj_list);
- return;
-}
-
-
-static int
-acpi_cpufreq_cpu_init (
- struct cpufreq_policy *policy)
-{
- unsigned int i;
- unsigned int cpu = policy->cpu;
- struct cpufreq_acpi_io *data;
- unsigned int result = 0;
-
- union acpi_object arg0 = {ACPI_TYPE_BUFFER};
- u32 arg0_buf[3];
- struct acpi_object_list arg_list = {1, &arg0};
-
- dprintk("acpi_cpufreq_cpu_init\n");
- /* setup arg_list for _PDC settings */
- arg0.buffer.length = 12;
- arg0.buffer.pointer = (u8 *) arg0_buf;
-
- data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
- if (!data)
- return (-ENOMEM);
- memset(data, 0, sizeof(struct cpufreq_acpi_io));
-
- acpi_io_data[cpu] = data;
-
- acpi_processor_cpu_init_pdc(&data->acpi_data, cpu, &arg_list);
- result = acpi_processor_register_performance(&data->acpi_data, cpu);
- data->acpi_data.pdc = NULL;
-
- if (result)
- goto err_free;
-
- if (is_const_loops_cpu(cpu)) {
- acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
- }
-
- /* capability check */
- if (data->acpi_data.state_count <= 1) {
- dprintk("No P-States\n");
- result = -ENODEV;
- goto err_unreg;
- }
- if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO) ||
- (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO)) {
- dprintk("Unsupported address space [%d, %d]\n",
- (u32) (data->acpi_data.control_register.space_id),
- (u32) (data->acpi_data.status_register.space_id));
- result = -ENODEV;
- goto err_unreg;
- }
-
- /* alloc freq_table */
- data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * (data->acpi_data.state_count + 1), GFP_KERNEL);
- if (!data->freq_table) {
- result = -ENOMEM;
- goto err_unreg;
- }
-
- /* detect transition latency */
- policy->cpuinfo.transition_latency = 0;
- for (i=0; i<data->acpi_data.state_count; i++) {
- if ((data->acpi_data.states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency)
- policy->cpuinfo.transition_latency = data->acpi_data.states[i].transition_latency * 1000;
- }
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
-
- /* The current speed is unknown and not detectable by ACPI... */
- policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
-
- /* table init */
- for (i=0; i<=data->acpi_data.state_count; i++)
- {
- data->freq_table[i].index = i;
- if (i<data->acpi_data.state_count)
- data->freq_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
- else
- data->freq_table[i].frequency = CPUFREQ_TABLE_END;
- }
-
- result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
- if (result) {
- goto err_freqfree;
- }
-
- /* notify BIOS that we exist */
- acpi_processor_notify_smm(THIS_MODULE);
-
- printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management activated.\n",
- cpu);
- for (i = 0; i < data->acpi_data.state_count; i++)
- dprintk(" %cP%d: %d MHz, %d mW, %d uS\n",
- (i == data->acpi_data.state?'*':' '), i,
- (u32) data->acpi_data.states[i].core_frequency,
- (u32) data->acpi_data.states[i].power,
- (u32) data->acpi_data.states[i].transition_latency);
-
- cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
-
- /*
- * the first call to ->target() should result in us actually
- * writing something to the appropriate registers.
- */
- data->resume = 1;
-
- return (result);
-
- err_freqfree:
- kfree(data->freq_table);
- err_unreg:
- acpi_processor_unregister_performance(&data->acpi_data, cpu);
- err_free:
- kfree(data);
- acpi_io_data[cpu] = NULL;
-
- return (result);
-}
-
-
-static int
-acpi_cpufreq_cpu_exit (
- struct cpufreq_policy *policy)
-{
- struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-
-
- dprintk("acpi_cpufreq_cpu_exit\n");
-
- if (data) {
- cpufreq_frequency_table_put_attr(policy->cpu);
- acpi_io_data[policy->cpu] = NULL;
- acpi_processor_unregister_performance(&data->acpi_data, policy->cpu);
- kfree(data);
- }
-
- return (0);
-}
-
-static int
-acpi_cpufreq_resume (
- struct cpufreq_policy *policy)
-{
- struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-
-
- dprintk("acpi_cpufreq_resume\n");
-
- data->resume = 1;
-
- return (0);
-}
-
-
-static struct freq_attr* acpi_cpufreq_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-static struct cpufreq_driver acpi_cpufreq_driver = {
- .verify = acpi_cpufreq_verify,
- .target = acpi_cpufreq_target,
- .init = acpi_cpufreq_cpu_init,
- .exit = acpi_cpufreq_cpu_exit,
- .resume = acpi_cpufreq_resume,
- .name = "acpi-cpufreq",
- .owner = THIS_MODULE,
- .attr = acpi_cpufreq_attr,
-};
-
-
-static int __init
-acpi_cpufreq_init (void)
-{
- int result = 0;
-
- dprintk("acpi_cpufreq_init\n");
-
- result = cpufreq_register_driver(&acpi_cpufreq_driver);
-
- return (result);
-}
-
-
-static void __exit
-acpi_cpufreq_exit (void)
-{
- dprintk("acpi_cpufreq_exit\n");
-
- cpufreq_unregister_driver(&acpi_cpufreq_driver);
-
- return;
-}
-
-module_param(acpi_pstate_strict, uint, 0644);
-MODULE_PARM_DESC(acpi_pstate_strict, "value 0 or non-zero. non-zero -> strict ACPI checks are performed during frequency changes.");
-
-late_initcall(acpi_cpufreq_init);
-module_exit(acpi_cpufreq_exit);
-
-MODULE_ALIAS("acpi");
diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
deleted file mode 100644
index 04a40534520..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- * (C) 2004 Sebastian Witt <se.witt@gmx.net>
- *
- * Licensed under the terms of the GNU GPL License version 2.
- * Based upon reverse engineered information
- *
- * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-#define NFORCE2_XTAL 25
-#define NFORCE2_BOOTFSB 0x48
-#define NFORCE2_PLLENABLE 0xa8
-#define NFORCE2_PLLREG 0xa4
-#define NFORCE2_PLLADR 0xa0
-#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div)
-
-#define NFORCE2_MIN_FSB 50
-#define NFORCE2_SAFE_DISTANCE 50
-
-/* Delay in ms between FSB changes */
-//#define NFORCE2_DELAY 10
-
-/* nforce2_chipset:
- * FSB is changed using the chipset
- */
-static struct pci_dev *nforce2_chipset_dev;
-
-/* fid:
- * multiplier * 10
- */
-static int fid = 0;
-
-/* min_fsb, max_fsb:
- * minimum and maximum FSB (= FSB at boot time)
- */
-static int min_fsb = 0;
-static int max_fsb = 0;
-
-MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
-MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
-MODULE_LICENSE("GPL");
-
-module_param(fid, int, 0444);
-module_param(min_fsb, int, 0444);
-
-MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
-MODULE_PARM_DESC(min_fsb,
- "Minimum FSB to use, if not defined: current FSB - 50");
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
-
-/*
- * nforce2_calc_fsb - calculate FSB
- * @pll: PLL value
- *
- * Calculates FSB from PLL value
- */
-static int nforce2_calc_fsb(int pll)
-{
- unsigned char mul, div;
-
- mul = (pll >> 8) & 0xff;
- div = pll & 0xff;
-
- if (div > 0)
- return NFORCE2_XTAL * mul / div;
-
- return 0;
-}
-
-/*
- * nforce2_calc_pll - calculate PLL value
- * @fsb: FSB
- *
- * Calculate PLL value for given FSB
- */
-static int nforce2_calc_pll(unsigned int fsb)
-{
- unsigned char xmul, xdiv;
- unsigned char mul = 0, div = 0;
- int tried = 0;
-
- /* Try to calculate multiplier and divider up to 4 times */
- while (((mul == 0) || (div == 0)) && (tried <= 3)) {
- for (xdiv = 1; xdiv <= 0x80; xdiv++)
- for (xmul = 1; xmul <= 0xfe; xmul++)
- if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) ==
- fsb + tried) {
- mul = xmul;
- div = xdiv;
- }
- tried++;
- }
-
- if ((mul == 0) || (div == 0))
- return -1;
-
- return NFORCE2_PLL(mul, div);
-}
-
-/*
- * nforce2_write_pll - write PLL value to chipset
- * @pll: PLL value
- *
- * Writes new FSB PLL value to chipset
- */
-static void nforce2_write_pll(int pll)
-{
- int temp;
-
- /* Set the pll addr. to 0x00 */
- temp = 0x00;
- pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, temp);
-
- /* Now write the value in all 64 registers */
- for (temp = 0; temp <= 0x3f; temp++) {
- pci_write_config_dword(nforce2_chipset_dev,
- NFORCE2_PLLREG, pll);
- }
-
- return;
-}
-
-/*
- * nforce2_fsb_read - Read FSB
- *
- * Read FSB from chipset
- * If bootfsb != 0, return FSB at boot-time
- */
-static unsigned int nforce2_fsb_read(int bootfsb)
-{
- struct pci_dev *nforce2_sub5;
- u32 fsb, temp = 0;
-
-
- /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
- nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
- 0x01EF,
- PCI_ANY_ID,
- PCI_ANY_ID,
- NULL);
-
- if (!nforce2_sub5)
- return 0;
-
- pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
- fsb /= 1000000;
-
- /* Check if PLL register is already set */
- pci_read_config_byte(nforce2_chipset_dev,
- NFORCE2_PLLENABLE, (u8 *)&temp);
-
- if(bootfsb || !temp)
- return fsb;
-
- /* Use PLL register FSB value */
- pci_read_config_dword(nforce2_chipset_dev,
- NFORCE2_PLLREG, &temp);
- fsb = nforce2_calc_fsb(temp);
-
- return fsb;
-}
-
-/*
- * nforce2_set_fsb - set new FSB
- * @fsb: New FSB
- *
- * Sets new FSB
- */
-static int nforce2_set_fsb(unsigned int fsb)
-{
- u32 pll, temp = 0;
- unsigned int tfsb;
- int diff;
-
- if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
- printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb);
- return -EINVAL;
- }
-
- tfsb = nforce2_fsb_read(0);
- if (!tfsb) {
- printk(KERN_ERR "cpufreq: Error while reading the FSB\n");
- return -EINVAL;
- }
-
- /* First write? Then set actual value */
- pci_read_config_byte(nforce2_chipset_dev,
- NFORCE2_PLLENABLE, (u8 *)&temp);
- if (!temp) {
- pll = nforce2_calc_pll(tfsb);
-
- if (pll < 0)
- return -EINVAL;
-
- nforce2_write_pll(pll);
- }
-
- /* Enable write access */
- temp = 0x01;
- pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp);
-
- diff = tfsb - fsb;
-
- if (!diff)
- return 0;
-
- while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) {
- if (diff < 0)
- tfsb++;
- else
- tfsb--;
-
- /* Calculate the PLL reg. value */
- if ((pll = nforce2_calc_pll(tfsb)) == -1)
- return -EINVAL;
-
- nforce2_write_pll(pll);
-#ifdef NFORCE2_DELAY
- mdelay(NFORCE2_DELAY);
-#endif
- }
-
- temp = 0x40;
- pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp);
-
- return 0;
-}
-
-/**
- * nforce2_get - get the CPU frequency
- * @cpu: CPU number
- *
- * Returns the CPU frequency
- */
-static unsigned int nforce2_get(unsigned int cpu)
-{
- if (cpu)
- return 0;
- return nforce2_fsb_read(0) * fid * 100;
-}
-
-/**
- * nforce2_target - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * Sets a new CPUFreq policy.
- */
-static int nforce2_target(struct cpufreq_policy *policy,
- unsigned int target_freq, unsigned int relation)
-{
-// unsigned long flags;
- struct cpufreq_freqs freqs;
- unsigned int target_fsb;
-
- if ((target_freq > policy->max) || (target_freq < policy->min))
- return -EINVAL;
-
- target_fsb = target_freq / (fid * 100);
-
- freqs.old = nforce2_get(policy->cpu);
- freqs.new = target_fsb * fid * 100;
- freqs.cpu = 0; /* Only one CPU on nForce2 plattforms */
-
- if (freqs.old == freqs.new)
- return 0;
-
- dprintk(KERN_INFO "cpufreq: Old CPU frequency %d kHz, new %d kHz\n",
- freqs.old, freqs.new);
-
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
- /* Disable IRQs */
- //local_irq_save(flags);
-
- if (nforce2_set_fsb(target_fsb) < 0)
- printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
- target_fsb);
- else
- dprintk(KERN_INFO "cpufreq: Changed FSB successfully to %d\n",
- target_fsb);
-
- /* Enable IRQs */
- //local_irq_restore(flags);
-
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
- return 0;
-}
-
-/**
- * nforce2_verify - verifies a new CPUFreq policy
- * @policy: new policy
- */
-static int nforce2_verify(struct cpufreq_policy *policy)
-{
- unsigned int fsb_pol_max;
-
- fsb_pol_max = policy->max / (fid * 100);
-
- if (policy->min < (fsb_pol_max * fid * 100))
- policy->max = (fsb_pol_max + 1) * fid * 100;
-
- cpufreq_verify_within_limits(policy,
- policy->cpuinfo.min_freq,
- policy->cpuinfo.max_freq);
- return 0;
-}
-
-static int nforce2_cpu_init(struct cpufreq_policy *policy)
-{
- unsigned int fsb;
- unsigned int rfid;
-
- /* capability check */
- if (policy->cpu != 0)
- return -ENODEV;
-
- /* Get current FSB */
- fsb = nforce2_fsb_read(0);
-
- if (!fsb)
- return -EIO;
-
- /* FIX: Get FID from CPU */
- if (!fid) {
- if (!cpu_khz) {
- printk(KERN_WARNING
- "cpufreq: cpu_khz not set, can't calculate multiplier!\n");
- return -ENODEV;
- }
-
- fid = cpu_khz / (fsb * 100);
- rfid = fid % 5;
-
- if (rfid) {
- if (rfid > 2)
- fid += 5 - rfid;
- else
- fid -= rfid;
- }
- }
-
- printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb,
- fid / 10, fid % 10);
-
- /* Set maximum FSB to FSB at boot time */
- max_fsb = nforce2_fsb_read(1);
-
- if(!max_fsb)
- return -EIO;
-
- if (!min_fsb)
- min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE;
-
- if (min_fsb < NFORCE2_MIN_FSB)
- min_fsb = NFORCE2_MIN_FSB;
-
- /* cpuinfo and default policy values */
- policy->cpuinfo.min_freq = min_fsb * fid * 100;
- policy->cpuinfo.max_freq = max_fsb * fid * 100;
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- policy->cur = nforce2_get(policy->cpu);
- policy->min = policy->cpuinfo.min_freq;
- policy->max = policy->cpuinfo.max_freq;
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
-
- return 0;
-}
-
-static int nforce2_cpu_exit(struct cpufreq_policy *policy)
-{
- return 0;
-}
-
-static struct cpufreq_driver nforce2_driver = {
- .name = "nforce2",
- .verify = nforce2_verify,
- .target = nforce2_target,
- .get = nforce2_get,
- .init = nforce2_cpu_init,
- .exit = nforce2_cpu_exit,
- .owner = THIS_MODULE,
-};
-
-/**
- * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic
- *
- * Detects nForce2 A2 and C1 stepping
- *
- */
-static unsigned int nforce2_detect_chipset(void)
-{
- u8 revision;
-
- nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
- PCI_DEVICE_ID_NVIDIA_NFORCE2,
- PCI_ANY_ID,
- PCI_ANY_ID,
- NULL);
-
- if (nforce2_chipset_dev == NULL)
- return -ENODEV;
-
- pci_read_config_byte(nforce2_chipset_dev, PCI_REVISION_ID, &revision);
-
- printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n",
- revision);
- printk(KERN_INFO
- "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n");
-
- return 0;
-}
-
-/**
- * nforce2_init - initializes the nForce2 CPUFreq driver
- *
- * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported
- * devices, -EINVAL on problems during initiatization, and zero on
- * success.
- */
-static int __init nforce2_init(void)
-{
- /* TODO: do we need to detect the processor? */
-
- /* detect chipset */
- if (nforce2_detect_chipset()) {
- printk(KERN_ERR "cpufreq: No nForce2 chipset.\n");
- return -ENODEV;
- }
-
- return cpufreq_register_driver(&nforce2_driver);
-}
-
-/**
- * nforce2_exit - unregisters cpufreq module
- *
- * Unregisters nForce2 FSB change support.
- */
-static void __exit nforce2_exit(void)
-{
- cpufreq_unregister_driver(&nforce2_driver);
-}
-
-module_init(nforce2_init);
-module_exit(nforce2_exit);
-
diff --git a/arch/i386/kernel/cpu/cpufreq/elanfreq.c b/arch/i386/kernel/cpu/cpufreq/elanfreq.c
deleted file mode 100644
index 3f7caa4ae6d..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/elanfreq.c
+++ /dev/null
@@ -1,312 +0,0 @@
-/*
- * elanfreq: cpufreq driver for the AMD ELAN family
- *
- * (c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de>
- *
- * Parts of this code are (c) Sven Geggus <sven@geggus.net>
- *
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/cpufreq.h>
-
-#include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
-
-#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */
-#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */
-
-/* Module parameter */
-static int max_freq;
-
-struct s_elan_multiplier {
- int clock; /* frequency in kHz */
- int val40h; /* PMU Force Mode register */
- int val80h; /* CPU Clock Speed Register */
-};
-
-/*
- * It is important that the frequencies
- * are listed in ascending order here!
- */
-struct s_elan_multiplier elan_multiplier[] = {
- {1000, 0x02, 0x18},
- {2000, 0x02, 0x10},
- {4000, 0x02, 0x08},
- {8000, 0x00, 0x00},
- {16000, 0x00, 0x02},
- {33000, 0x00, 0x04},
- {66000, 0x01, 0x04},
- {99000, 0x01, 0x05}
-};
-
-static struct cpufreq_frequency_table elanfreq_table[] = {
- {0, 1000},
- {1, 2000},
- {2, 4000},
- {3, 8000},
- {4, 16000},
- {5, 33000},
- {6, 66000},
- {7, 99000},
- {0, CPUFREQ_TABLE_END},
-};
-
-
-/**
- * elanfreq_get_cpu_frequency: determine current cpu speed
- *
- * Finds out at which frequency the CPU of the Elan SOC runs
- * at the moment. Frequencies from 1 to 33 MHz are generated
- * the normal way, 66 and 99 MHz are called "Hyperspeed Mode"
- * and have the rest of the chip running with 33 MHz.
- */
-
-static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
-{
- u8 clockspeed_reg; /* Clock Speed Register */
-
- local_irq_disable();
- outb_p(0x80,REG_CSCIR);
- clockspeed_reg = inb_p(REG_CSCDR);
- local_irq_enable();
-
- if ((clockspeed_reg & 0xE0) == 0xE0) { return 0; }
-
- /* Are we in CPU clock multiplied mode (66/99 MHz)? */
- if ((clockspeed_reg & 0xE0) == 0xC0) {
- if ((clockspeed_reg & 0x01) == 0) {
- return 66000;
- } else {
- return 99000;
- }
- }
-
- /* 33 MHz is not 32 MHz... */
- if ((clockspeed_reg & 0xE0)==0xA0)
- return 33000;
-
- return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000);
-}
-
-
-/**
- * elanfreq_set_cpu_frequency: Change the CPU core frequency
- * @cpu: cpu number
- * @freq: frequency in kHz
- *
- * This function takes a frequency value and changes the CPU frequency
- * according to this. Note that the frequency has to be checked by
- * elanfreq_validatespeed() for correctness!
- *
- * There is no return value.
- */
-
-static void elanfreq_set_cpu_state (unsigned int state) {
-
- struct cpufreq_freqs freqs;
-
- freqs.old = elanfreq_get_cpu_frequency(0);
- freqs.new = elan_multiplier[state].clock;
- freqs.cpu = 0; /* elanfreq.c is UP only driver */
-
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
- printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",elan_multiplier[state].clock);
-
-
- /*
- * Access to the Elan's internal registers is indexed via
- * 0x22: Chip Setup & Control Register Index Register (CSCI)
- * 0x23: Chip Setup & Control Register Data Register (CSCD)
- *
- */
-
- /*
- * 0x40 is the Power Management Unit's Force Mode Register.
- * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency)
- */
-
- local_irq_disable();
- outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */
- outb_p(0x00,REG_CSCDR);
- local_irq_enable(); /* wait till internal pipelines and */
- udelay(1000); /* buffers have cleaned up */
-
- local_irq_disable();
-
- /* now, set the CPU clock speed register (0x80) */
- outb_p(0x80,REG_CSCIR);
- outb_p(elan_multiplier[state].val80h,REG_CSCDR);
-
- /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
- outb_p(0x40,REG_CSCIR);
- outb_p(elan_multiplier[state].val40h,REG_CSCDR);
- udelay(10000);
- local_irq_enable();
-
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-};
-
-
-/**
- * elanfreq_validatespeed: test if frequency range is valid
- * @policy: the policy to validate
- *
- * This function checks if a given frequency range in kHz is valid
- * for the hardware supported by the driver.
- */
-
-static int elanfreq_verify (struct cpufreq_policy *policy)
-{
- return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
-}
-
-static int elanfreq_target (struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
-{
- unsigned int newstate = 0;
-
- if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate))
- return -EINVAL;
-
- elanfreq_set_cpu_state(newstate);
-
- return 0;
-}
-
-
-/*
- * Module init and exit code
- */
-
-static int elanfreq_cpu_init(struct cpufreq_policy *policy)
-{
- struct cpuinfo_x86 *c = cpu_data;
- unsigned int i;
- int result;
-
- /* capability check */
- if ((c->x86_vendor != X86_VENDOR_AMD) ||
- (c->x86 != 4) || (c->x86_model!=10))
- return -ENODEV;
-
- /* max freq */
- if (!max_freq)
- max_freq = elanfreq_get_cpu_frequency(0);
-
- /* table init */
- for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
- if (elanfreq_table[i].frequency > max_freq)
- elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
- }
-
- /* cpuinfo and default policy values */
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- policy->cur = elanfreq_get_cpu_frequency(0);
-
- result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
- if (result)
- return (result);
-
- cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
-
- return 0;
-}
-
-
-static int elanfreq_cpu_exit(struct cpufreq_policy *policy)
-{
- cpufreq_frequency_table_put_attr(policy->cpu);
- return 0;
-}
-
-
-#ifndef MODULE
-/**
- * elanfreq_setup - elanfreq command line parameter parsing
- *
- * elanfreq command line parameter. Use:
- * elanfreq=66000
- * to set the maximum CPU frequency to 66 MHz. Note that in
- * case you do not give this boot parameter, the maximum
- * frequency will fall back to _current_ CPU frequency which
- * might be lower. If you build this as a module, use the
- * max_freq module parameter instead.
- */
-static int __init elanfreq_setup(char *str)
-{
- max_freq = simple_strtoul(str, &str, 0);
- printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
- return 1;
-}
-__setup("elanfreq=", elanfreq_setup);
-#endif
-
-
-static struct freq_attr* elanfreq_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-
-static struct cpufreq_driver elanfreq_driver = {
- .get = elanfreq_get_cpu_frequency,
- .verify = elanfreq_verify,
- .target = elanfreq_target,
- .init = elanfreq_cpu_init,
- .exit = elanfreq_cpu_exit,
- .name = "elanfreq",
- .owner = THIS_MODULE,
- .attr = elanfreq_attr,
-};
-
-
-static int __init elanfreq_init(void)
-{
- struct cpuinfo_x86 *c = cpu_data;
-
- /* Test if we have the right hardware */
- if ((c->x86_vendor != X86_VENDOR_AMD) ||
- (c->x86 != 4) || (c->x86_model!=10))
- {
- printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
- return -ENODEV;
- }
-
- return cpufreq_register_driver(&elanfreq_driver);
-}
-
-
-static void __exit elanfreq_exit(void)
-{
- cpufreq_unregister_driver(&elanfreq_driver);
-}
-
-
-module_param (max_freq, int, 0444);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>");
-MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
-
-module_init(elanfreq_init);
-module_exit(elanfreq_exit);
-
diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
deleted file mode 100644
index e86ea486c31..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- * Cyrix MediaGX and NatSemi Geode Suspend Modulation
- * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
- * (C) 2002 Hiroshi Miura <miura@da-cha.org>
- * All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation
- *
- * The author(s) of this software shall not be held liable for damages
- * of any nature resulting due to the use of this software. This
- * software is provided AS-IS with no warranties.
- *
- * Theoritical note:
- *
- * (see Geode(tm) CS5530 manual (rev.4.1) page.56)
- *
- * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0
- * are based on Suspend Moduration.
- *
- * Suspend Modulation works by asserting and de-asserting the SUSP# pin
- * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP#
- * the CPU enters an idle state. GX1 stops its core clock when SUSP# is
- * asserted then power consumption is reduced.
- *
- * Suspend Modulation's OFF/ON duration are configurable
- * with 'Suspend Modulation OFF Count Register'
- * and 'Suspend Modulation ON Count Register'.
- * These registers are 8bit counters that represent the number of
- * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF)
- * to the processor.
- *
- * These counters define a ratio which is the effective frequency
- * of operation of the system.
- *
- * OFF Count
- * F_eff = Fgx * ----------------------
- * OFF Count + ON Count
- *
- * 0 <= On Count, Off Count <= 255
- *
- * From these limits, we can get register values
- *
- * off_duration + on_duration <= MAX_DURATION
- * on_duration = off_duration * (stock_freq - freq) / freq
- *
- * off_duration = (freq * DURATION) / stock_freq
- * on_duration = DURATION - off_duration
- *
- *
- *---------------------------------------------------------------------------
- *
- * ChangeLog:
- * Dec. 12, 2003 Hiroshi Miura <miura@da-cha.org>
- * - fix on/off register mistake
- * - fix cpu_khz calc when it stops cpu modulation.
- *
- * Dec. 11, 2002 Hiroshi Miura <miura@da-cha.org>
- * - rewrite for Cyrix MediaGX Cx5510/5520 and
- * NatSemi Geode Cs5530(A).
- *
- * Jul. ??, 2002 Zwane Mwaikambo <zwane@commfireservices.com>
- * - cs5530_mod patch for 2.4.19-rc1.
- *
- *---------------------------------------------------------------------------
- *
- * Todo
- * Test on machines with 5510, 5530, 5530A
- */
-
-/************************************************************************
- * Suspend Modulation - Definitions *
- ************************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <asm/processor.h>
-#include <asm/errno.h>
-
-/* PCI config registers, all at F0 */
-#define PCI_PMER1 0x80 /* power management enable register 1 */
-#define PCI_PMER2 0x81 /* power management enable register 2 */
-#define PCI_PMER3 0x82 /* power management enable register 3 */
-#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */
-#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */
-#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */
-#define PCI_MODON 0x95 /* suspend modulation ON counter register */
-#define PCI_SUSCFG 0x96 /* suspend configuration register */
-
-/* PMER1 bits */
-#define GPM (1<<0) /* global power management */
-#define GIT (1<<1) /* globally enable PM device idle timers */
-#define GTR (1<<2) /* globally enable IO traps */
-#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */
-#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */
-
-/* SUSCFG bits */
-#define SUSMOD (1<<0) /* enable/disable suspend modulation */
-/* the belows support only with cs5530 (after rev.1.2)/cs5530A */
-#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */
- /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */
-#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */
-/* the belows support only with cs5530A */
-#define PWRSVE_ISA (1<<3) /* stop ISA clock */
-#define PWRSVE (1<<4) /* active idle */
-
-struct gxfreq_params {
- u8 on_duration;
- u8 off_duration;
- u8 pci_suscfg;
- u8 pci_pmer1;
- u8 pci_pmer2;
- u8 pci_rev;
- struct pci_dev *cs55x0;
-};
-
-static struct gxfreq_params *gx_params;
-static int stock_freq;
-
-/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
-static int pci_busclk = 0;
-module_param (pci_busclk, int, 0444);
-
-/* maximum duration for which the cpu may be suspended
- * (32us * MAX_DURATION). If no parameter is given, this defaults
- * to 255.
- * Note that this leads to a maximum of 8 ms(!) where the CPU clock
- * is suspended -- processing power is just 0.39% of what it used to be,
- * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
-static int max_duration = 255;
-module_param (max_duration, int, 0444);
-
-/* For the default policy, we want at least some processing power
- * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
- */
-#define POLICY_MIN_DIV 20
-
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg)
-
-/**
- * we can detect a core multipiler from dir0_lsb
- * from GX1 datasheet p.56,
- * MULT[3:0]:
- * 0000 = SYSCLK multiplied by 4 (test only)
- * 0001 = SYSCLK multiplied by 10
- * 0010 = SYSCLK multiplied by 4
- * 0011 = SYSCLK multiplied by 6
- * 0100 = SYSCLK multiplied by 9
- * 0101 = SYSCLK multiplied by 5
- * 0110 = SYSCLK multiplied by 7
- * 0111 = SYSCLK multiplied by 8
- * of 33.3MHz
- **/
-static int gx_freq_mult[16] = {
- 4, 10, 4, 6, 9, 5, 7, 8,
- 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-
-/****************************************************************
- * Low Level chipset interface *
- ****************************************************************/
-static struct pci_device_id gx_chipset_tbl[] __initdata = {
- { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID },
- { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID },
- { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID },
- { 0, },
-};
-
-/**
- * gx_detect_chipset:
- *
- **/
-static __init struct pci_dev *gx_detect_chipset(void)
-{
- struct pci_dev *gx_pci = NULL;
-
- /* check if CPU is a MediaGX or a Geode. */
- if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
- (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
- dprintk("error: no MediaGX/Geode processor found!\n");
- return NULL;
- }
-
- /* detect which companion chip is used */
- while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) {
- if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) {
- return gx_pci;
- }
- }
-
- dprintk("error: no supported chipset found!\n");
- return NULL;
-}
-
-/**
- * gx_get_cpuspeed:
- *
- * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs.
- */
-static unsigned int gx_get_cpuspeed(unsigned int cpu)
-{
- if ((gx_params->pci_suscfg & SUSMOD) == 0)
- return stock_freq;
-
- return (stock_freq * gx_params->off_duration)
- / (gx_params->on_duration + gx_params->off_duration);
-}
-
-/**
- * gx_validate_speed:
- * determine current cpu speed
- *
-**/
-
-static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration)
-{
- unsigned int i;
- u8 tmp_on, tmp_off;
- int old_tmp_freq = stock_freq;
- int tmp_freq;
-
- *off_duration=1;
- *on_duration=0;
-
- for (i=max_duration; i>0; i--) {
- tmp_off = ((khz * i) / stock_freq) & 0xff;
- tmp_on = i - tmp_off;
- tmp_freq = (stock_freq * tmp_off) / i;
- /* if this relation is closer to khz, use this. If it's equal,
- * prefer it, too - lower latency */
- if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) {
- *on_duration = tmp_on;
- *off_duration = tmp_off;
- old_tmp_freq = tmp_freq;
- }
- }
-
- return old_tmp_freq;
-}
-
-
-/**
- * gx_set_cpuspeed:
- * set cpu speed in khz.
- **/
-
-static void gx_set_cpuspeed(unsigned int khz)
-{
- u8 suscfg, pmer1;
- unsigned int new_khz;
- unsigned long flags;
- struct cpufreq_freqs freqs;
-
-
- freqs.cpu = 0;
- freqs.old = gx_get_cpuspeed(0);
-
- new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration);
-
- freqs.new = new_khz;
-
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
- local_irq_save(flags);
-
- if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */
- switch (gx_params->cs55x0->device) {
- case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
- pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
- /* FIXME: need to test other values -- Zwane,Miura */
- pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */
- pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */
- pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1);
-
- if (gx_params->pci_rev < 0x10) { /* CS5530(rev 1.2, 1.3) */
- suscfg = gx_params->pci_suscfg | SUSMOD;
- } else { /* CS5530A,B.. */
- suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE;
- }
- break;
- case PCI_DEVICE_ID_CYRIX_5520:
- case PCI_DEVICE_ID_CYRIX_5510:
- suscfg = gx_params->pci_suscfg | SUSMOD;
- break;
- default:
- local_irq_restore(flags);
- dprintk("fatal: try to set unknown chipset.\n");
- return;
- }
- } else {
- suscfg = gx_params->pci_suscfg & ~(SUSMOD);
- gx_params->off_duration = 0;
- gx_params->on_duration = 0;
- dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n");
- }
-
- pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration);
- pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration);
-
- pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg);
- pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
-
- local_irq_restore(flags);
-
- gx_params->pci_suscfg = suscfg;
-
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
- dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
- gx_params->on_duration * 32, gx_params->off_duration * 32);
- dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
-}
-
-/****************************************************************
- * High level functions *
- ****************************************************************/
-
-/*
- * cpufreq_gx_verify: test if frequency range is valid
- *
- * This function checks if a given frequency range in kHz is valid
- * for the hardware supported by the driver.
- */
-
-static int cpufreq_gx_verify(struct cpufreq_policy *policy)
-{
- unsigned int tmp_freq = 0;
- u8 tmp1, tmp2;
-
- if (!stock_freq || !policy)
- return -EINVAL;
-
- policy->cpu = 0;
- cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
-
- /* it needs to be assured that at least one supported frequency is
- * within policy->min and policy->max. If it is not, policy->max
- * needs to be increased until one freuqency is supported.
- * policy->min may not be decreased, though. This way we guarantee a
- * specific processing capacity.
- */
- tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2);
- if (tmp_freq < policy->min)
- tmp_freq += stock_freq / max_duration;
- policy->min = tmp_freq;
- if (policy->min > policy->max)
- policy->max = tmp_freq;
- tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2);
- if (tmp_freq > policy->max)
- tmp_freq -= stock_freq / max_duration;
- policy->max = tmp_freq;
- if (policy->max < policy->min)
- policy->max = policy->min;
- cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
-
- return 0;
-}
-
-/*
- * cpufreq_gx_target:
- *
- */
-static int cpufreq_gx_target(struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
-{
- u8 tmp1, tmp2;
- unsigned int tmp_freq;
-
- if (!stock_freq || !policy)
- return -EINVAL;
-
- policy->cpu = 0;
-
- tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2);
- while (tmp_freq < policy->min) {
- tmp_freq += stock_freq / max_duration;
- tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
- }
- while (tmp_freq > policy->max) {
- tmp_freq -= stock_freq / max_duration;
- tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
- }
-
- gx_set_cpuspeed(tmp_freq);
-
- return 0;
-}
-
-static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
-{
- unsigned int maxfreq, curfreq;
-
- if (!policy || policy->cpu != 0)
- return -ENODEV;
-
- /* determine maximum frequency */
- if (pci_busclk) {
- maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
- } else if (cpu_khz) {
- maxfreq = cpu_khz;
- } else {
- maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
- }
- stock_freq = maxfreq;
- curfreq = gx_get_cpuspeed(0);
-
- dprintk("cpu max frequency is %d.\n", maxfreq);
- dprintk("cpu current frequency is %dkHz.\n",curfreq);
-
- /* setup basic struct for cpufreq API */
- policy->cpu = 0;
-
- if (max_duration < POLICY_MIN_DIV)
- policy->min = maxfreq / max_duration;
- else
- policy->min = maxfreq / POLICY_MIN_DIV;
- policy->max = maxfreq;
- policy->cur = curfreq;
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
- policy->cpuinfo.min_freq = maxfreq / max_duration;
- policy->cpuinfo.max_freq = maxfreq;
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-
- return 0;
-}
-
-/*
- * cpufreq_gx_init:
- * MediaGX/Geode GX initialize cpufreq driver
- */
-static struct cpufreq_driver gx_suspmod_driver = {
- .get = gx_get_cpuspeed,
- .verify = cpufreq_gx_verify,
- .target = cpufreq_gx_target,
- .init = cpufreq_gx_cpu_init,
- .name = "gx-suspmod",
- .owner = THIS_MODULE,
-};
-
-static int __init cpufreq_gx_init(void)
-{
- int ret;
- struct gxfreq_params *params;
- struct pci_dev *gx_pci;
- u32 class_rev;
-
- /* Test if we have the right hardware */
- if ((gx_pci = gx_detect_chipset()) == NULL)
- return -ENODEV;
-
- /* check whether module parameters are sane */
- if (max_duration > 0xff)
- max_duration = 0xff;
-
- dprintk("geode suspend modulation available.\n");
-
- params = kmalloc(sizeof(struct gxfreq_params), GFP_KERNEL);
- if (params == NULL)
- return -ENOMEM;
- memset(params, 0, sizeof(struct gxfreq_params));
-
- params->cs55x0 = gx_pci;
- gx_params = params;
-
- /* keep cs55x0 configurations */
- pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg));
- pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
- pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
- pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
- pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration));
- pci_read_config_dword(params->cs55x0, PCI_CLASS_REVISION, &class_rev);
- params->pci_rev = class_rev && 0xff;
-
- if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) {
- kfree(params);
- return ret; /* register error! */
- }
-
- return 0;
-}
-
-static void __exit cpufreq_gx_exit(void)
-{
- cpufreq_unregister_driver(&gx_suspmod_driver);
- pci_dev_put(gx_params->cs55x0);
- kfree(gx_params);
-}
-
-MODULE_AUTHOR ("Hiroshi Miura <miura@da-cha.org>");
-MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
-MODULE_LICENSE ("GPL");
-
-module_init(cpufreq_gx_init);
-module_exit(cpufreq_gx_exit);
-
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
deleted file mode 100644
index 8ef38544453..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ /dev/null
@@ -1,700 +0,0 @@
-/*
- * (C) 2001-2004 Dave Jones. <davej@codemonkey.org.uk>
- * (C) 2002 Padraig Brady. <padraig@antefacto.com>
- *
- * Licensed under the terms of the GNU GPL License version 2.
- * Based upon datasheets & sample CPUs kindly provided by VIA.
- *
- * VIA have currently 3 different versions of Longhaul.
- * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147.
- * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0.
- * Version 2 of longhaul is the same as v1, but adds voltage scaling.
- * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C)
- * voltage scaling support has currently been disabled in this driver
- * until we have code that gets it right.
- * Version 3 of longhaul got renamed to Powersaver and redesigned
- * to use the POWERSAVER MSR at 0x110a.
- * It is present in Ezra-T (C5M), Nehemiah (C5X) and above.
- * It's pretty much the same feature wise to longhaul v2, though
- * there is provision for scaling FSB too, but this doesn't work
- * too well in practice so we don't even try to use this.
- *
- * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-
-#include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
-
-#include "longhaul.h"
-
-#define PFX "longhaul: "
-
-#define TYPE_LONGHAUL_V1 1
-#define TYPE_LONGHAUL_V2 2
-#define TYPE_POWERSAVER 3
-
-#define CPU_SAMUEL 1
-#define CPU_SAMUEL2 2
-#define CPU_EZRA 3
-#define CPU_EZRA_T 4
-#define CPU_NEHEMIAH 5
-
-static int cpu_model;
-static unsigned int numscales=16, numvscales;
-static unsigned int fsb;
-static int minvid, maxvid;
-static unsigned int minmult, maxmult;
-static int can_scale_voltage;
-static int vrmrev;
-
-/* Module parameters */
-static int dont_scale_voltage;
-
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
-
-
-/* Clock ratios multiplied by 10 */
-static int clock_ratio[32];
-static int eblcr_table[32];
-static int voltage_table[32];
-static unsigned int highest_speed, lowest_speed; /* kHz */
-static int longhaul_version;
-static struct cpufreq_frequency_table *longhaul_table;
-
-#ifdef CONFIG_CPU_FREQ_DEBUG
-static char speedbuffer[8];
-
-static char *print_speed(int speed)
-{
- if (speed > 1000) {
- if (speed%1000 == 0)
- sprintf (speedbuffer, "%dGHz", speed/1000);
- else
- sprintf (speedbuffer, "%d.%dGHz", speed/1000, (speed%1000)/100);
- } else
- sprintf (speedbuffer, "%dMHz", speed);
-
- return speedbuffer;
-}
-#endif
-
-
-static unsigned int calc_speed(int mult)
-{
- int khz;
- khz = (mult/10)*fsb;
- if (mult%10)
- khz += fsb/2;
- khz *= 1000;
- return khz;
-}
-
-
-static int longhaul_get_cpu_mult(void)
-{
- unsigned long invalue=0,lo, hi;
-
- rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi);
- invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22;
- if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) {
- if (lo & (1<<27))
- invalue+=16;
- }
- return eblcr_table[invalue];
-}
-
-
-static void do_powersaver(union msr_longhaul *longhaul,
- unsigned int clock_ratio_index)
-{
- struct pci_dev *dev;
- unsigned long flags;
- unsigned int tmp_mask;
- int version;
- int i;
- u16 pci_cmd;
- u16 cmd_state[64];
-
- switch (cpu_model) {
- case CPU_EZRA_T:
- version = 3;
- break;
- case CPU_NEHEMIAH:
- version = 0xf;
- break;
- default:
- return;
- }
-
- rdmsrl(MSR_VIA_LONGHAUL, longhaul->val);
- longhaul->bits.SoftBusRatio = clock_ratio_index & 0xf;
- longhaul->bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
- longhaul->bits.EnableSoftBusRatio = 1;
- longhaul->bits.RevisionKey = 0;
-
- preempt_disable();
- local_irq_save(flags);
-
- /*
- * get current pci bus master state for all devices
- * and clear bus master bit
- */
- dev = NULL;
- i = 0;
- do {
- dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
- if (dev != NULL) {
- pci_read_config_word(dev, PCI_COMMAND, &pci_cmd);
- cmd_state[i++] = pci_cmd;
- pci_cmd &= ~PCI_COMMAND_MASTER;
- pci_write_config_word(dev, PCI_COMMAND, pci_cmd);
- }
- } while (dev != NULL);
-
- tmp_mask=inb(0x21); /* works on C3. save mask. */
- outb(0xFE,0x21); /* TMR0 only */
- outb(0xFF,0x80); /* delay */
-
- safe_halt();
- wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
- halt();
-
- local_irq_disable();
-
- outb(tmp_mask,0x21); /* restore mask */
-
- /* restore pci bus master state for all devices */
- dev = NULL;
- i = 0;
- do {
- dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
- if (dev != NULL) {
- pci_cmd = cmd_state[i++];
- pci_write_config_byte(dev, PCI_COMMAND, pci_cmd);
- }
- } while (dev != NULL);
- local_irq_restore(flags);
- preempt_enable();
-
- /* disable bus ratio bit */
- rdmsrl(MSR_VIA_LONGHAUL, longhaul->val);
- longhaul->bits.EnableSoftBusRatio = 0;
- longhaul->bits.RevisionKey = version;
- wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
-}
-
-/**
- * longhaul_set_cpu_frequency()
- * @clock_ratio_index : bitpattern of the new multiplier.
- *
- * Sets a new clock ratio.
- */
-
-static void longhaul_setstate(unsigned int clock_ratio_index)
-{
- int speed, mult;
- struct cpufreq_freqs freqs;
- union msr_longhaul longhaul;
- union msr_bcr2 bcr2;
- static unsigned int old_ratio=-1;
-
- if (old_ratio == clock_ratio_index)
- return;
- old_ratio = clock_ratio_index;
-
- mult = clock_ratio[clock_ratio_index];
- if (mult == -1)
- return;
-
- speed = calc_speed(mult);
- if ((speed > highest_speed) || (speed < lowest_speed))
- return;
-
- freqs.old = calc_speed(longhaul_get_cpu_mult());
- freqs.new = speed;
- freqs.cpu = 0; /* longhaul.c is UP only driver */
-
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
- dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
- fsb, mult/10, mult%10, print_speed(speed/1000));
-
- switch (longhaul_version) {
-
- /*
- * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B])
- * Software controlled multipliers only.
- *
- * *NB* Until we get voltage scaling working v1 & v2 are the same code.
- * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5b] and Ezra [C5C]
- */
- case TYPE_LONGHAUL_V1:
- case TYPE_LONGHAUL_V2:
- rdmsrl (MSR_VIA_BCR2, bcr2.val);
- /* Enable software clock multiplier */
- bcr2.bits.ESOFTBF = 1;
- bcr2.bits.CLOCKMUL = clock_ratio_index;
- local_irq_disable();
- wrmsrl (MSR_VIA_BCR2, bcr2.val);
- safe_halt();
-
- /* Disable software clock multiplier */
- rdmsrl (MSR_VIA_BCR2, bcr2.val);
- bcr2.bits.ESOFTBF = 0;
- local_irq_disable();
- wrmsrl (MSR_VIA_BCR2, bcr2.val);
- local_irq_enable();
- break;
-
- /*
- * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N])
- * We can scale voltage with this too, but that's currently
- * disabled until we come up with a decent 'match freq to voltage'
- * algorithm.
- * When we add voltage scaling, we will also need to do the
- * voltage/freq setting in order depending on the direction
- * of scaling (like we do in powernow-k7.c)
- * Nehemiah can do FSB scaling too, but this has never been proven
- * to work in practice.
- */
- case TYPE_POWERSAVER:
- do_powersaver(&longhaul, clock_ratio_index);
- break;
- }
-
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-}
-
-/*
- * Centaur decided to make life a little more tricky.
- * Only longhaul v1 is allowed to read EBLCR BSEL[0:1].
- * Samuel2 and above have to try and guess what the FSB is.
- * We do this by assuming we booted at maximum multiplier, and interpolate
- * between that value multiplied by possible FSBs and cpu_mhz which
- * was calculated at boot time. Really ugly, but no other way to do this.
- */
-
-#define ROUNDING 0xf
-
-static int _guess(int guess)
-{
- int target;
-
- target = ((maxmult/10)*guess);
- if (maxmult%10 != 0)
- target += (guess/2);
- target += ROUNDING/2;
- target &= ~ROUNDING;
- return target;
-}
-
-
-static int guess_fsb(void)
-{
- int speed = (cpu_khz/1000);
- int i;
- int speeds[3] = { 66, 100, 133 };
-
- speed += ROUNDING/2;
- speed &= ~ROUNDING;
-
- for (i=0; i<3; i++) {
- if (_guess(speeds[i]) == speed)
- return speeds[i];
- }
- return 0;
-}
-
-
-static int __init longhaul_get_ranges(void)
-{
- unsigned long invalue;
- unsigned int multipliers[32]= {
- 50,30,40,100,55,35,45,95,90,70,80,60,120,75,85,65,
- -1,110,120,-1,135,115,125,105,130,150,160,140,-1,155,-1,145 };
- unsigned int j, k = 0;
- union msr_longhaul longhaul;
- unsigned long lo, hi;
- unsigned int eblcr_fsb_table_v1[] = { 66, 133, 100, -1 };
- unsigned int eblcr_fsb_table_v2[] = { 133, 100, -1, 66 };
-
- switch (longhaul_version) {
- case TYPE_LONGHAUL_V1:
- case TYPE_LONGHAUL_V2:
- /* Ugh, Longhaul v1 didn't have the min/max MSRs.
- Assume min=3.0x & max = whatever we booted at. */
- minmult = 30;
- maxmult = longhaul_get_cpu_mult();
- rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi);
- invalue = (lo & (1<<18|1<<19)) >>18;
- if (cpu_model==CPU_SAMUEL || cpu_model==CPU_SAMUEL2)
- fsb = eblcr_fsb_table_v1[invalue];
- else
- fsb = guess_fsb();
- break;
-
- case TYPE_POWERSAVER:
- /* Ezra-T */
- if (cpu_model==CPU_EZRA_T) {
- rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
- invalue = longhaul.bits.MaxMHzBR;
- if (longhaul.bits.MaxMHzBR4)
- invalue += 16;
- maxmult=multipliers[invalue];
-
- invalue = longhaul.bits.MinMHzBR;
- if (longhaul.bits.MinMHzBR4 == 1)
- minmult = 30;
- else
- minmult = multipliers[invalue];
- fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB];
- break;
- }
-
- /* Nehemiah */
- if (cpu_model==CPU_NEHEMIAH) {
- rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
- /*
- * TODO: This code works, but raises a lot of questions.
- * - Some Nehemiah's seem to have broken Min/MaxMHzBR's.
- * We get around this by using a hardcoded multiplier of 4.0x
- * for the minimimum speed, and the speed we booted up at for the max.
- * This is done in longhaul_get_cpu_mult() by reading the EBLCR register.
- * - According to some VIA documentation EBLCR is only
- * in pre-Nehemiah C3s. How this still works is a mystery.
- * We're possibly using something undocumented and unsupported,
- * But it works, so we don't grumble.
- */
- minmult=40;
- maxmult=longhaul_get_cpu_mult();
-
- /* Starting with the 1.2GHz parts, theres a 200MHz bus. */
- if ((cpu_khz/1000) > 1200)
- fsb = 200;
- else
- fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB];
- break;
- }
- }
-
- dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n",
- minmult/10, minmult%10, maxmult/10, maxmult%10);
-
- if (fsb == -1) {
- printk (KERN_INFO PFX "Invalid (reserved) FSB!\n");
- return -EINVAL;
- }
-
- highest_speed = calc_speed(maxmult);
- lowest_speed = calc_speed(minmult);
- dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb,
- print_speed(lowest_speed/1000),
- print_speed(highest_speed/1000));
-
- if (lowest_speed == highest_speed) {
- printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n");
- return -EINVAL;
- }
- if (lowest_speed > highest_speed) {
- printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
- lowest_speed, highest_speed);
- return -EINVAL;
- }
-
- longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL);
- if(!longhaul_table)
- return -ENOMEM;
-
- for (j=0; j < numscales; j++) {
- unsigned int ratio;
- ratio = clock_ratio[j];
- if (ratio == -1)
- continue;
- if (ratio > maxmult || ratio < minmult)
- continue;
- longhaul_table[k].frequency = calc_speed(ratio);
- longhaul_table[k].index = j;
- k++;
- }
-
- longhaul_table[k].frequency = CPUFREQ_TABLE_END;
- if (!k) {
- kfree (longhaul_table);
- return -EINVAL;
- }
-
- return 0;
-}
-
-
-static void __init longhaul_setup_voltagescaling(void)
-{
- union msr_longhaul longhaul;
-
- rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
- if (!(longhaul.bits.RevisionID & 1))
- return;
-
- minvid = longhaul.bits.MinimumVID;
- maxvid = longhaul.bits.MaximumVID;
- vrmrev = longhaul.bits.VRMRev;
-
- if (minvid == 0 || maxvid == 0) {
- printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
- "Voltage scaling disabled.\n",
- minvid/1000, minvid%1000, maxvid/1000, maxvid%1000);
- return;
- }
-
- if (minvid == maxvid) {
- printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are "
- "both %d.%03d. Voltage scaling disabled\n",
- maxvid/1000, maxvid%1000);
- return;
- }
-
- if (vrmrev==0) {
- dprintk ("VRM 8.5\n");
- memcpy (voltage_table, vrm85scales, sizeof(voltage_table));
- numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25;
- } else {
- dprintk ("Mobile VRM\n");
- memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table));
- numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5;
- }
-
- /* Current voltage isn't readable at first, so we need to
- set it to a known value. The spec says to use maxvid */
- longhaul.bits.RevisionKey = longhaul.bits.RevisionID; /* FIXME: This is bad. */
- longhaul.bits.EnableSoftVID = 1;
- longhaul.bits.SoftVID = maxvid;
- wrmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
- minvid = voltage_table[minvid];
- maxvid = voltage_table[maxvid];
-
- dprintk ("Min VID=%d.%03d Max VID=%d.%03d, %d possible voltage scales\n",
- maxvid/1000, maxvid%1000, minvid/1000, minvid%1000, numvscales);
-
- can_scale_voltage = 1;
-}
-
-
-static int longhaul_verify(struct cpufreq_policy *policy)
-{
- return cpufreq_frequency_table_verify(policy, longhaul_table);
-}
-
-
-static int longhaul_target(struct cpufreq_policy *policy,
- unsigned int target_freq, unsigned int relation)
-{
- unsigned int table_index = 0;
- unsigned int new_clock_ratio = 0;
-
- if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index))
- return -EINVAL;
-
- new_clock_ratio = longhaul_table[table_index].index & 0xFF;
-
- longhaul_setstate(new_clock_ratio);
-
- return 0;
-}
-
-
-static unsigned int longhaul_get(unsigned int cpu)
-{
- if (cpu)
- return 0;
- return calc_speed(longhaul_get_cpu_mult());
-}
-
-
-static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
-{
- struct cpuinfo_x86 *c = cpu_data;
- char *cpuname=NULL;
- int ret;
-
- switch (c->x86_model) {
- case 6:
- cpu_model = CPU_SAMUEL;
- cpuname = "C3 'Samuel' [C5A]";
- longhaul_version = TYPE_LONGHAUL_V1;
- memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio));
- memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr));
- break;
-
- case 7:
- longhaul_version = TYPE_LONGHAUL_V1;
- switch (c->x86_mask) {
- case 0:
- cpu_model = CPU_SAMUEL2;
- cpuname = "C3 'Samuel 2' [C5B]";
- /* Note, this is not a typo, early Samuel2's had Samuel1 ratios. */
- memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio));
- memcpy (eblcr_table, samuel2_eblcr, sizeof(samuel2_eblcr));
- break;
- case 1 ... 15:
- if (c->x86_mask < 8) {
- cpu_model = CPU_SAMUEL2;
- cpuname = "C3 'Samuel 2' [C5B]";
- } else {
- cpu_model = CPU_EZRA;
- cpuname = "C3 'Ezra' [C5C]";
- }
- memcpy (clock_ratio, ezra_clock_ratio, sizeof(ezra_clock_ratio));
- memcpy (eblcr_table, ezra_eblcr, sizeof(ezra_eblcr));
- break;
- }
- break;
-
- case 8:
- cpu_model = CPU_EZRA_T;
- cpuname = "C3 'Ezra-T' [C5M]";
- longhaul_version = TYPE_POWERSAVER;
- numscales=32;
- memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio));
- memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr));
- break;
-
- case 9:
- cpu_model = CPU_NEHEMIAH;
- longhaul_version = TYPE_POWERSAVER;
- numscales=32;
- switch (c->x86_mask) {
- case 0 ... 1:
- cpuname = "C3 'Nehemiah A' [C5N]";
- memcpy (clock_ratio, nehemiah_a_clock_ratio, sizeof(nehemiah_a_clock_ratio));
- memcpy (eblcr_table, nehemiah_a_eblcr, sizeof(nehemiah_a_eblcr));
- break;
- case 2 ... 4:
- cpuname = "C3 'Nehemiah B' [C5N]";
- memcpy (clock_ratio, nehemiah_b_clock_ratio, sizeof(nehemiah_b_clock_ratio));
- memcpy (eblcr_table, nehemiah_b_eblcr, sizeof(nehemiah_b_eblcr));
- break;
- case 5 ... 15:
- cpuname = "C3 'Nehemiah C' [C5N]";
- memcpy (clock_ratio, nehemiah_c_clock_ratio, sizeof(nehemiah_c_clock_ratio));
- memcpy (eblcr_table, nehemiah_c_eblcr, sizeof(nehemiah_c_eblcr));
- break;
- }
- break;
-
- default:
- cpuname = "Unknown";
- break;
- }
-
- printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname);
- switch (longhaul_version) {
- case TYPE_LONGHAUL_V1:
- case TYPE_LONGHAUL_V2:
- printk ("Longhaul v%d supported.\n", longhaul_version);
- break;
- case TYPE_POWERSAVER:
- printk ("Powersaver supported.\n");
- break;
- };
-
- ret = longhaul_get_ranges();
- if (ret != 0)
- return ret;
-
- if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) &&
- (dont_scale_voltage==0))
- longhaul_setup_voltagescaling();
-
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
- policy->cpuinfo.transition_latency = 200000; /* nsec */
- policy->cur = calc_speed(longhaul_get_cpu_mult());
-
- ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table);
- if (ret)
- return ret;
-
- cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu);
-
- return 0;
-}
-
-static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
-{
- cpufreq_frequency_table_put_attr(policy->cpu);
- return 0;
-}
-
-static struct freq_attr* longhaul_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-static struct cpufreq_driver longhaul_driver = {
- .verify = longhaul_verify,
- .target = longhaul_target,
- .get = longhaul_get,
- .init = longhaul_cpu_init,
- .exit = __devexit_p(longhaul_cpu_exit),
- .name = "longhaul",
- .owner = THIS_MODULE,
- .attr = longhaul_attr,
-};
-
-
-static int __init longhaul_init(void)
-{
- struct cpuinfo_x86 *c = cpu_data;
-
- if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
- return -ENODEV;
-
- switch (c->x86_model) {
- case 6 ... 9:
- return cpufreq_register_driver(&longhaul_driver);
- default:
- printk (KERN_INFO PFX "Unknown VIA CPU. Contact davej@codemonkey.org.uk\n");
- }
-
- return -ENODEV;
-}
-
-
-static void __exit longhaul_exit(void)
-{
- int i=0;
-
- for (i=0; i < numscales; i++) {
- if (clock_ratio[i] == maxmult) {
- longhaul_setstate(i);
- break;
- }
- }
-
- cpufreq_unregister_driver(&longhaul_driver);
- kfree(longhaul_table);
-}
-
-module_param (dont_scale_voltage, int, 0644);
-MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor");
-
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
-MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
-MODULE_LICENSE ("GPL");
-
-module_init(longhaul_init);
-module_exit(longhaul_exit);
-
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
deleted file mode 100644
index 2a495c162ec..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.h
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
- * longhaul.h
- * (C) 2003 Dave Jones.
- *
- * Licensed under the terms of the GNU GPL License version 2.
- *
- * VIA-specific information
- */
-
-union msr_bcr2 {
- struct {
- unsigned Reseved:19, // 18:0
- ESOFTBF:1, // 19
- Reserved2:3, // 22:20
- CLOCKMUL:4, // 26:23
- Reserved3:5; // 31:27
- } bits;
- unsigned long val;
-};
-
-union msr_longhaul {
- struct {
- unsigned RevisionID:4, // 3:0
- RevisionKey:4, // 7:4
- EnableSoftBusRatio:1, // 8
- EnableSoftVID:1, // 9
- EnableSoftBSEL:1, // 10
- Reserved:3, // 11:13
- SoftBusRatio4:1, // 14
- VRMRev:1, // 15
- SoftBusRatio:4, // 19:16
- SoftVID:5, // 24:20
- Reserved2:3, // 27:25
- SoftBSEL:2, // 29:28
- Reserved3:2, // 31:30
- MaxMHzBR:4, // 35:32
- MaximumVID:5, // 40:36
- MaxMHzFSB:2, // 42:41
- MaxMHzBR4:1, // 43
- Reserved4:4, // 47:44
- MinMHzBR:4, // 51:48
- MinimumVID:5, // 56:52
- MinMHzFSB:2, // 58:57
- MinMHzBR4:1, // 59
- Reserved5:4; // 63:60
- } bits;
- unsigned long long val;
-};
-
-/*
- * Clock ratio tables. Div/Mod by 10 to get ratio.
- * The eblcr ones specify the ratio read from the CPU.
- * The clock_ratio ones specify what to write to the CPU.
- */
-
-/*
- * VIA C3 Samuel 1 & Samuel 2 (stepping 0)
- */
-static int __initdata samuel1_clock_ratio[16] = {
- -1, /* 0000 -> RESERVED */
- 30, /* 0001 -> 3.0x */
- 40, /* 0010 -> 4.0x */
- -1, /* 0011 -> RESERVED */
- -1, /* 0100 -> RESERVED */
- 35, /* 0101 -> 3.5x */
- 45, /* 0110 -> 4.5x */
- 55, /* 0111 -> 5.5x */
- 60, /* 1000 -> 6.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 50, /* 1011 -> 5.0x */
- 65, /* 1100 -> 6.5x */
- 75, /* 1101 -> 7.5x */
- -1, /* 1110 -> RESERVED */
- -1, /* 1111 -> RESERVED */
-};
-
-static int __initdata samuel1_eblcr[16] = {
- 50, /* 0000 -> RESERVED */
- 30, /* 0001 -> 3.0x */
- 40, /* 0010 -> 4.0x */
- -1, /* 0011 -> RESERVED */
- 55, /* 0100 -> 5.5x */
- 35, /* 0101 -> 3.5x */
- 45, /* 0110 -> 4.5x */
- -1, /* 0111 -> RESERVED */
- -1, /* 1000 -> RESERVED */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 60, /* 1011 -> 6.0x */
- -1, /* 1100 -> RESERVED */
- 75, /* 1101 -> 7.5x */
- -1, /* 1110 -> RESERVED */
- 65, /* 1111 -> 6.5x */
-};
-
-/*
- * VIA C3 Samuel2 Stepping 1->15
- */
-static int __initdata samuel2_eblcr[16] = {
- 50, /* 0000 -> 5.0x */
- 30, /* 0001 -> 3.0x */
- 40, /* 0010 -> 4.0x */
- 100, /* 0011 -> 10.0x */
- 55, /* 0100 -> 5.5x */
- 35, /* 0101 -> 3.5x */
- 45, /* 0110 -> 4.5x */
- 110, /* 0111 -> 11.0x */
- 90, /* 1000 -> 9.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 60, /* 1011 -> 6.0x */
- 120, /* 1100 -> 12.0x */
- 75, /* 1101 -> 7.5x */
- 130, /* 1110 -> 13.0x */
- 65, /* 1111 -> 6.5x */
-};
-
-/*
- * VIA C3 Ezra
- */
-static int __initdata ezra_clock_ratio[16] = {
- 100, /* 0000 -> 10.0x */
- 30, /* 0001 -> 3.0x */
- 40, /* 0010 -> 4.0x */
- 90, /* 0011 -> 9.0x */
- 95, /* 0100 -> 9.5x */
- 35, /* 0101 -> 3.5x */
- 45, /* 0110 -> 4.5x */
- 55, /* 0111 -> 5.5x */
- 60, /* 1000 -> 6.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 50, /* 1011 -> 5.0x */
- 65, /* 1100 -> 6.5x */
- 75, /* 1101 -> 7.5x */
- 85, /* 1110 -> 8.5x */
- 120, /* 1111 -> 12.0x */
-};
-
-static int __initdata ezra_eblcr[16] = {
- 50, /* 0000 -> 5.0x */
- 30, /* 0001 -> 3.0x */
- 40, /* 0010 -> 4.0x */
- 100, /* 0011 -> 10.0x */
- 55, /* 0100 -> 5.5x */
- 35, /* 0101 -> 3.5x */
- 45, /* 0110 -> 4.5x */
- 95, /* 0111 -> 9.5x */
- 90, /* 1000 -> 9.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 60, /* 1011 -> 6.0x */
- 120, /* 1100 -> 12.0x */
- 75, /* 1101 -> 7.5x */
- 85, /* 1110 -> 8.5x */
- 65, /* 1111 -> 6.5x */
-};
-
-/*
- * VIA C3 (Ezra-T) [C5M].
- */
-static int __initdata ezrat_clock_ratio[32] = {
- 100, /* 0000 -> 10.0x */
- 30, /* 0001 -> 3.0x */
- 40, /* 0010 -> 4.0x */
- 90, /* 0011 -> 9.0x */
- 95, /* 0100 -> 9.5x */
- 35, /* 0101 -> 3.5x */
- 45, /* 0110 -> 4.5x */
- 55, /* 0111 -> 5.5x */
- 60, /* 1000 -> 6.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 50, /* 1011 -> 5.0x */
- 65, /* 1100 -> 6.5x */
- 75, /* 1101 -> 7.5x */
- 85, /* 1110 -> 8.5x */
- 120, /* 1111 -> 12.0x */
-
- -1, /* 0000 -> RESERVED (10.0x) */
- 110, /* 0001 -> 11.0x */
- 120, /* 0010 -> 12.0x */
- -1, /* 0011 -> RESERVED (9.0x)*/
- 105, /* 0100 -> 10.5x */
- 115, /* 0101 -> 11.5x */
- 125, /* 0110 -> 12.5x */
- 135, /* 0111 -> 13.5x */
- 140, /* 1000 -> 14.0x */
- 150, /* 1001 -> 15.0x */
- 160, /* 1010 -> 16.0x */
- 130, /* 1011 -> 13.0x */
- 145, /* 1100 -> 14.5x */
- 155, /* 1101 -> 15.5x */
- -1, /* 1110 -> RESERVED (13.0x) */
- -1, /* 1111 -> RESERVED (12.0x) */
-};
-
-static int __initdata ezrat_eblcr[32] = {
- 50, /* 0000 -> 5.0x */
- 30, /* 0001 -> 3.0x */
- 40, /* 0010 -> 4.0x */
- 100, /* 0011 -> 10.0x */
- 55, /* 0100 -> 5.5x */
- 35, /* 0101 -> 3.5x */
- 45, /* 0110 -> 4.5x */
- 95, /* 0111 -> 9.5x */
- 90, /* 1000 -> 9.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 60, /* 1011 -> 6.0x */
- 120, /* 1100 -> 12.0x */
- 75, /* 1101 -> 7.5x */
- 85, /* 1110 -> 8.5x */
- 65, /* 1111 -> 6.5x */
-
- -1, /* 0000 -> RESERVED (9.0x) */
- 110, /* 0001 -> 11.0x */
- 120, /* 0010 -> 12.0x */
- -1, /* 0011 -> RESERVED (10.0x)*/
- 135, /* 0100 -> 13.5x */
- 115, /* 0101 -> 11.5x */
- 125, /* 0110 -> 12.5x */
- 105, /* 0111 -> 10.5x */
- 130, /* 1000 -> 13.0x */
- 150, /* 1001 -> 15.0x */
- 160, /* 1010 -> 16.0x */
- 140, /* 1011 -> 14.0x */
- -1, /* 1100 -> RESERVED (12.0x) */
- 155, /* 1101 -> 15.5x */
- -1, /* 1110 -> RESERVED (13.0x) */
- 145, /* 1111 -> 14.5x */
-};
-
-/*
- * VIA C3 Nehemiah */
-
-static int __initdata nehemiah_a_clock_ratio[32] = {
- 100, /* 0000 -> 10.0x */
- 160, /* 0001 -> 16.0x */
- -1, /* 0010 -> RESERVED */
- 90, /* 0011 -> 9.0x */
- 95, /* 0100 -> 9.5x */
- -1, /* 0101 -> RESERVED */
- -1, /* 0110 -> RESERVED */
- 55, /* 0111 -> 5.5x */
- 60, /* 1000 -> 6.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 50, /* 1011 -> 5.0x */
- 65, /* 1100 -> 6.5x */
- 75, /* 1101 -> 7.5x */
- 85, /* 1110 -> 8.5x */
- 120, /* 1111 -> 12.0x */
- 100, /* 0000 -> 10.0x */
- -1, /* 0001 -> RESERVED */
- 120, /* 0010 -> 12.0x */
- 90, /* 0011 -> 9.0x */
- 105, /* 0100 -> 10.5x */
- 115, /* 0101 -> 11.5x */
- 125, /* 0110 -> 12.5x */
- 135, /* 0111 -> 13.5x */
- 140, /* 1000 -> 14.0x */
- 150, /* 1001 -> 15.0x */
- 160, /* 1010 -> 16.0x */
- 130, /* 1011 -> 13.0x */
- 145, /* 1100 -> 14.5x */
- 155, /* 1101 -> 15.5x */
- -1, /* 1110 -> RESERVED (13.0x) */
- 120, /* 1111 -> 12.0x */
-};
-
-static int __initdata nehemiah_b_clock_ratio[32] = {
- 100, /* 0000 -> 10.0x */
- 160, /* 0001 -> 16.0x */
- -1, /* 0010 -> RESERVED */
- 90, /* 0011 -> 9.0x */
- 95, /* 0100 -> 9.5x */
- -1, /* 0101 -> RESERVED */
- -1, /* 0110 -> RESERVED */
- 55, /* 0111 -> 5.5x */
- 60, /* 1000 -> 6.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 50, /* 1011 -> 5.0x */
- 65, /* 1100 -> 6.5x */
- 75, /* 1101 -> 7.5x */
- 85, /* 1110 -> 8.5x */
- 120, /* 1111 -> 12.0x */
- 100, /* 0000 -> 10.0x */
- 110, /* 0001 -> 11.0x */
- 120, /* 0010 -> 12.0x */
- 90, /* 0011 -> 9.0x */
- 105, /* 0100 -> 10.5x */
- 115, /* 0101 -> 11.5x */
- 125, /* 0110 -> 12.5x */
- 135, /* 0111 -> 13.5x */
- 140, /* 1000 -> 14.0x */
- 150, /* 1001 -> 15.0x */
- 160, /* 1010 -> 16.0x */
- 130, /* 1011 -> 13.0x */
- 145, /* 1100 -> 14.5x */
- 155, /* 1101 -> 15.5x */
- -1, /* 1110 -> RESERVED (13.0x) */
- 120, /* 1111 -> 12.0x */
-};
-
-static int __initdata nehemiah_c_clock_ratio[32] = {
- 100, /* 0000 -> 10.0x */
- 160, /* 0001 -> 16.0x */
- 40, /* 0010 -> RESERVED */
- 90, /* 0011 -> 9.0x */
- 95, /* 0100 -> 9.5x */
- -1, /* 0101 -> RESERVED */
- 45, /* 0110 -> RESERVED */
- 55, /* 0111 -> 5.5x */
- 60, /* 1000 -> 6.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 50, /* 1011 -> 5.0x */
- 65, /* 1100 -> 6.5x */
- 75, /* 1101 -> 7.5x */
- 85, /* 1110 -> 8.5x */
- 120, /* 1111 -> 12.0x */
- 100, /* 0000 -> 10.0x */
- 110, /* 0001 -> 11.0x */
- 120, /* 0010 -> 12.0x */
- 90, /* 0011 -> 9.0x */
- 105, /* 0100 -> 10.5x */
- 115, /* 0101 -> 11.5x */
- 125, /* 0110 -> 12.5x */
- 135, /* 0111 -> 13.5x */
- 140, /* 1000 -> 14.0x */
- 150, /* 1001 -> 15.0x */
- 160, /* 1010 -> 16.0x */
- 130, /* 1011 -> 13.0x */
- 145, /* 1100 -> 14.5x */
- 155, /* 1101 -> 15.5x */
- -1, /* 1110 -> RESERVED (13.0x) */
- 120, /* 1111 -> 12.0x */
-};
-
-static int __initdata nehemiah_a_eblcr[32] = {
- 50, /* 0000 -> 5.0x */
- 160, /* 0001 -> 16.0x */
- -1, /* 0010 -> RESERVED */
- 100, /* 0011 -> 10.0x */
- 55, /* 0100 -> 5.5x */
- -1, /* 0101 -> RESERVED */
- -1, /* 0110 -> RESERVED */
- 95, /* 0111 -> 9.5x */
- 90, /* 1000 -> 9.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 60, /* 1011 -> 6.0x */
- 120, /* 1100 -> 12.0x */
- 75, /* 1101 -> 7.5x */
- 85, /* 1110 -> 8.5x */
- 65, /* 1111 -> 6.5x */
- 90, /* 0000 -> 9.0x */
- -1, /* 0001 -> RESERVED */
- 120, /* 0010 -> 12.0x */
- 100, /* 0011 -> 10.0x */
- 135, /* 0100 -> 13.5x */
- 115, /* 0101 -> 11.5x */
- 125, /* 0110 -> 12.5x */
- 105, /* 0111 -> 10.5x */
- 130, /* 1000 -> 13.0x */
- 150, /* 1001 -> 15.0x */
- 160, /* 1010 -> 16.0x */
- 140, /* 1011 -> 14.0x */
- 120, /* 1100 -> 12.0x */
- 155, /* 1101 -> 15.5x */
- -1, /* 1110 -> RESERVED (13.0x) */
- 145 /* 1111 -> 14.5x */
- /* end of table */
-};
-static int __initdata nehemiah_b_eblcr[32] = {
- 50, /* 0000 -> 5.0x */
- 160, /* 0001 -> 16.0x */
- -1, /* 0010 -> RESERVED */
- 100, /* 0011 -> 10.0x */
- 55, /* 0100 -> 5.5x */
- -1, /* 0101 -> RESERVED */
- -1, /* 0110 -> RESERVED */
- 95, /* 0111 -> 9.5x */
- 90, /* 1000 -> 9.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 60, /* 1011 -> 6.0x */
- 120, /* 1100 -> 12.0x */
- 75, /* 1101 -> 7.5x */
- 85, /* 1110 -> 8.5x */
- 65, /* 1111 -> 6.5x */
- 90, /* 0000 -> 9.0x */
- 110, /* 0001 -> 11.0x */
- 120, /* 0010 -> 12.0x */
- 100, /* 0011 -> 10.0x */
- 135, /* 0100 -> 13.5x */
- 115, /* 0101 -> 11.5x */
- 125, /* 0110 -> 12.5x */
- 105, /* 0111 -> 10.5x */
- 130, /* 1000 -> 13.0x */
- 150, /* 1001 -> 15.0x */
- 160, /* 1010 -> 16.0x */
- 140, /* 1011 -> 14.0x */
- 120, /* 1100 -> 12.0x */
- 155, /* 1101 -> 15.5x */
- -1, /* 1110 -> RESERVED (13.0x) */
- 145 /* 1111 -> 14.5x */
- /* end of table */
-};
-static int __initdata nehemiah_c_eblcr[32] = {
- 50, /* 0000 -> 5.0x */
- 160, /* 0001 -> 16.0x */
- 40, /* 0010 -> RESERVED */
- 100, /* 0011 -> 10.0x */
- 55, /* 0100 -> 5.5x */
- -1, /* 0101 -> RESERVED */
- 45, /* 0110 -> RESERVED */
- 95, /* 0111 -> 9.5x */
- 90, /* 1000 -> 9.0x */
- 70, /* 1001 -> 7.0x */
- 80, /* 1010 -> 8.0x */
- 60, /* 1011 -> 6.0x */
- 120, /* 1100 -> 12.0x */
- 75, /* 1101 -> 7.5x */
- 85, /* 1110 -> 8.5x */
- 65, /* 1111 -> 6.5x */
- 90, /* 0000 -> 9.0x */
- 110, /* 0001 -> 11.0x */
- 120, /* 0010 -> 12.0x */
- 100, /* 0011 -> 10.0x */
- 135, /* 0100 -> 13.5x */
- 115, /* 0101 -> 11.5x */
- 125, /* 0110 -> 12.5x */
- 105, /* 0111 -> 10.5x */
- 130, /* 1000 -> 13.0x */
- 150, /* 1001 -> 15.0x */
- 160, /* 1010 -> 16.0x */
- 140, /* 1011 -> 14.0x */
- 120, /* 1100 -> 12.0x */
- 155, /* 1101 -> 15.5x */
- -1, /* 1110 -> RESERVED (13.0x) */
- 145 /* 1111 -> 14.5x */
- /* end of table */
-};
-
-/*
- * Voltage scales. Div/Mod by 1000 to get actual voltage.
- * Which scale to use depends on the VRM type in use.
- */
-static int __initdata vrm85scales[32] = {
- 1250, 1200, 1150, 1100, 1050, 1800, 1750, 1700,
- 1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300,
- 1275, 1225, 1175, 1125, 1075, 1825, 1775, 1725,
- 1675, 1625, 1575, 1525, 1475, 1425, 1375, 1325,
-};
-
-static int __initdata mobilevrmscales[32] = {
- 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
- 1600, 1550, 1500, 1450, 1500, 1350, 1300, -1,
- 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
- 1075, 1050, 1025, 1000, 975, 950, 925, -1,
-};
-
diff --git a/arch/i386/kernel/cpu/cpufreq/longrun.c b/arch/i386/kernel/cpu/cpufreq/longrun.c
deleted file mode 100644
index e3868de4dc2..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/longrun.c
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- *
- * Licensed under the terms of the GNU GPL License version 2.
- *
- * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/cpufreq.h>
-
-#include <asm/msr.h>
-#include <asm/processor.h>
-#include <asm/timex.h>
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg)
-
-static struct cpufreq_driver longrun_driver;
-
-/**
- * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz
- * values into per cent values. In TMTA microcode, the following is valid:
- * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
- */
-static unsigned int longrun_low_freq, longrun_high_freq;
-
-
-/**
- * longrun_get_policy - get the current LongRun policy
- * @policy: struct cpufreq_policy where current policy is written into
- *
- * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
- * and MSR_TMTA_LONGRUN_CTRL
- */
-static void __init longrun_get_policy(struct cpufreq_policy *policy)
-{
- u32 msr_lo, msr_hi;
-
- rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
- dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi);
- if (msr_lo & 0x01)
- policy->policy = CPUFREQ_POLICY_PERFORMANCE;
- else
- policy->policy = CPUFREQ_POLICY_POWERSAVE;
-
- rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
- dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
- msr_lo &= 0x0000007F;
- msr_hi &= 0x0000007F;
-
- if ( longrun_high_freq <= longrun_low_freq ) {
- /* Assume degenerate Longrun table */
- policy->min = policy->max = longrun_high_freq;
- } else {
- policy->min = longrun_low_freq + msr_lo *
- ((longrun_high_freq - longrun_low_freq) / 100);
- policy->max = longrun_low_freq + msr_hi *
- ((longrun_high_freq - longrun_low_freq) / 100);
- }
- policy->cpu = 0;
-}
-
-
-/**
- * longrun_set_policy - sets a new CPUFreq policy
- * @policy: new policy
- *
- * Sets a new CPUFreq policy on LongRun-capable processors. This function
- * has to be called with cpufreq_driver locked.
- */
-static int longrun_set_policy(struct cpufreq_policy *policy)
-{
- u32 msr_lo, msr_hi;
- u32 pctg_lo, pctg_hi;
-
- if (!policy)
- return -EINVAL;
-
- if ( longrun_high_freq <= longrun_low_freq ) {
- /* Assume degenerate Longrun table */
- pctg_lo = pctg_hi = 100;
- } else {
- pctg_lo = (policy->min - longrun_low_freq) /
- ((longrun_high_freq - longrun_low_freq) / 100);
- pctg_hi = (policy->max - longrun_low_freq) /
- ((longrun_high_freq - longrun_low_freq) / 100);
- }
-
- if (pctg_hi > 100)
- pctg_hi = 100;
- if (pctg_lo > pctg_hi)
- pctg_lo = pctg_hi;
-
- /* performance or economy mode */
- rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
- msr_lo &= 0xFFFFFFFE;
- switch (policy->policy) {
- case CPUFREQ_POLICY_PERFORMANCE:
- msr_lo |= 0x00000001;
- break;
- case CPUFREQ_POLICY_POWERSAVE:
- break;
- }
- wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
-
- /* lower and upper boundary */
- rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
- msr_lo &= 0xFFFFFF80;
- msr_hi &= 0xFFFFFF80;
- msr_lo |= pctg_lo;
- msr_hi |= pctg_hi;
- wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-
- return 0;
-}
-
-
-/**
- * longrun_verify_poliy - verifies a new CPUFreq policy
- * @policy: the policy to verify
- *
- * Validates a new CPUFreq policy. This function has to be called with
- * cpufreq_driver locked.
- */
-static int longrun_verify_policy(struct cpufreq_policy *policy)
-{
- if (!policy)
- return -EINVAL;
-
- policy->cpu = 0;
- cpufreq_verify_within_limits(policy,
- policy->cpuinfo.min_freq,
- policy->cpuinfo.max_freq);
-
- if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
- (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
- return -EINVAL;
-
- return 0;
-}
-
-static unsigned int longrun_get(unsigned int cpu)
-{
- u32 eax, ebx, ecx, edx;
-
- if (cpu)
- return 0;
-
- cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
- dprintk("cpuid eax is %u\n", eax);
-
- return (eax * 1000);
-}
-
-/**
- * longrun_determine_freqs - determines the lowest and highest possible core frequency
- * @low_freq: an int to put the lowest frequency into
- * @high_freq: an int to put the highest frequency into
- *
- * Determines the lowest and highest possible core frequencies on this CPU.
- * This is necessary to calculate the performance percentage according to
- * TMTA rules:
- * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
- */
-static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
- unsigned int *high_freq)
-{
- u32 msr_lo, msr_hi;
- u32 save_lo, save_hi;
- u32 eax, ebx, ecx, edx;
- u32 try_hi;
- struct cpuinfo_x86 *c = cpu_data;
-
- if (!low_freq || !high_freq)
- return -EINVAL;
-
- if (cpu_has(c, X86_FEATURE_LRTI)) {
- /* if the LongRun Table Interface is present, the
- * detection is a bit easier:
- * For minimum frequency, read out the maximum
- * level (msr_hi), write that into "currently
- * selected level", and read out the frequency.
- * For maximum frequency, read out level zero.
- */
- /* minimum */
- rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi);
- wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi);
- rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
- *low_freq = msr_lo * 1000; /* to kHz */
-
- /* maximum */
- wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi);
- rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
- *high_freq = msr_lo * 1000; /* to kHz */
-
- dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq);
-
- if (*low_freq > *high_freq)
- *low_freq = *high_freq;
- return 0;
- }
-
- /* set the upper border to the value determined during TSC init */
- *high_freq = (cpu_khz / 1000);
- *high_freq = *high_freq * 1000;
- dprintk("high frequency is %u kHz\n", *high_freq);
-
- /* get current borders */
- rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
- save_lo = msr_lo & 0x0000007F;
- save_hi = msr_hi & 0x0000007F;
-
- /* if current perf_pctg is larger than 90%, we need to decrease the
- * upper limit to make the calculation more accurate.
- */
- cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
- /* try decreasing in 10% steps, some processors react only
- * on some barrier values */
- for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) {
- /* set to 0 to try_hi perf_pctg */
- msr_lo &= 0xFFFFFF80;
- msr_hi &= 0xFFFFFF80;
- msr_lo |= 0;
- msr_hi |= try_hi;
- wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-
- /* read out current core MHz and current perf_pctg */
- cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
-
- /* restore values */
- wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
- }
- dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax);
-
- /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
- * eqals
- * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
- *
- * high_freq * perf_pctg is stored tempoarily into "ebx".
- */
- ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */
-
- if ((ecx > 95) || (ecx == 0) || (eax < ebx))
- return -EIO;
-
- edx = (eax - ebx) / (100 - ecx);
- *low_freq = edx * 1000; /* back to kHz */
-
- dprintk("low frequency is %u kHz\n", *low_freq);
-
- if (*low_freq > *high_freq)
- *low_freq = *high_freq;
-
- return 0;
-}
-
-
-static int __init longrun_cpu_init(struct cpufreq_policy *policy)
-{
- int result = 0;
-
- /* capability check */
- if (policy->cpu != 0)
- return -ENODEV;
-
- /* detect low and high frequency */
- result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq);
- if (result)
- return result;
-
- /* cpuinfo and default policy values */
- policy->cpuinfo.min_freq = longrun_low_freq;
- policy->cpuinfo.max_freq = longrun_high_freq;
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- longrun_get_policy(policy);
-
- return 0;
-}
-
-
-static struct cpufreq_driver longrun_driver = {
- .flags = CPUFREQ_CONST_LOOPS,
- .verify = longrun_verify_policy,
- .setpolicy = longrun_set_policy,
- .get = longrun_get,
- .init = longrun_cpu_init,
- .name = "longrun",
- .owner = THIS_MODULE,
-};
-
-
-/**
- * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver
- *
- * Initializes the LongRun support.
- */
-static int __init longrun_init(void)
-{
- struct cpuinfo_x86 *c = cpu_data;
-
- if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
- !cpu_has(c, X86_FEATURE_LONGRUN))
- return -ENODEV;
-
- return cpufreq_register_driver(&longrun_driver);
-}
-
-
-/**
- * longrun_exit - unregisters LongRun support
- */
-static void __exit longrun_exit(void)
-{
- cpufreq_unregister_driver(&longrun_driver);
-}
-
-
-MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors.");
-MODULE_LICENSE ("GPL");
-
-module_init(longrun_init);
-module_exit(longrun_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
deleted file mode 100644
index 270f2188d68..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * Pentium 4/Xeon CPU on demand clock modulation/speed scaling
- * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
- * (C) 2002 Arjan van de Ven <arjanv@redhat.com>
- * (C) 2002 Tora T. Engstad
- * All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * The author(s) of this software shall not be held liable for damages
- * of any nature resulting due to the use of this software. This
- * software is provided AS-IS with no warranties.
- *
- * Date Errata Description
- * 20020525 N44, O17 12.5% or 25% DC causes lockup
- *
- */
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
-#include <linux/cpumask.h>
-#include <linux/sched.h> /* current / set_cpus_allowed() */
-
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/timex.h>
-
-#include "speedstep-lib.h"
-
-#define PFX "p4-clockmod: "
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg)
-
-/*
- * Duty Cycle (3bits), note DC_DISABLE is not specified in
- * intel docs i just use it to mean disable
- */
-enum {
- DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT,
- DC_64PT, DC_75PT, DC_88PT, DC_DISABLE
-};
-
-#define DC_ENTRIES 8
-
-
-static int has_N44_O17_errata[NR_CPUS];
-static unsigned int stock_freq;
-static struct cpufreq_driver p4clockmod_driver;
-static unsigned int cpufreq_p4_get(unsigned int cpu);
-
-static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
-{
- u32 l, h;
-
- if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV))
- return -EINVAL;
-
- rdmsr(MSR_IA32_THERM_STATUS, l, h);
-
- if (l & 0x01)
- dprintk("CPU#%d currently thermal throttled\n", cpu);
-
- if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT))
- newstate = DC_38PT;
-
- rdmsr(MSR_IA32_THERM_CONTROL, l, h);
- if (newstate == DC_DISABLE) {
- dprintk("CPU#%d disabling modulation\n", cpu);
- wrmsr(MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
- } else {
- dprintk("CPU#%d setting duty cycle to %d%%\n",
- cpu, ((125 * newstate) / 10));
- /* bits 63 - 5 : reserved
- * bit 4 : enable/disable
- * bits 3-1 : duty cycle
- * bit 0 : reserved
- */
- l = (l & ~14);
- l = l | (1<<4) | ((newstate & 0x7)<<1);
- wrmsr(MSR_IA32_THERM_CONTROL, l, h);
- }
-
- return 0;
-}
-
-
-static struct cpufreq_frequency_table p4clockmod_table[] = {
- {DC_RESV, CPUFREQ_ENTRY_INVALID},
- {DC_DFLT, 0},
- {DC_25PT, 0},
- {DC_38PT, 0},
- {DC_50PT, 0},
- {DC_64PT, 0},
- {DC_75PT, 0},
- {DC_88PT, 0},
- {DC_DISABLE, 0},
- {DC_RESV, CPUFREQ_TABLE_END},
-};
-
-
-static int cpufreq_p4_target(struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
-{
- unsigned int newstate = DC_RESV;
- struct cpufreq_freqs freqs;
- cpumask_t cpus_allowed;
- int i;
-
- if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate))
- return -EINVAL;
-
- freqs.old = cpufreq_p4_get(policy->cpu);
- freqs.new = stock_freq * p4clockmod_table[newstate].index / 8;
-
- if (freqs.new == freqs.old)
- return 0;
-
- /* notifiers */
- for_each_cpu_mask(i, policy->cpus) {
- freqs.cpu = i;
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
- }
-
- /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
- * Developer's Manual, Volume 3
- */
- cpus_allowed = current->cpus_allowed;
-
- for_each_cpu_mask(i, policy->cpus) {
- cpumask_t this_cpu = cpumask_of_cpu(i);
-
- set_cpus_allowed(current, this_cpu);
- BUG_ON(smp_processor_id() != i);
-
- cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
- }
- set_cpus_allowed(current, cpus_allowed);
-
- /* notifiers */
- for_each_cpu_mask(i, policy->cpus) {
- freqs.cpu = i;
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
- }
-
- return 0;
-}
-
-
-static int cpufreq_p4_verify(struct cpufreq_policy *policy)
-{
- return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]);
-}
-
-
-static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
-{
- if ((c->x86 == 0x06) && (c->x86_model == 0x09)) {
- /* Pentium M (Banias) */
- printk(KERN_WARNING PFX "Warning: Pentium M detected. "
- "The speedstep_centrino module offers voltage scaling"
- " in addition of frequency scaling. You should use "
- "that instead of p4-clockmod, if possible.\n");
- return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM);
- }
-
- if ((c->x86 == 0x06) && (c->x86_model == 0x0D)) {
- /* Pentium M (Dothan) */
- printk(KERN_WARNING PFX "Warning: Pentium M detected. "
- "The speedstep_centrino module offers voltage scaling"
- " in addition of frequency scaling. You should use "
- "that instead of p4-clockmod, if possible.\n");
- /* on P-4s, the TSC runs with constant frequency independent whether
- * throttling is active or not. */
- p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
- return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM);
- }
-
- if (c->x86 != 0xF) {
- printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <linux@brodo.de>\n");
- return 0;
- }
-
- /* on P-4s, the TSC runs with constant frequency independent whether
- * throttling is active or not. */
- p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
-
- if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) {
- printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
- "The speedstep-ich or acpi cpufreq modules offer "
- "voltage scaling in addition of frequency scaling. "
- "You should use either one instead of p4-clockmod, "
- "if possible.\n");
- return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M);
- }
-
- return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D);
-}
-
-
-
-static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
-{
- struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
- int cpuid = 0;
- unsigned int i;
-
-#ifdef CONFIG_SMP
- policy->cpus = cpu_sibling_map[policy->cpu];
-#endif
-
- /* Errata workaround */
- cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
- switch (cpuid) {
- case 0x0f07:
- case 0x0f0a:
- case 0x0f11:
- case 0x0f12:
- has_N44_O17_errata[policy->cpu] = 1;
- dprintk("has errata -- disabling low frequencies\n");
- }
-
- /* get max frequency */
- stock_freq = cpufreq_p4_get_frequency(c);
- if (!stock_freq)
- return -EINVAL;
-
- /* table init */
- for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
- if ((i<2) && (has_N44_O17_errata[policy->cpu]))
- p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
- else
- p4clockmod_table[i].frequency = (stock_freq * i)/8;
- }
- cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
-
- /* cpuinfo and default policy values */
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
- policy->cpuinfo.transition_latency = 1000000; /* assumed */
- policy->cur = stock_freq;
-
- return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
-}
-
-
-static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy)
-{
- cpufreq_frequency_table_put_attr(policy->cpu);
- return 0;
-}
-
-static unsigned int cpufreq_p4_get(unsigned int cpu)
-{
- cpumask_t cpus_allowed;
- u32 l, h;
-
- cpus_allowed = current->cpus_allowed;
-
- set_cpus_allowed(current, cpumask_of_cpu(cpu));
- BUG_ON(smp_processor_id() != cpu);
-
- rdmsr(MSR_IA32_THERM_CONTROL, l, h);
-
- set_cpus_allowed(current, cpus_allowed);
-
- if (l & 0x10) {
- l = l >> 1;
- l &= 0x7;
- } else
- l = DC_DISABLE;
-
- if (l != DC_DISABLE)
- return (stock_freq * l / 8);
-
- return stock_freq;
-}
-
-static struct freq_attr* p4clockmod_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-static struct cpufreq_driver p4clockmod_driver = {
- .verify = cpufreq_p4_verify,
- .target = cpufreq_p4_target,
- .init = cpufreq_p4_cpu_init,
- .exit = cpufreq_p4_cpu_exit,
- .get = cpufreq_p4_get,
- .name = "p4-clockmod",
- .owner = THIS_MODULE,
- .attr = p4clockmod_attr,
-};
-
-
-static int __init cpufreq_p4_init(void)
-{
- struct cpuinfo_x86 *c = cpu_data;
- int ret;
-
- /*
- * THERM_CONTROL is architectural for IA32 now, so
- * we can rely on the capability checks
- */
- if (c->x86_vendor != X86_VENDOR_INTEL)
- return -ENODEV;
-
- if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) ||
- !test_bit(X86_FEATURE_ACC, c->x86_capability))
- return -ENODEV;
-
- ret = cpufreq_register_driver(&p4clockmod_driver);
- if (!ret)
- printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
-
- return (ret);
-}
-
-
-static void __exit cpufreq_p4_exit(void)
-{
- cpufreq_unregister_driver(&p4clockmod_driver);
-}
-
-
-MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>");
-MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
-MODULE_LICENSE ("GPL");
-
-late_initcall(cpufreq_p4_init);
-module_exit(cpufreq_p4_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c
deleted file mode 100644
index 222f8cfe3c5..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
- * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski.
- *
- * Licensed under the terms of the GNU GPL License version 2.
- *
- * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/ioport.h>
-#include <linux/slab.h>
-
-#include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
-
-
-#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
- as it is unused */
-
-static unsigned int busfreq; /* FSB, in 10 kHz */
-static unsigned int max_multiplier;
-
-
-/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */
-static struct cpufreq_frequency_table clock_ratio[] = {
- {45, /* 000 -> 4.5x */ 0},
- {50, /* 001 -> 5.0x */ 0},
- {40, /* 010 -> 4.0x */ 0},
- {55, /* 011 -> 5.5x */ 0},
- {20, /* 100 -> 2.0x */ 0},
- {30, /* 101 -> 3.0x */ 0},
- {60, /* 110 -> 6.0x */ 0},
- {35, /* 111 -> 3.5x */ 0},
- {0, CPUFREQ_TABLE_END}
-};
-
-
-/**
- * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier
- *
- * Returns the current setting of the frequency multiplier. Core clock
- * speed is frequency of the Front-Side Bus multiplied with this value.
- */
-static int powernow_k6_get_cpu_multiplier(void)
-{
- u64 invalue = 0;
- u32 msrval;
-
- msrval = POWERNOW_IOPORT + 0x1;
- wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
- invalue=inl(POWERNOW_IOPORT + 0x8);
- msrval = POWERNOW_IOPORT + 0x0;
- wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
-
- return clock_ratio[(invalue >> 5)&7].index;
-}
-
-
-/**
- * powernow_k6_set_state - set the PowerNow! multiplier
- * @best_i: clock_ratio[best_i] is the target multiplier
- *
- * Tries to change the PowerNow! multiplier
- */
-static void powernow_k6_set_state (unsigned int best_i)
-{
- unsigned long outvalue=0, invalue=0;
- unsigned long msrval;
- struct cpufreq_freqs freqs;
-
- if (clock_ratio[best_i].index > max_multiplier) {
- printk(KERN_ERR "cpufreq: invalid target frequency\n");
- return;
- }
-
- freqs.old = busfreq * powernow_k6_get_cpu_multiplier();
- freqs.new = busfreq * clock_ratio[best_i].index;
- freqs.cpu = 0; /* powernow-k6.c is UP only driver */
-
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
- /* we now need to transform best_i to the BVC format, see AMD#23446 */
-
- outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5);
-
- msrval = POWERNOW_IOPORT + 0x1;
- wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
- invalue=inl(POWERNOW_IOPORT + 0x8);
- invalue = invalue & 0xf;
- outvalue = outvalue | invalue;
- outl(outvalue ,(POWERNOW_IOPORT + 0x8));
- msrval = POWERNOW_IOPORT + 0x0;
- wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
-
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
- return;
-}
-
-
-/**
- * powernow_k6_verify - verifies a new CPUfreq policy
- * @policy: new policy
- *
- * Policy must be within lowest and highest possible CPU Frequency,
- * and at least one possible state must be within min and max.
- */
-static int powernow_k6_verify(struct cpufreq_policy *policy)
-{
- return cpufreq_frequency_table_verify(policy, &clock_ratio[0]);
-}
-
-
-/**
- * powernow_k6_setpolicy - sets a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * sets a new CPUFreq policy
- */
-static int powernow_k6_target (struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
-{
- unsigned int newstate = 0;
-
- if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate))
- return -EINVAL;
-
- powernow_k6_set_state(newstate);
-
- return 0;
-}
-
-
-static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
-{
- unsigned int i;
- int result;
-
- if (policy->cpu != 0)
- return -ENODEV;
-
- /* get frequencies */
- max_multiplier = powernow_k6_get_cpu_multiplier();
- busfreq = cpu_khz / max_multiplier;
-
- /* table init */
- for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
- if (clock_ratio[i].index > max_multiplier)
- clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
- else
- clock_ratio[i].frequency = busfreq * clock_ratio[i].index;
- }
-
- /* cpuinfo and default policy values */
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- policy->cur = busfreq * max_multiplier;
-
- result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
- if (result)
- return (result);
-
- cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
-
- return 0;
-}
-
-
-static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
-{
- unsigned int i;
- for (i=0; i<8; i++) {
- if (i==max_multiplier)
- powernow_k6_set_state(i);
- }
- cpufreq_frequency_table_put_attr(policy->cpu);
- return 0;
-}
-
-static unsigned int powernow_k6_get(unsigned int cpu)
-{
- return busfreq * powernow_k6_get_cpu_multiplier();
-}
-
-static struct freq_attr* powernow_k6_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-static struct cpufreq_driver powernow_k6_driver = {
- .verify = powernow_k6_verify,
- .target = powernow_k6_target,
- .init = powernow_k6_cpu_init,
- .exit = powernow_k6_cpu_exit,
- .get = powernow_k6_get,
- .name = "powernow-k6",
- .owner = THIS_MODULE,
- .attr = powernow_k6_attr,
-};
-
-
-/**
- * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver
- *
- * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported
- * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero
- * on success.
- */
-static int __init powernow_k6_init(void)
-{
- struct cpuinfo_x86 *c = cpu_data;
-
- if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
- ((c->x86_model != 12) && (c->x86_model != 13)))
- return -ENODEV;
-
- if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
- printk("cpufreq: PowerNow IOPORT region already used.\n");
- return -EIO;
- }
-
- if (cpufreq_register_driver(&powernow_k6_driver)) {
- release_region (POWERNOW_IOPORT, 16);
- return -EINVAL;
- }
-
- return 0;
-}
-
-
-/**
- * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support
- *
- * Unregisters AMD K6-2+ / K6-3+ PowerNow! support.
- */
-static void __exit powernow_k6_exit(void)
-{
- cpufreq_unregister_driver(&powernow_k6_driver);
- release_region (POWERNOW_IOPORT, 16);
-}
-
-
-MODULE_AUTHOR ("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
-MODULE_LICENSE ("GPL");
-
-module_init(powernow_k6_init);
-module_exit(powernow_k6_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
deleted file mode 100644
index 73a5dc5b26b..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ /dev/null
@@ -1,693 +0,0 @@
-/*
- * AMD K7 Powernow driver.
- * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs.
- * (C) 2003-2004 Dave Jones <davej@redhat.com>
- *
- * Licensed under the terms of the GNU GPL License version 2.
- * Based upon datasheets & sample CPUs kindly provided by AMD.
- *
- * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt.
- * - We cli/sti on stepping A0 CPUs around the FID/VID transition.
- * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect.
- * - We disable half multipliers if ACPI is used on A0 stepping CPUs.
- */
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/dmi.h>
-
-#include <asm/msr.h>
-#include <asm/timer.h>
-#include <asm/timex.h>
-#include <asm/io.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-#include <linux/acpi.h>
-#include <acpi/processor.h>
-#endif
-
-#include "powernow-k7.h"
-
-#define PFX "powernow: "
-
-
-struct psb_s {
- u8 signature[10];
- u8 tableversion;
- u8 flags;
- u16 settlingtime;
- u8 reserved1;
- u8 numpst;
-};
-
-struct pst_s {
- u32 cpuid;
- u8 fsbspeed;
- u8 maxfid;
- u8 startvid;
- u8 numpstates;
-};
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-union powernow_acpi_control_t {
- struct {
- unsigned long fid:5,
- vid:5,
- sgtc:20,
- res1:2;
- } bits;
- unsigned long val;
-};
-#endif
-
-#ifdef CONFIG_CPU_FREQ_DEBUG
-/* divide by 1000 to get VCore voltage in V. */
-static int mobile_vid_table[32] = {
- 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
- 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0,
- 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
- 1075, 1050, 1025, 1000, 975, 950, 925, 0,
-};
-#endif
-
-/* divide by 10 to get FID. */
-static int fid_codes[32] = {
- 110, 115, 120, 125, 50, 55, 60, 65,
- 70, 75, 80, 85, 90, 95, 100, 105,
- 30, 190, 40, 200, 130, 135, 140, 210,
- 150, 225, 160, 165, 170, 180, -1, -1,
-};
-
-/* This parameter is used in order to force ACPI instead of legacy method for
- * configuration purpose.
- */
-
-static int acpi_force;
-
-static struct cpufreq_frequency_table *powernow_table;
-
-static unsigned int can_scale_bus;
-static unsigned int can_scale_vid;
-static unsigned int minimum_speed=-1;
-static unsigned int maximum_speed;
-static unsigned int number_scales;
-static unsigned int fsb;
-static unsigned int latency;
-static char have_a0;
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg)
-
-static int check_fsb(unsigned int fsbspeed)
-{
- int delta;
- unsigned int f = fsb / 1000;
-
- delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
- return (delta < 5);
-}
-
-static int check_powernow(void)
-{
- struct cpuinfo_x86 *c = cpu_data;
- unsigned int maxei, eax, ebx, ecx, edx;
-
- if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) {
-#ifdef MODULE
- printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n");
-#endif
- return 0;
- }
-
- /* Get maximum capabilities */
- maxei = cpuid_eax (0x80000000);
- if (maxei < 0x80000007) { /* Any powernow info ? */
-#ifdef MODULE
- printk (KERN_INFO PFX "No powernow capabilities detected\n");
-#endif
- return 0;
- }
-
- if ((c->x86_model == 6) && (c->x86_mask == 0)) {
- printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n");
- have_a0 = 1;
- }
-
- cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
-
- /* Check we can actually do something before we say anything.*/
- if (!(edx & (1 << 1 | 1 << 2)))
- return 0;
-
- printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
-
- if (edx & 1 << 1) {
- printk ("frequency");
- can_scale_bus=1;
- }
-
- if ((edx & (1 << 1 | 1 << 2)) == 0x6)
- printk (" and ");
-
- if (edx & 1 << 2) {
- printk ("voltage");
- can_scale_vid=1;
- }
-
- printk (".\n");
- return 1;
-}
-
-
-static int get_ranges (unsigned char *pst)
-{
- unsigned int j;
- unsigned int speed;
- u8 fid, vid;
-
- powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
- if (!powernow_table)
- return -ENOMEM;
- memset(powernow_table, 0, (sizeof(struct cpufreq_frequency_table) * (number_scales + 1)));
-
- for (j=0 ; j < number_scales; j++) {
- fid = *pst++;
-
- powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
- powernow_table[j].index = fid; /* lower 8 bits */
-
- speed = powernow_table[j].frequency;
-
- if ((fid_codes[fid] % 10)==5) {
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
- if (have_a0 == 1)
- powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID;
-#endif
- }
-
- if (speed < minimum_speed)
- minimum_speed = speed;
- if (speed > maximum_speed)
- maximum_speed = speed;
-
- vid = *pst++;
- powernow_table[j].index |= (vid << 8); /* upper 8 bits */
-
- dprintk (" FID: 0x%x (%d.%dx [%dMHz]) "
- "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
- fid_codes[fid] % 10, speed/1000, vid,
- mobile_vid_table[vid]/1000,
- mobile_vid_table[vid]%1000);
- }
- powernow_table[number_scales].frequency = CPUFREQ_TABLE_END;
- powernow_table[number_scales].index = 0;
-
- return 0;
-}
-
-
-static void change_FID(int fid)
-{
- union msr_fidvidctl fidvidctl;
-
- rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
- if (fidvidctl.bits.FID != fid) {
- fidvidctl.bits.SGTC = latency;
- fidvidctl.bits.FID = fid;
- fidvidctl.bits.VIDC = 0;
- fidvidctl.bits.FIDC = 1;
- wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
- }
-}
-
-
-static void change_VID(int vid)
-{
- union msr_fidvidctl fidvidctl;
-
- rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
- if (fidvidctl.bits.VID != vid) {
- fidvidctl.bits.SGTC = latency;
- fidvidctl.bits.VID = vid;
- fidvidctl.bits.FIDC = 0;
- fidvidctl.bits.VIDC = 1;
- wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
- }
-}
-
-
-static void change_speed (unsigned int index)
-{
- u8 fid, vid;
- struct cpufreq_freqs freqs;
- union msr_fidvidstatus fidvidstatus;
- int cfid;
-
- /* fid are the lower 8 bits of the index we stored into
- * the cpufreq frequency table in powernow_decode_bios,
- * vid are the upper 8 bits.
- */
-
- fid = powernow_table[index].index & 0xFF;
- vid = (powernow_table[index].index & 0xFF00) >> 8;
-
- freqs.cpu = 0;
-
- rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
- cfid = fidvidstatus.bits.CFID;
- freqs.old = fsb * fid_codes[cfid] / 10;
-
- freqs.new = powernow_table[index].frequency;
-
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
- /* Now do the magic poking into the MSRs. */
-
- if (have_a0 == 1) /* A0 errata 5 */
- local_irq_disable();
-
- if (freqs.old > freqs.new) {
- /* Going down, so change FID first */
- change_FID(fid);
- change_VID(vid);
- } else {
- /* Going up, so change VID first */
- change_VID(vid);
- change_FID(fid);
- }
-
-
- if (have_a0 == 1)
- local_irq_enable();
-
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-}
-
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-
-static struct acpi_processor_performance *acpi_processor_perf;
-
-static int powernow_acpi_init(void)
-{
- int i;
- int retval = 0;
- union powernow_acpi_control_t pc;
-
- if (acpi_processor_perf != NULL && powernow_table != NULL) {
- retval = -EINVAL;
- goto err0;
- }
-
- acpi_processor_perf = kmalloc(sizeof(struct acpi_processor_performance),
- GFP_KERNEL);
-
- if (!acpi_processor_perf) {
- retval = -ENOMEM;
- goto err0;
- }
-
- memset(acpi_processor_perf, 0, sizeof(struct acpi_processor_performance));
-
- if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
- retval = -EIO;
- goto err1;
- }
-
- if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
- retval = -ENODEV;
- goto err2;
- }
-
- if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
- retval = -ENODEV;
- goto err2;
- }
-
- number_scales = acpi_processor_perf->state_count;
-
- if (number_scales < 2) {
- retval = -ENODEV;
- goto err2;
- }
-
- powernow_table = kmalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
- if (!powernow_table) {
- retval = -ENOMEM;
- goto err2;
- }
-
- memset(powernow_table, 0, ((number_scales + 1) * sizeof(struct cpufreq_frequency_table)));
-
- pc.val = (unsigned long) acpi_processor_perf->states[0].control;
- for (i = 0; i < number_scales; i++) {
- u8 fid, vid;
- unsigned int speed;
-
- pc.val = (unsigned long) acpi_processor_perf->states[i].control;
- dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
- i,
- (u32) acpi_processor_perf->states[i].core_frequency,
- (u32) acpi_processor_perf->states[i].power,
- (u32) acpi_processor_perf->states[i].transition_latency,
- (u32) acpi_processor_perf->states[i].control,
- pc.bits.sgtc);
-
- vid = pc.bits.vid;
- fid = pc.bits.fid;
-
- powernow_table[i].frequency = fsb * fid_codes[fid] / 10;
- powernow_table[i].index = fid; /* lower 8 bits */
- powernow_table[i].index |= (vid << 8); /* upper 8 bits */
-
- speed = powernow_table[i].frequency;
-
- if ((fid_codes[fid] % 10)==5) {
- if (have_a0 == 1)
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
- }
-
- dprintk (" FID: 0x%x (%d.%dx [%dMHz]) "
- "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
- fid_codes[fid] % 10, speed/1000, vid,
- mobile_vid_table[vid]/1000,
- mobile_vid_table[vid]%1000);
-
- if (latency < pc.bits.sgtc)
- latency = pc.bits.sgtc;
-
- if (speed < minimum_speed)
- minimum_speed = speed;
- if (speed > maximum_speed)
- maximum_speed = speed;
- }
-
- powernow_table[i].frequency = CPUFREQ_TABLE_END;
- powernow_table[i].index = 0;
-
- /* notify BIOS that we exist */
- acpi_processor_notify_smm(THIS_MODULE);
-
- return 0;
-
-err2:
- acpi_processor_unregister_performance(acpi_processor_perf, 0);
-err1:
- kfree(acpi_processor_perf);
-err0:
- printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n");
- acpi_processor_perf = NULL;
- return retval;
-}
-#else
-static int powernow_acpi_init(void)
-{
- printk(KERN_INFO PFX "no support for ACPI processor found."
- " Please recompile your kernel with ACPI processor\n");
- return -EINVAL;
-}
-#endif
-
-static int powernow_decode_bios (int maxfid, int startvid)
-{
- struct psb_s *psb;
- struct pst_s *pst;
- unsigned int i, j;
- unsigned char *p;
- unsigned int etuple;
- unsigned int ret;
-
- etuple = cpuid_eax(0x80000001);
-
- for (i=0xC0000; i < 0xffff0 ; i+=16) {
-
- p = phys_to_virt(i);
-
- if (memcmp(p, "AMDK7PNOW!", 10) == 0){
- dprintk ("Found PSB header at %p\n", p);
- psb = (struct psb_s *) p;
- dprintk ("Table version: 0x%x\n", psb->tableversion);
- if (psb->tableversion != 0x12) {
- printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n");
- return -ENODEV;
- }
-
- dprintk ("Flags: 0x%x\n", psb->flags);
- if ((psb->flags & 1)==0) {
- dprintk ("Mobile voltage regulator\n");
- } else {
- dprintk ("Desktop voltage regulator\n");
- }
-
- latency = psb->settlingtime;
- if (latency < 100) {
- printk (KERN_INFO PFX "BIOS set settling time to %d microseconds."
- "Should be at least 100. Correcting.\n", latency);
- latency = 100;
- }
- dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime);
- dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst);
-
- p += sizeof (struct psb_s);
-
- pst = (struct pst_s *) p;
-
- for (i = 0 ; i <psb->numpst; i++) {
- pst = (struct pst_s *) p;
- number_scales = pst->numpstates;
-
- if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) &&
- (maxfid==pst->maxfid) && (startvid==pst->startvid))
- {
- dprintk ("PST:%d (@%p)\n", i, pst);
- dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n",
- pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
-
- ret = get_ranges ((char *) pst + sizeof (struct pst_s));
- return ret;
-
- } else {
- p = (char *) pst + sizeof (struct pst_s);
- for (j=0 ; j < number_scales; j++)
- p+=2;
- }
- }
- printk (KERN_INFO PFX "No PST tables match this cpuid (0x%x)\n", etuple);
- printk (KERN_INFO PFX "This is indicative of a broken BIOS.\n");
-
- return -EINVAL;
- }
- p++;
- }
-
- return -ENODEV;
-}
-
-
-static int powernow_target (struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
-{
- unsigned int newstate;
-
- if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, relation, &newstate))
- return -EINVAL;
-
- change_speed(newstate);
-
- return 0;
-}
-
-
-static int powernow_verify (struct cpufreq_policy *policy)
-{
- return cpufreq_frequency_table_verify(policy, powernow_table);
-}
-
-/*
- * We use the fact that the bus frequency is somehow
- * a multiple of 100000/3 khz, then we compute sgtc according
- * to this multiple.
- * That way, we match more how AMD thinks all of that work.
- * We will then get the same kind of behaviour already tested under
- * the "well-known" other OS.
- */
-static int __init fixup_sgtc(void)
-{
- unsigned int sgtc;
- unsigned int m;
-
- m = fsb / 3333;
- if ((m % 10) >= 5)
- m += 5;
-
- m /= 10;
-
- sgtc = 100 * m * latency;
- sgtc = sgtc / 3;
- if (sgtc > 0xfffff) {
- printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
- sgtc = 0xfffff;
- }
- return sgtc;
-}
-
-static unsigned int powernow_get(unsigned int cpu)
-{
- union msr_fidvidstatus fidvidstatus;
- unsigned int cfid;
-
- if (cpu)
- return 0;
- rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
- cfid = fidvidstatus.bits.CFID;
-
- return (fsb * fid_codes[cfid] / 10);
-}
-
-
-static int __init acer_cpufreq_pst(struct dmi_system_id *d)
-{
- printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident);
- printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
- printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n");
- return 0;
-}
-
-/*
- * Some Athlon laptops have really fucked PST tables.
- * A BIOS update is all that can save them.
- * Mention this, and disable cpufreq.
- */
-static struct dmi_system_id __initdata powernow_dmi_table[] = {
- {
- .callback = acer_cpufreq_pst,
- .ident = "Acer Aspire",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"),
- DMI_MATCH(DMI_BIOS_VERSION, "3A71"),
- },
- },
- { }
-};
-
-static int __init powernow_cpu_init (struct cpufreq_policy *policy)
-{
- union msr_fidvidstatus fidvidstatus;
- int result;
-
- if (policy->cpu != 0)
- return -ENODEV;
-
- rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
-
- /* recalibrate cpu_khz */
- result = recalibrate_cpu_khz();
- if (result)
- return result;
-
- fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
- if (!fsb) {
- printk(KERN_WARNING PFX "can not determine bus frequency\n");
- return -EINVAL;
- }
- dprintk("FSB: %3dMHz\n", fsb/1000);
-
- if (dmi_check_system(powernow_dmi_table) || acpi_force) {
- printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n");
- result = powernow_acpi_init();
- } else {
- result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID);
- if (result) {
- printk (KERN_INFO PFX "Trying ACPI perflib\n");
- maximum_speed = 0;
- minimum_speed = -1;
- latency = 0;
- result = powernow_acpi_init();
- if (result) {
- printk (KERN_INFO PFX "ACPI and legacy methods failed\n");
- printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.shtml\n");
- }
- } else {
- /* SGTC use the bus clock as timer */
- latency = fixup_sgtc();
- printk(KERN_INFO PFX "SGTC: %d\n", latency);
- }
- }
-
- if (result)
- return result;
-
- printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
- minimum_speed/1000, maximum_speed/1000);
-
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
-
- policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency);
-
- policy->cur = powernow_get(0);
-
- cpufreq_frequency_table_get_attr(powernow_table, policy->cpu);
-
- return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
-}
-
-static int powernow_cpu_exit (struct cpufreq_policy *policy) {
- cpufreq_frequency_table_put_attr(policy->cpu);
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
- if (acpi_processor_perf) {
- acpi_processor_unregister_performance(acpi_processor_perf, 0);
- kfree(acpi_processor_perf);
- }
-#endif
-
- kfree(powernow_table);
- return 0;
-}
-
-static struct freq_attr* powernow_table_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-static struct cpufreq_driver powernow_driver = {
- .verify = powernow_verify,
- .target = powernow_target,
- .get = powernow_get,
- .init = powernow_cpu_init,
- .exit = powernow_cpu_exit,
- .name = "powernow-k7",
- .owner = THIS_MODULE,
- .attr = powernow_table_attr,
-};
-
-static int __init powernow_init (void)
-{
- if (check_powernow()==0)
- return -ENODEV;
- return cpufreq_register_driver(&powernow_driver);
-}
-
-
-static void __exit powernow_exit (void)
-{
- cpufreq_unregister_driver(&powernow_driver);
-}
-
-module_param(acpi_force, int, 0444);
-MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
-
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
-MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
-MODULE_LICENSE ("GPL");
-
-late_initcall(powernow_init);
-module_exit(powernow_exit);
-
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.h b/arch/i386/kernel/cpu/cpufreq/powernow-k7.h
deleted file mode 100644
index f8a63b3664e..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $
- * (C) 2003 Dave Jones.
- *
- * Licensed under the terms of the GNU GPL License version 2.
- *
- * AMD-specific information
- *
- */
-
-union msr_fidvidctl {
- struct {
- unsigned FID:5, // 4:0
- reserved1:3, // 7:5
- VID:5, // 12:8
- reserved2:3, // 15:13
- FIDC:1, // 16
- VIDC:1, // 17
- reserved3:2, // 19:18
- FIDCHGRATIO:1, // 20
- reserved4:11, // 31-21
- SGTC:20, // 32:51
- reserved5:12; // 63:52
- } bits;
- unsigned long long val;
-};
-
-union msr_fidvidstatus {
- struct {
- unsigned CFID:5, // 4:0
- reserved1:3, // 7:5
- SFID:5, // 12:8
- reserved2:3, // 15:13
- MFID:5, // 20:16
- reserved3:11, // 31:21
- CVID:5, // 36:32
- reserved4:3, // 39:37
- SVID:5, // 44:40
- reserved5:3, // 47:45
- MVID:5, // 52:48
- reserved6:11; // 63:53
- } bits;
- unsigned long long val;
-};
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
deleted file mode 100644
index 2d5c9adba0c..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ /dev/null
@@ -1,1184 +0,0 @@
-/*
- * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc.
- * Your use of this code is subject to the terms and conditions of the
- * GNU general public license version 2. See "COPYING" or
- * http://www.gnu.org/licenses/gpl.html
- *
- * Support : mark.langsdorf@amd.com
- *
- * Based on the powernow-k7.c module written by Dave Jones.
- * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs
- * (C) 2004 Dominik Brodowski <linux@brodo.de>
- * (C) 2004 Pavel Machek <pavel@suse.cz>
- * Licensed under the terms of the GNU GPL License version 2.
- * Based upon datasheets & sample CPUs kindly provided by AMD.
- *
- * Valuable input gratefully received from Dave Jones, Pavel Machek,
- * Dominik Brodowski, and others.
- * Originally developed by Paul Devriendt.
- * Processor information obtained from Chapter 9 (Power and Thermal Management)
- * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
- * Opteron Processors" available for download from www.amd.com
- *
- * Tables for specific CPUs can be infrerred from
- * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
- */
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/cpumask.h>
-#include <linux/sched.h> /* for current / set_cpus_allowed() */
-
-#include <asm/msr.h>
-#include <asm/io.h>
-#include <asm/delay.h>
-
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
-#include <linux/acpi.h>
-#include <acpi/processor.h>
-#endif
-
-#define PFX "powernow-k8: "
-#define BFX PFX "BIOS error: "
-#define VERSION "version 1.50.4"
-#include "powernow-k8.h"
-
-/* serialize freq changes */
-static DECLARE_MUTEX(fidvid_sem);
-
-static struct powernow_k8_data *powernow_data[NR_CPUS];
-
-#ifndef CONFIG_SMP
-static cpumask_t cpu_core_map[1];
-#endif
-
-/* Return a frequency in MHz, given an input fid */
-static u32 find_freq_from_fid(u32 fid)
-{
- return 800 + (fid * 100);
-}
-
-/* Return a frequency in KHz, given an input fid */
-static u32 find_khz_freq_from_fid(u32 fid)
-{
- return 1000 * find_freq_from_fid(fid);
-}
-
-/* Return a voltage in miliVolts, given an input vid */
-static u32 find_millivolts_from_vid(struct powernow_k8_data *data, u32 vid)
-{
- return 1550-vid*25;
-}
-
-/* Return the vco fid for an input fid
- *
- * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
- * only from corresponding high fids. This returns "high" fid corresponding to
- * "low" one.
- */
-static u32 convert_fid_to_vco_fid(u32 fid)
-{
- if (fid < HI_FID_TABLE_BOTTOM) {
- return 8 + (2 * fid);
- } else {
- return fid;
- }
-}
-
-/*
- * Return 1 if the pending bit is set. Unless we just instructed the processor
- * to transition to a new state, seeing this bit set is really bad news.
- */
-static int pending_bit_stuck(void)
-{
- u32 lo, hi;
-
- rdmsr(MSR_FIDVID_STATUS, lo, hi);
- return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
-}
-
-/*
- * Update the global current fid / vid values from the status msr.
- * Returns 1 on error.
- */
-static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
-{
- u32 lo, hi;
- u32 i = 0;
-
- do {
- if (i++ > 10000) {
- dprintk("detected change pending stuck\n");
- return 1;
- }
- rdmsr(MSR_FIDVID_STATUS, lo, hi);
- } while (lo & MSR_S_LO_CHANGE_PENDING);
-
- data->currvid = hi & MSR_S_HI_CURRENT_VID;
- data->currfid = lo & MSR_S_LO_CURRENT_FID;
-
- return 0;
-}
-
-/* the isochronous relief time */
-static void count_off_irt(struct powernow_k8_data *data)
-{
- udelay((1 << data->irt) * 10);
- return;
-}
-
-/* the voltage stabalization time */
-static void count_off_vst(struct powernow_k8_data *data)
-{
- udelay(data->vstable * VST_UNITS_20US);
- return;
-}
-
-/* need to init the control msr to a safe value (for each cpu) */
-static void fidvid_msr_init(void)
-{
- u32 lo, hi;
- u8 fid, vid;
-
- rdmsr(MSR_FIDVID_STATUS, lo, hi);
- vid = hi & MSR_S_HI_CURRENT_VID;
- fid = lo & MSR_S_LO_CURRENT_FID;
- lo = fid | (vid << MSR_C_LO_VID_SHIFT);
- hi = MSR_C_HI_STP_GNT_BENIGN;
- dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
- wrmsr(MSR_FIDVID_CTL, lo, hi);
-}
-
-
-/* write the new fid value along with the other control fields to the msr */
-static int write_new_fid(struct powernow_k8_data *data, u32 fid)
-{
- u32 lo;
- u32 savevid = data->currvid;
- u32 i = 0;
-
- if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
- printk(KERN_ERR PFX "internal error - overflow on fid write\n");
- return 1;
- }
-
- lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
-
- dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
- fid, lo, data->plllock * PLL_LOCK_CONVERSION);
-
- do {
- wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
- if (i++ > 100) {
- printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
- return 1;
- }
- } while (query_current_values_with_pending_wait(data));
-
- count_off_irt(data);
-
- if (savevid != data->currvid) {
- printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n",
- savevid, data->currvid);
- return 1;
- }
-
- if (fid != data->currfid) {
- printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
- data->currfid);
- return 1;
- }
-
- return 0;
-}
-
-/* Write a new vid to the hardware */
-static int write_new_vid(struct powernow_k8_data *data, u32 vid)
-{
- u32 lo;
- u32 savefid = data->currfid;
- int i = 0;
-
- if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
- printk(KERN_ERR PFX "internal error - overflow on vid write\n");
- return 1;
- }
-
- lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
-
- dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
- vid, lo, STOP_GRANT_5NS);
-
- do {
- wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
- if (i++ > 100) {
- printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
- return 1;
- }
- } while (query_current_values_with_pending_wait(data));
-
- if (savefid != data->currfid) {
- printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n",
- savefid, data->currfid);
- return 1;
- }
-
- if (vid != data->currvid) {
- printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid,
- data->currvid);
- return 1;
- }
-
- return 0;
-}
-
-/*
- * Reduce the vid by the max of step or reqvid.
- * Decreasing vid codes represent increasing voltages:
- * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
- */
-static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step)
-{
- if ((data->currvid - reqvid) > step)
- reqvid = data->currvid - step;
-
- if (write_new_vid(data, reqvid))
- return 1;
-
- count_off_vst(data);
-
- return 0;
-}
-
-/* Change the fid and vid, by the 3 phases. */
-static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid)
-{
- if (core_voltage_pre_transition(data, reqvid))
- return 1;
-
- if (core_frequency_transition(data, reqfid))
- return 1;
-
- if (core_voltage_post_transition(data, reqvid))
- return 1;
-
- if (query_current_values_with_pending_wait(data))
- return 1;
-
- if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
- printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n",
- smp_processor_id(),
- reqfid, reqvid, data->currfid, data->currvid);
- return 1;
- }
-
- dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
- smp_processor_id(), data->currfid, data->currvid);
-
- return 0;
-}
-
-/* Phase 1 - core voltage transition ... setup voltage */
-static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid)
-{
- u32 rvosteps = data->rvo;
- u32 savefid = data->currfid;
- u32 maxvid, lo;
-
- dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n",
- smp_processor_id(),
- data->currfid, data->currvid, reqvid, data->rvo);
-
- rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
- maxvid = 0x1f & (maxvid >> 16);
- dprintk("ph1 maxvid=0x%x\n", maxvid);
- if (reqvid < maxvid) /* lower numbers are higher voltages */
- reqvid = maxvid;
-
- while (data->currvid > reqvid) {
- dprintk("ph1: curr 0x%x, req vid 0x%x\n",
- data->currvid, reqvid);
- if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
- return 1;
- }
-
- while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) {
- if (data->currvid == maxvid) {
- rvosteps = 0;
- } else {
- dprintk("ph1: changing vid for rvo, req 0x%x\n",
- data->currvid - 1);
- if (decrease_vid_code_by_step(data, data->currvid - 1, 1))
- return 1;
- rvosteps--;
- }
- }
-
- if (query_current_values_with_pending_wait(data))
- return 1;
-
- if (savefid != data->currfid) {
- printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid);
- return 1;
- }
-
- dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n",
- data->currfid, data->currvid);
-
- return 0;
-}
-
-/* Phase 2 - core frequency transition */
-static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
-{
- u32 vcoreqfid, vcocurrfid, vcofiddiff, savevid = data->currvid;
-
- if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
- printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n",
- reqfid, data->currfid);
- return 1;
- }
-
- if (data->currfid == reqfid) {
- printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid);
- return 0;
- }
-
- dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n",
- smp_processor_id(),
- data->currfid, data->currvid, reqfid);
-
- vcoreqfid = convert_fid_to_vco_fid(reqfid);
- vcocurrfid = convert_fid_to_vco_fid(data->currfid);
- vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
- : vcoreqfid - vcocurrfid;
-
- while (vcofiddiff > 2) {
- if (reqfid > data->currfid) {
- if (data->currfid > LO_FID_TABLE_TOP) {
- if (write_new_fid(data, data->currfid + 2)) {
- return 1;
- }
- } else {
- if (write_new_fid
- (data, 2 + convert_fid_to_vco_fid(data->currfid))) {
- return 1;
- }
- }
- } else {
- if (write_new_fid(data, data->currfid - 2))
- return 1;
- }
-
- vcocurrfid = convert_fid_to_vco_fid(data->currfid);
- vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
- : vcoreqfid - vcocurrfid;
- }
-
- if (write_new_fid(data, reqfid))
- return 1;
-
- if (query_current_values_with_pending_wait(data))
- return 1;
-
- if (data->currfid != reqfid) {
- printk(KERN_ERR PFX
- "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n",
- data->currfid, reqfid);
- return 1;
- }
-
- if (savevid != data->currvid) {
- printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
- savevid, data->currvid);
- return 1;
- }
-
- dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n",
- data->currfid, data->currvid);
-
- return 0;
-}
-
-/* Phase 3 - core voltage transition flow ... jump to the final vid. */
-static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid)
-{
- u32 savefid = data->currfid;
- u32 savereqvid = reqvid;
-
- dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
- smp_processor_id(),
- data->currfid, data->currvid);
-
- if (reqvid != data->currvid) {
- if (write_new_vid(data, reqvid))
- return 1;
-
- if (savefid != data->currfid) {
- printk(KERN_ERR PFX
- "ph3: bad fid change, save 0x%x, curr 0x%x\n",
- savefid, data->currfid);
- return 1;
- }
-
- if (data->currvid != reqvid) {
- printk(KERN_ERR PFX
- "ph3: failed vid transition\n, req 0x%x, curr 0x%x",
- reqvid, data->currvid);
- return 1;
- }
- }
-
- if (query_current_values_with_pending_wait(data))
- return 1;
-
- if (savereqvid != data->currvid) {
- dprintk("ph3 failed, currvid 0x%x\n", data->currvid);
- return 1;
- }
-
- if (savefid != data->currfid) {
- dprintk("ph3 failed, currfid changed 0x%x\n",
- data->currfid);
- return 1;
- }
-
- dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n",
- data->currfid, data->currvid);
-
- return 0;
-}
-
-static int check_supported_cpu(unsigned int cpu)
-{
- cpumask_t oldmask = CPU_MASK_ALL;
- u32 eax, ebx, ecx, edx;
- unsigned int rc = 0;
-
- oldmask = current->cpus_allowed;
- set_cpus_allowed(current, cpumask_of_cpu(cpu));
- schedule();
-
- if (smp_processor_id() != cpu) {
- printk(KERN_ERR "limiting to cpu %u failed\n", cpu);
- goto out;
- }
-
- if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
- goto out;
-
- eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
- if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
- ((eax & CPUID_XFAM) != CPUID_XFAM_K8) ||
- ((eax & CPUID_XMOD) > CPUID_XMOD_REV_F)) {
- printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
- goto out;
- }
-
- eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
- if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
- printk(KERN_INFO PFX
- "No frequency change capabilities detected\n");
- goto out;
- }
-
- cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
- if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
- printk(KERN_INFO PFX "Power state transitions not supported\n");
- goto out;
- }
-
- rc = 1;
-
-out:
- set_cpus_allowed(current, oldmask);
- schedule();
- return rc;
-
-}
-
-static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
-{
- unsigned int j;
- u8 lastfid = 0xff;
-
- for (j = 0; j < data->numps; j++) {
- if (pst[j].vid > LEAST_VID) {
- printk(KERN_ERR PFX "vid %d invalid : 0x%x\n", j, pst[j].vid);
- return -EINVAL;
- }
- if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */
- printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j);
- return -ENODEV;
- }
- if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */
- printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j);
- return -ENODEV;
- }
- if ((pst[j].fid > MAX_FID)
- || (pst[j].fid & 1)
- || (j && (pst[j].fid < HI_FID_TABLE_BOTTOM))) {
- /* Only first fid is allowed to be in "low" range */
- printk(KERN_ERR PFX "two low fids - %d : 0x%x\n", j, pst[j].fid);
- return -EINVAL;
- }
- if (pst[j].fid < lastfid)
- lastfid = pst[j].fid;
- }
- if (lastfid & 1) {
- printk(KERN_ERR PFX "lastfid invalid\n");
- return -EINVAL;
- }
- if (lastfid > LO_FID_TABLE_TOP)
- printk(KERN_INFO PFX "first fid not from lo freq table\n");
-
- return 0;
-}
-
-static void print_basics(struct powernow_k8_data *data)
-{
- int j;
- for (j = 0; j < data->numps; j++) {
- if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID)
- printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x (%d mV)\n", j,
- data->powernow_table[j].index & 0xff,
- data->powernow_table[j].frequency/1000,
- data->powernow_table[j].index >> 8,
- find_millivolts_from_vid(data, data->powernow_table[j].index >> 8));
- }
- if (data->batps)
- printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps);
-}
-
-static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
-{
- struct cpufreq_frequency_table *powernow_table;
- unsigned int j;
-
- if (data->batps) { /* use ACPI support to get full speed on mains power */
- printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps);
- data->numps = data->batps;
- }
-
- for ( j=1; j<data->numps; j++ ) {
- if (pst[j-1].fid >= pst[j].fid) {
- printk(KERN_ERR PFX "PST out of sequence\n");
- return -EINVAL;
- }
- }
-
- if (data->numps < 2) {
- printk(KERN_ERR PFX "no p states to transition\n");
- return -ENODEV;
- }
-
- if (check_pst_table(data, pst, maxvid))
- return -EINVAL;
-
- powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
- * (data->numps + 1)), GFP_KERNEL);
- if (!powernow_table) {
- printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
- return -ENOMEM;
- }
-
- for (j = 0; j < data->numps; j++) {
- powernow_table[j].index = pst[j].fid; /* lower 8 bits */
- powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
- powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid);
- }
- powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
- powernow_table[data->numps].index = 0;
-
- if (query_current_values_with_pending_wait(data)) {
- kfree(powernow_table);
- return -EIO;
- }
-
- dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
- data->powernow_table = powernow_table;
- print_basics(data);
-
- for (j = 0; j < data->numps; j++)
- if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid))
- return 0;
-
- dprintk("currfid/vid do not match PST, ignoring\n");
- return 0;
-}
-
-/* Find and validate the PSB/PST table in BIOS. */
-static int find_psb_table(struct powernow_k8_data *data)
-{
- struct psb_s *psb;
- unsigned int i;
- u32 mvs;
- u8 maxvid;
- u32 cpst = 0;
- u32 thiscpuid;
-
- for (i = 0xc0000; i < 0xffff0; i += 0x10) {
- /* Scan BIOS looking for the signature. */
- /* It can not be at ffff0 - it is too big. */
-
- psb = phys_to_virt(i);
- if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
- continue;
-
- dprintk("found PSB header at 0x%p\n", psb);
-
- dprintk("table vers: 0x%x\n", psb->tableversion);
- if (psb->tableversion != PSB_VERSION_1_4) {
- printk(KERN_INFO BFX "PSB table is not v1.4\n");
- return -ENODEV;
- }
-
- dprintk("flags: 0x%x\n", psb->flags1);
- if (psb->flags1) {
- printk(KERN_ERR BFX "unknown flags\n");
- return -ENODEV;
- }
-
- data->vstable = psb->vstable;
- dprintk("voltage stabilization time: %d(*20us)\n", data->vstable);
-
- dprintk("flags2: 0x%x\n", psb->flags2);
- data->rvo = psb->flags2 & 3;
- data->irt = ((psb->flags2) >> 2) & 3;
- mvs = ((psb->flags2) >> 4) & 3;
- data->vidmvs = 1 << mvs;
- data->batps = ((psb->flags2) >> 6) & 3;
-
- dprintk("ramp voltage offset: %d\n", data->rvo);
- dprintk("isochronous relief time: %d\n", data->irt);
- dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
-
- dprintk("numpst: 0x%x\n", psb->num_tables);
- cpst = psb->num_tables;
- if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){
- thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
- if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) {
- cpst = 1;
- }
- }
- if (cpst != 1) {
- printk(KERN_ERR BFX "numpst must be 1\n");
- return -ENODEV;
- }
-
- data->plllock = psb->plllocktime;
- dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
- dprintk("maxfid: 0x%x\n", psb->maxfid);
- dprintk("maxvid: 0x%x\n", psb->maxvid);
- maxvid = psb->maxvid;
-
- data->numps = psb->numps;
- dprintk("numpstates: 0x%x\n", data->numps);
- return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid);
- }
- /*
- * If you see this message, complain to BIOS manufacturer. If
- * he tells you "we do not support Linux" or some similar
- * nonsense, remember that Windows 2000 uses the same legacy
- * mechanism that the old Linux PSB driver uses. Tell them it
- * is broken with Windows 2000.
- *
- * The reference to the AMD documentation is chapter 9 in the
- * BIOS and Kernel Developer's Guide, which is available on
- * www.amd.com
- */
- printk(KERN_INFO PFX "BIOS error - no PSB or ACPI _PSS objects\n");
- return -ENODEV;
-}
-
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
-static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
-{
- if (!data->acpi_data.state_count)
- return;
-
- data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
- data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
- data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
- data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
- data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
- data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
-}
-
-static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
-{
- int i;
- int cntlofreq = 0;
- struct cpufreq_frequency_table *powernow_table;
-
- if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
- dprintk("register performance failed: bad ACPI data\n");
- return -EIO;
- }
-
- /* verify the data contained in the ACPI structures */
- if (data->acpi_data.state_count <= 1) {
- dprintk("No ACPI P-States\n");
- goto err_out;
- }
-
- if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
- dprintk("Invalid control/status registers (%x - %x)\n",
- data->acpi_data.control_register.space_id,
- data->acpi_data.status_register.space_id);
- goto err_out;
- }
-
- /* fill in data->powernow_table */
- powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
- * (data->acpi_data.state_count + 1)), GFP_KERNEL);
- if (!powernow_table) {
- dprintk("powernow_table memory alloc failure\n");
- goto err_out;
- }
-
- for (i = 0; i < data->acpi_data.state_count; i++) {
- u32 fid;
- u32 vid;
-
- if (data->exttype) {
- fid = data->acpi_data.states[i].status & FID_MASK;
- vid = (data->acpi_data.states[i].status >> VID_SHIFT) & VID_MASK;
- } else {
- fid = data->acpi_data.states[i].control & FID_MASK;
- vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
- }
-
- dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
-
- powernow_table[i].index = fid; /* lower 8 bits */
- powernow_table[i].index |= (vid << 8); /* upper 8 bits */
- powernow_table[i].frequency = find_khz_freq_from_fid(fid);
-
- /* verify frequency is OK */
- if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) ||
- (powernow_table[i].frequency < (MIN_FREQ * 1000))) {
- dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency);
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
- continue;
- }
-
- /* verify voltage is OK - BIOSs are using "off" to indicate invalid */
- if (vid == VID_OFF) {
- dprintk("invalid vid %u, ignoring\n", vid);
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
- continue;
- }
-
- /* verify only 1 entry from the lo frequency table */
- if (fid < HI_FID_TABLE_BOTTOM) {
- if (cntlofreq) {
- /* if both entries are the same, ignore this
- * one...
- */
- if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) ||
- (powernow_table[i].index != powernow_table[cntlofreq].index)) {
- printk(KERN_ERR PFX "Too many lo freq table entries\n");
- goto err_out_mem;
- }
-
- dprintk("double low frequency table entry, ignoring it.\n");
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
- continue;
- } else
- cntlofreq = i;
- }
-
- if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
- printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
- powernow_table[i].frequency,
- (unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
- continue;
- }
- }
-
- powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
- powernow_table[data->acpi_data.state_count].index = 0;
- data->powernow_table = powernow_table;
-
- /* fill in data */
- data->numps = data->acpi_data.state_count;
- print_basics(data);
- powernow_k8_acpi_pst_values(data, 0);
-
- /* notify BIOS that we exist */
- acpi_processor_notify_smm(THIS_MODULE);
-
- return 0;
-
-err_out_mem:
- kfree(powernow_table);
-
-err_out:
- acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
-
- /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
- data->acpi_data.state_count = 0;
-
- return -ENODEV;
-}
-
-static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
-{
- if (data->acpi_data.state_count)
- acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
-}
-
-#else
-static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
-static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
-static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
-#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
-
-/* Take a frequency, and issue the fid/vid transition command */
-static int transition_frequency(struct powernow_k8_data *data, unsigned int index)
-{
- u32 fid;
- u32 vid;
- int res, i;
- struct cpufreq_freqs freqs;
-
- dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
-
- /* fid are the lower 8 bits of the index we stored into
- * the cpufreq frequency table in find_psb_table, vid are
- * the upper 8 bits.
- */
-
- fid = data->powernow_table[index].index & 0xFF;
- vid = (data->powernow_table[index].index & 0xFF00) >> 8;
-
- dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
-
- if (query_current_values_with_pending_wait(data))
- return 1;
-
- if ((data->currvid == vid) && (data->currfid == fid)) {
- dprintk("target matches current values (fid 0x%x, vid 0x%x)\n",
- fid, vid);
- return 0;
- }
-
- if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
- printk(KERN_ERR PFX
- "ignoring illegal change in lo freq table-%x to 0x%x\n",
- data->currfid, fid);
- return 1;
- }
-
- dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
- smp_processor_id(), fid, vid);
-
- freqs.cpu = data->cpu;
- freqs.old = find_khz_freq_from_fid(data->currfid);
- freqs.new = find_khz_freq_from_fid(fid);
- for_each_cpu_mask(i, cpu_core_map[data->cpu]) {
- freqs.cpu = i;
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
- }
-
- res = transition_fid_vid(data, fid, vid);
-
- freqs.new = find_khz_freq_from_fid(data->currfid);
- for_each_cpu_mask(i, cpu_core_map[data->cpu]) {
- freqs.cpu = i;
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
- }
- return res;
-}
-
-/* Driver entry point to switch to the target frequency */
-static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
-{
- cpumask_t oldmask = CPU_MASK_ALL;
- struct powernow_k8_data *data = powernow_data[pol->cpu];
- u32 checkfid = data->currfid;
- u32 checkvid = data->currvid;
- unsigned int newstate;
- int ret = -EIO;
- int i;
-
- /* only run on specific CPU from here on */
- oldmask = current->cpus_allowed;
- set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
- schedule();
-
- if (smp_processor_id() != pol->cpu) {
- printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
- goto err_out;
- }
-
- if (pending_bit_stuck()) {
- printk(KERN_ERR PFX "failing targ, change pending bit set\n");
- goto err_out;
- }
-
- dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
- pol->cpu, targfreq, pol->min, pol->max, relation);
-
- if (query_current_values_with_pending_wait(data)) {
- ret = -EIO;
- goto err_out;
- }
-
- dprintk("targ: curr fid 0x%x, vid 0x%x\n",
- data->currfid, data->currvid);
-
- if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
- printk(KERN_INFO PFX
- "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n",
- checkfid, data->currfid, checkvid, data->currvid);
- }
-
- if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate))
- goto err_out;
-
- down(&fidvid_sem);
-
- powernow_k8_acpi_pst_values(data, newstate);
-
- if (transition_frequency(data, newstate)) {
- printk(KERN_ERR PFX "transition frequency failed\n");
- ret = 1;
- up(&fidvid_sem);
- goto err_out;
- }
-
- /* Update all the fid/vids of our siblings */
- for_each_cpu_mask(i, cpu_core_map[pol->cpu]) {
- powernow_data[i]->currvid = data->currvid;
- powernow_data[i]->currfid = data->currfid;
- }
- up(&fidvid_sem);
-
- pol->cur = find_khz_freq_from_fid(data->currfid);
- ret = 0;
-
-err_out:
- set_cpus_allowed(current, oldmask);
- schedule();
-
- return ret;
-}
-
-/* Driver entry point to verify the policy and range of frequencies */
-static int powernowk8_verify(struct cpufreq_policy *pol)
-{
- struct powernow_k8_data *data = powernow_data[pol->cpu];
-
- return cpufreq_frequency_table_verify(pol, data->powernow_table);
-}
-
-/* per CPU init entry point to the driver */
-static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
-{
- struct powernow_k8_data *data;
- cpumask_t oldmask = CPU_MASK_ALL;
- int rc, i;
-
- if (!check_supported_cpu(pol->cpu))
- return -ENODEV;
-
- data = kmalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
- if (!data) {
- printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
- return -ENOMEM;
- }
- memset(data,0,sizeof(struct powernow_k8_data));
-
- data->cpu = pol->cpu;
-
- if (powernow_k8_cpu_init_acpi(data)) {
- /*
- * Use the PSB BIOS structure. This is only availabe on
- * an UP version, and is deprecated by AMD.
- */
-
- if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) {
- printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n");
- kfree(data);
- return -ENODEV;
- }
- if (pol->cpu != 0) {
- printk(KERN_ERR PFX "init not cpu 0\n");
- kfree(data);
- return -ENODEV;
- }
- rc = find_psb_table(data);
- if (rc) {
- kfree(data);
- return -ENODEV;
- }
- }
-
- /* only run on specific CPU from here on */
- oldmask = current->cpus_allowed;
- set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
- schedule();
-
- if (smp_processor_id() != pol->cpu) {
- printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
- goto err_out;
- }
-
- if (pending_bit_stuck()) {
- printk(KERN_ERR PFX "failing init, change pending bit set\n");
- goto err_out;
- }
-
- if (query_current_values_with_pending_wait(data))
- goto err_out;
-
- fidvid_msr_init();
-
- /* run on any CPU again */
- set_cpus_allowed(current, oldmask);
- schedule();
-
- pol->governor = CPUFREQ_DEFAULT_GOVERNOR;
- pol->cpus = cpu_core_map[pol->cpu];
-
- /* Take a crude guess here.
- * That guess was in microseconds, so multiply with 1000 */
- pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US)
- + (3 * (1 << data->irt) * 10)) * 1000;
-
- pol->cur = find_khz_freq_from_fid(data->currfid);
- dprintk("policy current frequency %d kHz\n", pol->cur);
-
- /* min/max the cpu is capable of */
- if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
- printk(KERN_ERR PFX "invalid powernow_table\n");
- powernow_k8_cpu_exit_acpi(data);
- kfree(data->powernow_table);
- kfree(data);
- return -EINVAL;
- }
-
- cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
-
- printk("cpu_init done, current fid 0x%x, vid 0x%x\n",
- data->currfid, data->currvid);
-
- for_each_cpu_mask(i, cpu_core_map[pol->cpu]) {
- powernow_data[i] = data;
- }
-
- return 0;
-
-err_out:
- set_cpus_allowed(current, oldmask);
- schedule();
- powernow_k8_cpu_exit_acpi(data);
-
- kfree(data);
- return -ENODEV;
-}
-
-static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
-{
- struct powernow_k8_data *data = powernow_data[pol->cpu];
-
- if (!data)
- return -EINVAL;
-
- powernow_k8_cpu_exit_acpi(data);
-
- cpufreq_frequency_table_put_attr(pol->cpu);
-
- kfree(data->powernow_table);
- kfree(data);
-
- return 0;
-}
-
-static unsigned int powernowk8_get (unsigned int cpu)
-{
- struct powernow_k8_data *data = powernow_data[cpu];
- cpumask_t oldmask = current->cpus_allowed;
- unsigned int khz = 0;
-
- set_cpus_allowed(current, cpumask_of_cpu(cpu));
- if (smp_processor_id() != cpu) {
- printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu);
- set_cpus_allowed(current, oldmask);
- return 0;
- }
- preempt_disable();
-
- if (query_current_values_with_pending_wait(data))
- goto out;
-
- khz = find_khz_freq_from_fid(data->currfid);
-
- out:
- preempt_enable_no_resched();
- set_cpus_allowed(current, oldmask);
-
- return khz;
-}
-
-static struct freq_attr* powernow_k8_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-static struct cpufreq_driver cpufreq_amd64_driver = {
- .verify = powernowk8_verify,
- .target = powernowk8_target,
- .init = powernowk8_cpu_init,
- .exit = __devexit_p(powernowk8_cpu_exit),
- .get = powernowk8_get,
- .name = "powernow-k8",
- .owner = THIS_MODULE,
- .attr = powernow_k8_attr,
-};
-
-/* driver entry point for init */
-static int __init powernowk8_init(void)
-{
- unsigned int i, supported_cpus = 0;
-
- for (i=0; i<NR_CPUS; i++) {
- if (!cpu_online(i))
- continue;
- if (check_supported_cpu(i))
- supported_cpus++;
- }
-
- if (supported_cpus == num_online_cpus()) {
- printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron processors (" VERSION ")\n",
- supported_cpus);
- return cpufreq_register_driver(&cpufreq_amd64_driver);
- }
-
- return -ENODEV;
-}
-
-/* driver entry point for term */
-static void __exit powernowk8_exit(void)
-{
- dprintk("exit\n");
-
- cpufreq_unregister_driver(&cpufreq_amd64_driver);
-}
-
-MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com.");
-MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
-MODULE_LICENSE("GPL");
-
-late_initcall(powernowk8_init);
-module_exit(powernowk8_exit);
-
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
deleted file mode 100644
index b1e85bb3639..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc.
- * Your use of this code is subject to the terms and conditions of the
- * GNU general public license version 2. See "COPYING" or
- * http://www.gnu.org/licenses/gpl.html
- */
-
-struct powernow_k8_data {
- unsigned int cpu;
-
- u32 numps; /* number of p-states */
- u32 batps; /* number of p-states supported on battery */
-
- /* these values are constant when the PSB is used to determine
- * vid/fid pairings, but are modified during the ->target() call
- * when ACPI is used */
- u32 rvo; /* ramp voltage offset */
- u32 irt; /* isochronous relief time */
- u32 vidmvs; /* usable value calculated from mvs */
- u32 vstable; /* voltage stabilization time, units 20 us */
- u32 plllock; /* pll lock time, units 1 us */
- u32 exttype; /* extended interface = 1 */
-
- /* keep track of the current fid / vid */
- u32 currvid, currfid;
-
- /* the powernow_table includes all frequency and vid/fid pairings:
- * fid are the lower 8 bits of the index, vid are the upper 8 bits.
- * frequency is in kHz */
- struct cpufreq_frequency_table *powernow_table;
-
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
- /* the acpi table needs to be kept. it's only available if ACPI was
- * used to determine valid frequency/vid/fid states */
- struct acpi_processor_performance acpi_data;
-#endif
-};
-
-
-/* processor's cpuid instruction support */
-#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */
-#define CPUID_XFAM 0x0ff00000 /* extended family */
-#define CPUID_XFAM_K8 0
-#define CPUID_XMOD 0x000f0000 /* extended model */
-#define CPUID_XMOD_REV_F 0x00040000
-#define CPUID_USE_XFAM_XMOD 0x00000f00
-#define CPUID_GET_MAX_CAPABILITIES 0x80000000
-#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
-#define P_STATE_TRANSITION_CAPABLE 6
-
-/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */
-/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */
-/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */
-/* the register number is placed in ecx, and the data is returned in edx:eax. */
-
-#define MSR_FIDVID_CTL 0xc0010041
-#define MSR_FIDVID_STATUS 0xc0010042
-
-/* Field definitions within the FID VID Low Control MSR : */
-#define MSR_C_LO_INIT_FID_VID 0x00010000
-#define MSR_C_LO_NEW_VID 0x00003f00
-#define MSR_C_LO_NEW_FID 0x0000003f
-#define MSR_C_LO_VID_SHIFT 8
-
-/* Field definitions within the FID VID High Control MSR : */
-#define MSR_C_HI_STP_GNT_TO 0x000fffff
-
-/* Field definitions within the FID VID Low Status MSR : */
-#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */
-#define MSR_S_LO_MAX_RAMP_VID 0x3f000000
-#define MSR_S_LO_MAX_FID 0x003f0000
-#define MSR_S_LO_START_FID 0x00003f00
-#define MSR_S_LO_CURRENT_FID 0x0000003f
-
-/* Field definitions within the FID VID High Status MSR : */
-#define MSR_S_HI_MIN_WORKING_VID 0x3f000000
-#define MSR_S_HI_MAX_WORKING_VID 0x003f0000
-#define MSR_S_HI_START_VID 0x00003f00
-#define MSR_S_HI_CURRENT_VID 0x0000003f
-#define MSR_C_HI_STP_GNT_BENIGN 0x00000001
-
-/*
- * There are restrictions frequencies have to follow:
- * - only 1 entry in the low fid table ( <=1.4GHz )
- * - lowest entry in the high fid table must be >= 2 * the entry in the
- * low fid table
- * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
- * in the low fid table
- * - the parts can only step at 200 MHz intervals, so 1.9 GHz is never valid
- * - lowest frequency must be >= interprocessor hypertransport link speed
- * (only applies to MP systems obviously)
- */
-
-/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
-#define LO_FID_TABLE_TOP 6 /* fid values marking the boundary */
-#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */
-
-#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */
-#define HI_VCOFREQ_TABLE_BOTTOM 1600
-
-#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */
-
-#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */
-#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */
-
-#define MIN_FREQ 800 /* Min and max freqs, per spec */
-#define MAX_FREQ 5000
-
-#define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */
-#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */
-
-#define VID_OFF 0x3f
-
-#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
-
-#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */
-
-#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */
-#define VST_UNITS_20US 20 /* Voltage Stabalization Time is in units of 20us */
-
-/*
- * Most values of interest are enocoded in a single field of the _PSS
- * entries: the "control" value.
- */
-
-#define IRT_SHIFT 30
-#define RVO_SHIFT 28
-#define EXT_TYPE_SHIFT 27
-#define PLL_L_SHIFT 20
-#define MVS_SHIFT 18
-#define VST_SHIFT 11
-#define VID_SHIFT 6
-#define IRT_MASK 3
-#define RVO_MASK 3
-#define EXT_TYPE_MASK 1
-#define PLL_L_MASK 0x7f
-#define MVS_MASK 3
-#define VST_MASK 0x7f
-#define VID_MASK 0x1f
-#define FID_MASK 0x3f
-
-
-/*
- * Version 1.4 of the PSB table. This table is constructed by BIOS and is
- * to tell the OS's power management driver which VIDs and FIDs are
- * supported by this particular processor.
- * If the data in the PSB / PST is wrong, then this driver will program the
- * wrong values into hardware, which is very likely to lead to a crash.
- */
-
-#define PSB_ID_STRING "AMDK7PNOW!"
-#define PSB_ID_STRING_LEN 10
-
-#define PSB_VERSION_1_4 0x14
-
-struct psb_s {
- u8 signature[10];
- u8 tableversion;
- u8 flags1;
- u16 vstable;
- u8 flags2;
- u8 num_tables;
- u32 cpuid;
- u8 plllocktime;
- u8 maxfid;
- u8 maxvid;
- u8 numps;
-};
-
-/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */
-struct pst_s {
- u8 fid;
- u8 vid;
-};
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
-
-static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid);
-static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
-static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
-
-static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
-
-#ifndef for_each_cpu_mask
-#define for_each_cpu_mask(i,mask) for (i=0;i<1;i++)
-#endif
-
-#ifdef CONFIG_SMP
-static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
-{
-}
-#else
-static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
-{
- cpu_set(0, cpu_sharedcore_mask[0]);
-}
-#endif
diff --git a/arch/i386/kernel/cpu/cpufreq/sc520_freq.c b/arch/i386/kernel/cpu/cpufreq/sc520_freq.c
deleted file mode 100644
index ef457d50f4a..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/sc520_freq.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * sc520_freq.c: cpufreq driver for the AMD Elan sc520
- *
- * Copyright (C) 2005 Sean Young <sean@mess.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Based on elanfreq.c
- *
- * 2005-03-30: - initial revision
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-
-#include <linux/delay.h>
-#include <linux/cpufreq.h>
-
-#include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
-
-#define MMCR_BASE 0xfffef000 /* The default base address */
-#define OFFS_CPUCTL 0x2 /* CPU Control Register */
-
-static __u8 __iomem *cpuctl;
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg)
-
-static struct cpufreq_frequency_table sc520_freq_table[] = {
- {0x01, 100000},
- {0x02, 133000},
- {0, CPUFREQ_TABLE_END},
-};
-
-static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
-{
- u8 clockspeed_reg = *cpuctl;
-
- switch (clockspeed_reg & 0x03) {
- default:
- printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg);
- case 0x01:
- return 100000;
- case 0x02:
- return 133000;
- }
-}
-
-static void sc520_freq_set_cpu_state (unsigned int state)
-{
-
- struct cpufreq_freqs freqs;
- u8 clockspeed_reg;
-
- freqs.old = sc520_freq_get_cpu_frequency(0);
- freqs.new = sc520_freq_table[state].frequency;
- freqs.cpu = 0; /* AMD Elan is UP */
-
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
- dprintk("attempting to set frequency to %i kHz\n",
- sc520_freq_table[state].frequency);
-
- local_irq_disable();
-
- clockspeed_reg = *cpuctl & ~0x03;
- *cpuctl = clockspeed_reg | sc520_freq_table[state].index;
-
- local_irq_enable();
-
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-};
-
-static int sc520_freq_verify (struct cpufreq_policy *policy)
-{
- return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]);
-}
-
-static int sc520_freq_target (struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
-{
- unsigned int newstate = 0;
-
- if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate))
- return -EINVAL;
-
- sc520_freq_set_cpu_state(newstate);
-
- return 0;
-}
-
-
-/*
- * Module init and exit code
- */
-
-static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
-{
- struct cpuinfo_x86 *c = cpu_data;
- int result;
-
- /* capability check */
- if (c->x86_vendor != X86_VENDOR_AMD ||
- c->x86 != 4 || c->x86_model != 9)
- return -ENODEV;
-
- /* cpuinfo and default policy values */
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
- policy->cpuinfo.transition_latency = 1000000; /* 1ms */
- policy->cur = sc520_freq_get_cpu_frequency(0);
-
- result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table);
- if (result)
- return (result);
-
- cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu);
-
- return 0;
-}
-
-
-static int sc520_freq_cpu_exit(struct cpufreq_policy *policy)
-{
- cpufreq_frequency_table_put_attr(policy->cpu);
- return 0;
-}
-
-
-static struct freq_attr* sc520_freq_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-
-static struct cpufreq_driver sc520_freq_driver = {
- .get = sc520_freq_get_cpu_frequency,
- .verify = sc520_freq_verify,
- .target = sc520_freq_target,
- .init = sc520_freq_cpu_init,
- .exit = sc520_freq_cpu_exit,
- .name = "sc520_freq",
- .owner = THIS_MODULE,
- .attr = sc520_freq_attr,
-};
-
-
-static int __init sc520_freq_init(void)
-{
- struct cpuinfo_x86 *c = cpu_data;
-
- /* Test if we have the right hardware */
- if(c->x86_vendor != X86_VENDOR_AMD ||
- c->x86 != 4 || c->x86_model != 9) {
- dprintk("no Elan SC520 processor found!\n");
- return -ENODEV;
- }
- cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
- if(!cpuctl) {
- printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
- return -ENOMEM;
- }
-
- return cpufreq_register_driver(&sc520_freq_driver);
-}
-
-
-static void __exit sc520_freq_exit(void)
-{
- cpufreq_unregister_driver(&sc520_freq_driver);
- iounmap(cpuctl);
-}
-
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Sean Young <sean@mess.org>");
-MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU");
-
-module_init(sc520_freq_init);
-module_exit(sc520_freq_exit);
-
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
deleted file mode 100644
index 0ea010a7afb..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ /dev/null
@@ -1,722 +0,0 @@
-/*
- * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium
- * M (part of the Centrino chipset).
- *
- * Despite the "SpeedStep" in the name, this is almost entirely unlike
- * traditional SpeedStep.
- *
- * Modelled on speedstep.c
- *
- * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
- *
- * WARNING WARNING WARNING
- *
- * This driver manipulates the PERF_CTL MSR, which is only somewhat
- * documented. While it seems to work on my laptop, it has not been
- * tested anywhere else, and it may not work for you, do strange
- * things or simply crash.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/config.h>
-#include <linux/sched.h> /* current */
-#include <linux/delay.h>
-#include <linux/compiler.h>
-
-#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
-#include <linux/acpi.h>
-#include <acpi/processor.h>
-#endif
-
-#include <asm/msr.h>
-#include <asm/processor.h>
-#include <asm/cpufeature.h>
-
-#include "speedstep-est-common.h"
-
-#define PFX "speedstep-centrino: "
-#define MAINTAINER "Jeremy Fitzhardinge <jeremy@goop.org>"
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
-
-
-struct cpu_id
-{
- __u8 x86; /* CPU family */
- __u8 x86_model; /* model */
- __u8 x86_mask; /* stepping */
-};
-
-enum {
- CPU_BANIAS,
- CPU_DOTHAN_A1,
- CPU_DOTHAN_A2,
- CPU_DOTHAN_B0,
- CPU_MP4HT_D0,
- CPU_MP4HT_E0,
-};
-
-static const struct cpu_id cpu_ids[] = {
- [CPU_BANIAS] = { 6, 9, 5 },
- [CPU_DOTHAN_A1] = { 6, 13, 1 },
- [CPU_DOTHAN_A2] = { 6, 13, 2 },
- [CPU_DOTHAN_B0] = { 6, 13, 6 },
- [CPU_MP4HT_D0] = {15, 3, 4 },
- [CPU_MP4HT_E0] = {15, 4, 1 },
-};
-#define N_IDS ARRAY_SIZE(cpu_ids)
-
-struct cpu_model
-{
- const struct cpu_id *cpu_id;
- const char *model_name;
- unsigned max_freq; /* max clock in kHz */
-
- struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
-};
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x);
-
-/* Operating points for current CPU */
-static struct cpu_model *centrino_model[NR_CPUS];
-static const struct cpu_id *centrino_cpu[NR_CPUS];
-
-static struct cpufreq_driver centrino_driver;
-
-#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE
-
-/* Computes the correct form for IA32_PERF_CTL MSR for a particular
- frequency/voltage operating point; frequency in MHz, volts in mV.
- This is stored as "index" in the structure. */
-#define OP(mhz, mv) \
- { \
- .frequency = (mhz) * 1000, \
- .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \
- }
-
-/*
- * These voltage tables were derived from the Intel Pentium M
- * datasheet, document 25261202.pdf, Table 5. I have verified they
- * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium
- * M.
- */
-
-/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */
-static struct cpufreq_frequency_table banias_900[] =
-{
- OP(600, 844),
- OP(800, 988),
- OP(900, 1004),
- { .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */
-static struct cpufreq_frequency_table banias_1000[] =
-{
- OP(600, 844),
- OP(800, 972),
- OP(900, 988),
- OP(1000, 1004),
- { .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */
-static struct cpufreq_frequency_table banias_1100[] =
-{
- OP( 600, 956),
- OP( 800, 1020),
- OP( 900, 1100),
- OP(1000, 1164),
- OP(1100, 1180),
- { .frequency = CPUFREQ_TABLE_END }
-};
-
-
-/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */
-static struct cpufreq_frequency_table banias_1200[] =
-{
- OP( 600, 956),
- OP( 800, 1004),
- OP( 900, 1020),
- OP(1000, 1100),
- OP(1100, 1164),
- OP(1200, 1180),
- { .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.30GHz (Banias) */
-static struct cpufreq_frequency_table banias_1300[] =
-{
- OP( 600, 956),
- OP( 800, 1260),
- OP(1000, 1292),
- OP(1200, 1356),
- OP(1300, 1388),
- { .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.40GHz (Banias) */
-static struct cpufreq_frequency_table banias_1400[] =
-{
- OP( 600, 956),
- OP( 800, 1180),
- OP(1000, 1308),
- OP(1200, 1436),
- OP(1400, 1484),
- { .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.50GHz (Banias) */
-static struct cpufreq_frequency_table banias_1500[] =
-{
- OP( 600, 956),
- OP( 800, 1116),
- OP(1000, 1228),
- OP(1200, 1356),
- OP(1400, 1452),
- OP(1500, 1484),
- { .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.60GHz (Banias) */
-static struct cpufreq_frequency_table banias_1600[] =
-{
- OP( 600, 956),
- OP( 800, 1036),
- OP(1000, 1164),
- OP(1200, 1276),
- OP(1400, 1420),
- OP(1600, 1484),
- { .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.70GHz (Banias) */
-static struct cpufreq_frequency_table banias_1700[] =
-{
- OP( 600, 956),
- OP( 800, 1004),
- OP(1000, 1116),
- OP(1200, 1228),
- OP(1400, 1308),
- OP(1700, 1484),
- { .frequency = CPUFREQ_TABLE_END }
-};
-#undef OP
-
-#define _BANIAS(cpuid, max, name) \
-{ .cpu_id = cpuid, \
- .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \
- .max_freq = (max)*1000, \
- .op_points = banias_##max, \
-}
-#define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max)
-
-/* CPU models, their operating frequency range, and freq/voltage
- operating points */
-static struct cpu_model models[] =
-{
- _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"),
- BANIAS(1000),
- BANIAS(1100),
- BANIAS(1200),
- BANIAS(1300),
- BANIAS(1400),
- BANIAS(1500),
- BANIAS(1600),
- BANIAS(1700),
-
- /* NULL model_name is a wildcard */
- { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL },
- { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL },
- { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL },
- { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL },
- { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL },
-
- { NULL, }
-};
-#undef _BANIAS
-#undef BANIAS
-
-static int centrino_cpu_init_table(struct cpufreq_policy *policy)
-{
- struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
- struct cpu_model *model;
-
- for(model = models; model->cpu_id != NULL; model++)
- if (centrino_verify_cpu_id(cpu, model->cpu_id) &&
- (model->model_name == NULL ||
- strcmp(cpu->x86_model_id, model->model_name) == 0))
- break;
-
- if (model->cpu_id == NULL) {
- /* No match at all */
- dprintk(KERN_INFO PFX "no support for CPU model \"%s\": "
- "send /proc/cpuinfo to " MAINTAINER "\n",
- cpu->x86_model_id);
- return -ENOENT;
- }
-
- if (model->op_points == NULL) {
- /* Matched a non-match */
- dprintk(KERN_INFO PFX "no table support for CPU model \"%s\"\n",
- cpu->x86_model_id);
-#ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
- dprintk(KERN_INFO PFX "try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n");
-#endif
- return -ENOENT;
- }
-
- centrino_model[policy->cpu] = model;
-
- dprintk("found \"%s\": max frequency: %dkHz\n",
- model->model_name, model->max_freq);
-
- return 0;
-}
-
-#else
-static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; }
-#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
-
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x)
-{
- if ((c->x86 == x->x86) &&
- (c->x86_model == x->x86_model) &&
- (c->x86_mask == x->x86_mask))
- return 1;
- return 0;
-}
-
-/* To be called only after centrino_model is initialized */
-static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
-{
- int i;
-
- /*
- * Extract clock in kHz from PERF_CTL value
- * for centrino, as some DSDTs are buggy.
- * Ideally, this can be done using the acpi_data structure.
- */
- if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) ||
- (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) ||
- (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) {
- msr = (msr >> 8) & 0xff;
- return msr * 100000;
- }
-
- if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points))
- return 0;
-
- msr &= 0xffff;
- for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) {
- if (msr == centrino_model[cpu]->op_points[i].index)
- return centrino_model[cpu]->op_points[i].frequency;
- }
- if (failsafe)
- return centrino_model[cpu]->op_points[i-1].frequency;
- else
- return 0;
-}
-
-/* Return the current CPU frequency in kHz */
-static unsigned int get_cur_freq(unsigned int cpu)
-{
- unsigned l, h;
- unsigned clock_freq;
- cpumask_t saved_mask;
-
- saved_mask = current->cpus_allowed;
- set_cpus_allowed(current, cpumask_of_cpu(cpu));
- if (smp_processor_id() != cpu)
- return 0;
-
- rdmsr(MSR_IA32_PERF_STATUS, l, h);
- clock_freq = extract_clock(l, cpu, 0);
-
- if (unlikely(clock_freq == 0)) {
- /*
- * On some CPUs, we can see transient MSR values (which are
- * not present in _PSS), while CPU is doing some automatic
- * P-state transition (like TM2). Get the last freq set
- * in PERF_CTL.
- */
- rdmsr(MSR_IA32_PERF_CTL, l, h);
- clock_freq = extract_clock(l, cpu, 1);
- }
-
- set_cpus_allowed(current, saved_mask);
- return clock_freq;
-}
-
-
-#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
-
-static struct acpi_processor_performance p;
-
-/*
- * centrino_cpu_init_acpi - register with ACPI P-States library
- *
- * Register with the ACPI P-States library (part of drivers/acpi/processor.c)
- * in order to determine correct frequency and voltage pairings by reading
- * the _PSS of the ACPI DSDT or SSDT tables.
- */
-static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
-{
- union acpi_object arg0 = {ACPI_TYPE_BUFFER};
- u32 arg0_buf[3];
- struct acpi_object_list arg_list = {1, &arg0};
- unsigned long cur_freq;
- int result = 0, i;
- unsigned int cpu = policy->cpu;
-
- /* _PDC settings */
- arg0.buffer.length = 12;
- arg0.buffer.pointer = (u8 *) arg0_buf;
- arg0_buf[0] = ACPI_PDC_REVISION_ID;
- arg0_buf[1] = 1;
- arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP_MSR;
-
- p.pdc = &arg_list;
-
- /* register with ACPI core */
- if (acpi_processor_register_performance(&p, cpu)) {
- dprintk(KERN_INFO PFX "obtaining ACPI data failed\n");
- return -EIO;
- }
-
- /* verify the acpi_data */
- if (p.state_count <= 1) {
- dprintk("No P-States\n");
- result = -ENODEV;
- goto err_unreg;
- }
-
- if ((p.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (p.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
- dprintk("Invalid control/status registers (%x - %x)\n",
- p.control_register.space_id, p.status_register.space_id);
- result = -EIO;
- goto err_unreg;
- }
-
- for (i=0; i<p.state_count; i++) {
- if (p.states[i].control != p.states[i].status) {
- dprintk("Different control (%llu) and status values (%llu)\n",
- p.states[i].control, p.states[i].status);
- result = -EINVAL;
- goto err_unreg;
- }
-
- if (!p.states[i].core_frequency) {
- dprintk("Zero core frequency for state %u\n", i);
- result = -EINVAL;
- goto err_unreg;
- }
-
- if (p.states[i].core_frequency > p.states[0].core_frequency) {
- dprintk("P%u has larger frequency (%llu) than P0 (%llu), skipping\n", i,
- p.states[i].core_frequency, p.states[0].core_frequency);
- p.states[i].core_frequency = 0;
- continue;
- }
- }
-
- centrino_model[cpu] = kmalloc(sizeof(struct cpu_model), GFP_KERNEL);
- if (!centrino_model[cpu]) {
- result = -ENOMEM;
- goto err_unreg;
- }
- memset(centrino_model[cpu], 0, sizeof(struct cpu_model));
-
- centrino_model[cpu]->model_name=NULL;
- centrino_model[cpu]->max_freq = p.states[0].core_frequency * 1000;
- centrino_model[cpu]->op_points = kmalloc(sizeof(struct cpufreq_frequency_table) *
- (p.state_count + 1), GFP_KERNEL);
- if (!centrino_model[cpu]->op_points) {
- result = -ENOMEM;
- goto err_kfree;
- }
-
- for (i=0; i<p.state_count; i++) {
- centrino_model[cpu]->op_points[i].index = p.states[i].control;
- centrino_model[cpu]->op_points[i].frequency = p.states[i].core_frequency * 1000;
- dprintk("adding state %i with frequency %u and control value %04x\n",
- i, centrino_model[cpu]->op_points[i].frequency, centrino_model[cpu]->op_points[i].index);
- }
- centrino_model[cpu]->op_points[p.state_count].frequency = CPUFREQ_TABLE_END;
-
- cur_freq = get_cur_freq(cpu);
-
- for (i=0; i<p.state_count; i++) {
- if (!p.states[i].core_frequency) {
- dprintk("skipping state %u\n", i);
- centrino_model[cpu]->op_points[i].frequency = CPUFREQ_ENTRY_INVALID;
- continue;
- }
-
- if (extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0) !=
- (centrino_model[cpu]->op_points[i].frequency)) {
- dprintk("Invalid encoded frequency (%u vs. %u)\n",
- extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0),
- centrino_model[cpu]->op_points[i].frequency);
- result = -EINVAL;
- goto err_kfree_all;
- }
-
- if (cur_freq == centrino_model[cpu]->op_points[i].frequency)
- p.state = i;
- }
-
- /* notify BIOS that we exist */
- acpi_processor_notify_smm(THIS_MODULE);
-
- return 0;
-
- err_kfree_all:
- kfree(centrino_model[cpu]->op_points);
- err_kfree:
- kfree(centrino_model[cpu]);
- err_unreg:
- acpi_processor_unregister_performance(&p, cpu);
- dprintk(KERN_INFO PFX "invalid ACPI data\n");
- return (result);
-}
-#else
-static inline int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { return -ENODEV; }
-#endif
-
-static int centrino_cpu_init(struct cpufreq_policy *policy)
-{
- struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
- unsigned freq;
- unsigned l, h;
- int ret;
- int i;
-
- /* Only Intel makes Enhanced Speedstep-capable CPUs */
- if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST))
- return -ENODEV;
-
- if (is_const_loops_cpu(policy->cpu)) {
- centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
- }
-
- if (centrino_cpu_init_acpi(policy)) {
- if (policy->cpu != 0)
- return -ENODEV;
-
- for (i = 0; i < N_IDS; i++)
- if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
- break;
-
- if (i != N_IDS)
- centrino_cpu[policy->cpu] = &cpu_ids[i];
-
- if (!centrino_cpu[policy->cpu]) {
- dprintk(KERN_INFO PFX "found unsupported CPU with "
- "Enhanced SpeedStep: send /proc/cpuinfo to "
- MAINTAINER "\n");
- return -ENODEV;
- }
-
- if (centrino_cpu_init_table(policy)) {
- return -ENODEV;
- }
- }
-
- /* Check to see if Enhanced SpeedStep is enabled, and try to
- enable it if not. */
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-
- if (!(l & (1<<16))) {
- l |= (1<<16);
- dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
- wrmsr(MSR_IA32_MISC_ENABLE, l, h);
-
- /* check to see if it stuck */
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- if (!(l & (1<<16))) {
- printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n");
- return -ENODEV;
- }
- }
-
- freq = get_cur_freq(policy->cpu);
-
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
- policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */
- policy->cur = freq;
-
- dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
-
- ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points);
- if (ret)
- return (ret);
-
- cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu);
-
- return 0;
-}
-
-static int centrino_cpu_exit(struct cpufreq_policy *policy)
-{
- unsigned int cpu = policy->cpu;
-
- if (!centrino_model[cpu])
- return -ENODEV;
-
- cpufreq_frequency_table_put_attr(cpu);
-
-#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
- if (!centrino_model[cpu]->model_name) {
- dprintk("unregistering and freeing ACPI data\n");
- acpi_processor_unregister_performance(&p, cpu);
- kfree(centrino_model[cpu]->op_points);
- kfree(centrino_model[cpu]);
- }
-#endif
-
- centrino_model[cpu] = NULL;
-
- return 0;
-}
-
-/**
- * centrino_verify - verifies a new CPUFreq policy
- * @policy: new policy
- *
- * Limit must be within this model's frequency range at least one
- * border included.
- */
-static int centrino_verify (struct cpufreq_policy *policy)
-{
- return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points);
-}
-
-/**
- * centrino_setpolicy - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * Sets a new CPUFreq policy.
- */
-static int centrino_target (struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
-{
- unsigned int newstate = 0;
- unsigned int msr, oldmsr, h, cpu = policy->cpu;
- struct cpufreq_freqs freqs;
- cpumask_t saved_mask;
- int retval;
-
- if (centrino_model[cpu] == NULL)
- return -ENODEV;
-
- /*
- * Support for SMP systems.
- * Make sure we are running on the CPU that wants to change frequency
- */
- saved_mask = current->cpus_allowed;
- set_cpus_allowed(current, policy->cpus);
- if (!cpu_isset(smp_processor_id(), policy->cpus)) {
- dprintk("couldn't limit to CPUs in this domain\n");
- return(-EAGAIN);
- }
-
- if (cpufreq_frequency_table_target(policy, centrino_model[cpu]->op_points, target_freq,
- relation, &newstate)) {
- retval = -EINVAL;
- goto migrate_end;
- }
-
- msr = centrino_model[cpu]->op_points[newstate].index;
- rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
-
- if (msr == (oldmsr & 0xffff)) {
- retval = 0;
- dprintk("no change needed - msr was and needs to be %x\n", oldmsr);
- goto migrate_end;
- }
-
- freqs.cpu = cpu;
- freqs.old = extract_clock(oldmsr, cpu, 0);
- freqs.new = extract_clock(msr, cpu, 0);
-
- dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
- target_freq, freqs.old, freqs.new, msr);
-
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
- /* all but 16 LSB are "reserved", so treat them with
- care */
- oldmsr &= ~0xffff;
- msr &= 0xffff;
- oldmsr |= msr;
-
- wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
-
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
- retval = 0;
-migrate_end:
- set_cpus_allowed(current, saved_mask);
- return (retval);
-}
-
-static struct freq_attr* centrino_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-static struct cpufreq_driver centrino_driver = {
- .name = "centrino", /* should be speedstep-centrino,
- but there's a 16 char limit */
- .init = centrino_cpu_init,
- .exit = centrino_cpu_exit,
- .verify = centrino_verify,
- .target = centrino_target,
- .get = get_cur_freq,
- .attr = centrino_attr,
- .owner = THIS_MODULE,
-};
-
-
-/**
- * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver
- *
- * Initializes the Enhanced SpeedStep support. Returns -ENODEV on
- * unsupported devices, -ENOENT if there's no voltage table for this
- * particular CPU model, -EINVAL on problems during initiatization,
- * and zero on success.
- *
- * This is quite picky. Not only does the CPU have to advertise the
- * "est" flag in the cpuid capability flags, we look for a specific
- * CPU model and stepping, and we need to have the exact model name in
- * our voltage tables. That is, be paranoid about not releasing
- * someone's valuable magic smoke.
- */
-static int __init centrino_init(void)
-{
- struct cpuinfo_x86 *cpu = cpu_data;
-
- if (!cpu_has(cpu, X86_FEATURE_EST))
- return -ENODEV;
-
- return cpufreq_register_driver(&centrino_driver);
-}
-
-static void __exit centrino_exit(void)
-{
- cpufreq_unregister_driver(&centrino_driver);
-}
-
-MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
-MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors.");
-MODULE_LICENSE ("GPL");
-
-late_initcall(centrino_init);
-module_exit(centrino_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h b/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h
deleted file mode 100644
index 5ce995c9d86..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Routines common for drivers handling Enhanced Speedstep Technology
- * Copyright (C) 2004 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- *
- * Licensed under the terms of the GNU GPL License version 2 -- see
- * COPYING for details.
- */
-
-static inline int is_const_loops_cpu(unsigned int cpu)
-{
- struct cpuinfo_x86 *c = cpu_data + cpu;
-
- if (c->x86_vendor != X86_VENDOR_INTEL || !cpu_has(c, X86_FEATURE_EST))
- return 0;
-
- /*
- * on P-4s, the TSC runs with constant frequency independent of cpu freq
- * when we use EST
- */
- if (c->x86 == 0xf)
- return 1;
-
- return 0;
-}
-
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
deleted file mode 100644
index 5b7d18a06af..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
+++ /dev/null
@@ -1,424 +0,0 @@
-/*
- * (C) 2001 Dave Jones, Arjan van de ven.
- * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- *
- * Licensed under the terms of the GNU GPL License version 2.
- * Based upon reverse engineered information, and on Intel documentation
- * for chipsets ICH2-M and ICH3-M.
- *
- * Many thanks to Ducrot Bruno for finding and fixing the last
- * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler
- * for extensive testing.
- *
- * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-
-/*********************************************************************
- * SPEEDSTEP - DEFINITIONS *
- *********************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-
-#include "speedstep-lib.h"
-
-
-/* speedstep_chipset:
- * It is necessary to know which chipset is used. As accesses to
- * this device occur at various places in this module, we need a
- * static struct pci_dev * pointing to that device.
- */
-static struct pci_dev *speedstep_chipset_dev;
-
-
-/* speedstep_processor
- */
-static unsigned int speedstep_processor = 0;
-
-
-/*
- * There are only two frequency states for each processor. Values
- * are in kHz for the time being.
- */
-static struct cpufreq_frequency_table speedstep_freqs[] = {
- {SPEEDSTEP_HIGH, 0},
- {SPEEDSTEP_LOW, 0},
- {0, CPUFREQ_TABLE_END},
-};
-
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg)
-
-
-/**
- * speedstep_set_state - set the SpeedStep state
- * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
- *
- * Tries to change the SpeedStep state.
- */
-static void speedstep_set_state (unsigned int state)
-{
- u32 pmbase;
- u8 pm2_blk;
- u8 value;
- unsigned long flags;
-
- if (!speedstep_chipset_dev || (state > 0x1))
- return;
-
- /* get PMBASE */
- pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
- if (!(pmbase & 0x01)) {
- printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
- return;
- }
-
- pmbase &= 0xFFFFFFFE;
- if (!pmbase) {
- printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
- return;
- }
-
- /* Disable IRQs */
- local_irq_save(flags);
-
- /* read state */
- value = inb(pmbase + 0x50);
-
- dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
-
- /* write new state */
- value &= 0xFE;
- value |= state;
-
- dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
-
- /* Disable bus master arbitration */
- pm2_blk = inb(pmbase + 0x20);
- pm2_blk |= 0x01;
- outb(pm2_blk, (pmbase + 0x20));
-
- /* Actual transition */
- outb(value, (pmbase + 0x50));
-
- /* Restore bus master arbitration */
- pm2_blk &= 0xfe;
- outb(pm2_blk, (pmbase + 0x20));
-
- /* check if transition was successful */
- value = inb(pmbase + 0x50);
-
- /* Enable IRQs */
- local_irq_restore(flags);
-
- dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
-
- if (state == (value & 0x1)) {
- dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000));
- } else {
- printk (KERN_ERR "cpufreq: change failed - I/O error\n");
- }
-
- return;
-}
-
-
-/**
- * speedstep_activate - activate SpeedStep control in the chipset
- *
- * Tries to activate the SpeedStep status and control registers.
- * Returns -EINVAL on an unsupported chipset, and zero on success.
- */
-static int speedstep_activate (void)
-{
- u16 value = 0;
-
- if (!speedstep_chipset_dev)
- return -EINVAL;
-
- pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value);
- if (!(value & 0x08)) {
- value |= 0x08;
- dprintk("activating SpeedStep (TM) registers\n");
- pci_write_config_word(speedstep_chipset_dev, 0x00A0, value);
- }
-
- return 0;
-}
-
-
-/**
- * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic
- *
- * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to
- * the LPC bridge / PM module which contains all power-management
- * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
- * chipset, or zero on failure.
- */
-static unsigned int speedstep_detect_chipset (void)
-{
- speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_82801DB_12,
- PCI_ANY_ID,
- PCI_ANY_ID,
- NULL);
- if (speedstep_chipset_dev)
- return 4; /* 4-M */
-
- speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_82801CA_12,
- PCI_ANY_ID,
- PCI_ANY_ID,
- NULL);
- if (speedstep_chipset_dev)
- return 3; /* 3-M */
-
-
- speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_82801BA_10,
- PCI_ANY_ID,
- PCI_ANY_ID,
- NULL);
- if (speedstep_chipset_dev) {
- /* speedstep.c causes lockups on Dell Inspirons 8000 and
- * 8100 which use a pretty old revision of the 82815
- * host brige. Abort on these systems.
- */
- static struct pci_dev *hostbridge;
- u8 rev = 0;
-
- hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_82815_MC,
- PCI_ANY_ID,
- PCI_ANY_ID,
- NULL);
-
- if (!hostbridge)
- return 2; /* 2-M */
-
- pci_read_config_byte(hostbridge, PCI_REVISION_ID, &rev);
- if (rev < 5) {
- dprintk("hostbridge does not support speedstep\n");
- speedstep_chipset_dev = NULL;
- pci_dev_put(hostbridge);
- return 0;
- }
-
- pci_dev_put(hostbridge);
- return 2; /* 2-M */
- }
-
- return 0;
-}
-
-static unsigned int _speedstep_get(cpumask_t cpus)
-{
- unsigned int speed;
- cpumask_t cpus_allowed;
-
- cpus_allowed = current->cpus_allowed;
- set_cpus_allowed(current, cpus);
- speed = speedstep_get_processor_frequency(speedstep_processor);
- set_cpus_allowed(current, cpus_allowed);
- dprintk("detected %u kHz as current frequency\n", speed);
- return speed;
-}
-
-static unsigned int speedstep_get(unsigned int cpu)
-{
- return _speedstep_get(cpumask_of_cpu(cpu));
-}
-
-/**
- * speedstep_target - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * Sets a new CPUFreq policy.
- */
-static int speedstep_target (struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
-{
- unsigned int newstate = 0;
- struct cpufreq_freqs freqs;
- cpumask_t cpus_allowed;
- int i;
-
- if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
- return -EINVAL;
-
- freqs.old = _speedstep_get(policy->cpus);
- freqs.new = speedstep_freqs[newstate].frequency;
- freqs.cpu = policy->cpu;
-
- dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new);
-
- /* no transition necessary */
- if (freqs.old == freqs.new)
- return 0;
-
- cpus_allowed = current->cpus_allowed;
-
- for_each_cpu_mask(i, policy->cpus) {
- freqs.cpu = i;
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
- }
-
- /* switch to physical CPU where state is to be changed */
- set_cpus_allowed(current, policy->cpus);
-
- speedstep_set_state(newstate);
-
- /* allow to be run on all CPUs */
- set_cpus_allowed(current, cpus_allowed);
-
- for_each_cpu_mask(i, policy->cpus) {
- freqs.cpu = i;
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
- }
-
- return 0;
-}
-
-
-/**
- * speedstep_verify - verifies a new CPUFreq policy
- * @policy: new policy
- *
- * Limit must be within speedstep_low_freq and speedstep_high_freq, with
- * at least one border included.
- */
-static int speedstep_verify (struct cpufreq_policy *policy)
-{
- return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
-}
-
-
-static int speedstep_cpu_init(struct cpufreq_policy *policy)
-{
- int result = 0;
- unsigned int speed;
- cpumask_t cpus_allowed;
-
- /* only run on CPU to be set, or on its sibling */
-#ifdef CONFIG_SMP
- policy->cpus = cpu_sibling_map[policy->cpu];
-#endif
-
- cpus_allowed = current->cpus_allowed;
- set_cpus_allowed(current, policy->cpus);
-
- /* detect low and high frequency */
- result = speedstep_get_freqs(speedstep_processor,
- &speedstep_freqs[SPEEDSTEP_LOW].frequency,
- &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
- &speedstep_set_state);
- set_cpus_allowed(current, cpus_allowed);
- if (result)
- return result;
-
- /* get current speed setting */
- speed = _speedstep_get(policy->cpus);
- if (!speed)
- return -EIO;
-
- dprintk("currently at %s speed setting - %i MHz\n",
- (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
- (speed / 1000));
-
- /* cpuinfo and default policy values */
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- policy->cur = speed;
-
- result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
- if (result)
- return (result);
-
- cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
-
- return 0;
-}
-
-
-static int speedstep_cpu_exit(struct cpufreq_policy *policy)
-{
- cpufreq_frequency_table_put_attr(policy->cpu);
- return 0;
-}
-
-static struct freq_attr* speedstep_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-
-static struct cpufreq_driver speedstep_driver = {
- .name = "speedstep-ich",
- .verify = speedstep_verify,
- .target = speedstep_target,
- .init = speedstep_cpu_init,
- .exit = speedstep_cpu_exit,
- .get = speedstep_get,
- .owner = THIS_MODULE,
- .attr = speedstep_attr,
-};
-
-
-/**
- * speedstep_init - initializes the SpeedStep CPUFreq driver
- *
- * Initializes the SpeedStep support. Returns -ENODEV on unsupported
- * devices, -EINVAL on problems during initiatization, and zero on
- * success.
- */
-static int __init speedstep_init(void)
-{
- /* detect processor */
- speedstep_processor = speedstep_detect_processor();
- if (!speedstep_processor) {
- dprintk("Intel(R) SpeedStep(TM) capable processor not found\n");
- return -ENODEV;
- }
-
- /* detect chipset */
- if (!speedstep_detect_chipset()) {
- dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n");
- return -ENODEV;
- }
-
- /* activate speedstep support */
- if (speedstep_activate()) {
- pci_dev_put(speedstep_chipset_dev);
- return -EINVAL;
- }
-
- return cpufreq_register_driver(&speedstep_driver);
-}
-
-
-/**
- * speedstep_exit - unregisters SpeedStep support
- *
- * Unregisters SpeedStep support.
- */
-static void __exit speedstep_exit(void)
-{
- pci_dev_put(speedstep_chipset_dev);
- cpufreq_unregister_driver(&speedstep_driver);
-}
-
-
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
-MODULE_LICENSE ("GPL");
-
-module_init(speedstep_init);
-module_exit(speedstep_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c
deleted file mode 100644
index d368b3f5fce..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- *
- * Licensed under the terms of the GNU GPL License version 2.
- *
- * Library for common functions for Intel SpeedStep v.1 and v.2 support
- *
- * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-
-#include <asm/msr.h>
-#include "speedstep-lib.h"
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg)
-
-#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
-static int relaxed_check = 0;
-#else
-#define relaxed_check 0
-#endif
-
-/*********************************************************************
- * GET PROCESSOR CORE SPEED IN KHZ *
- *********************************************************************/
-
-static unsigned int pentium3_get_frequency (unsigned int processor)
-{
- /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
- struct {
- unsigned int ratio; /* Frequency Multiplier (x10) */
- u8 bitmap; /* power on configuration bits
- [27, 25:22] (in MSR 0x2a) */
- } msr_decode_mult [] = {
- { 30, 0x01 },
- { 35, 0x05 },
- { 40, 0x02 },
- { 45, 0x06 },
- { 50, 0x00 },
- { 55, 0x04 },
- { 60, 0x0b },
- { 65, 0x0f },
- { 70, 0x09 },
- { 75, 0x0d },
- { 80, 0x0a },
- { 85, 0x26 },
- { 90, 0x20 },
- { 100, 0x2b },
- { 0, 0xff } /* error or unknown value */
- };
-
- /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
- struct {
- unsigned int value; /* Front Side Bus speed in MHz */
- u8 bitmap; /* power on configuration bits [18: 19]
- (in MSR 0x2a) */
- } msr_decode_fsb [] = {
- { 66, 0x0 },
- { 100, 0x2 },
- { 133, 0x1 },
- { 0, 0xff}
- };
-
- u32 msr_lo, msr_tmp;
- int i = 0, j = 0;
-
- /* read MSR 0x2a - we only need the low 32 bits */
- rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
- dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
- msr_tmp = msr_lo;
-
- /* decode the FSB */
- msr_tmp &= 0x00c0000;
- msr_tmp >>= 18;
- while (msr_tmp != msr_decode_fsb[i].bitmap) {
- if (msr_decode_fsb[i].bitmap == 0xff)
- return 0;
- i++;
- }
-
- /* decode the multiplier */
- if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) {
- dprintk("workaround for early PIIIs\n");
- msr_lo &= 0x03c00000;
- } else
- msr_lo &= 0x0bc00000;
- msr_lo >>= 22;
- while (msr_lo != msr_decode_mult[j].bitmap) {
- if (msr_decode_mult[j].bitmap == 0xff)
- return 0;
- j++;
- }
-
- dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
-
- return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100);
-}
-
-
-static unsigned int pentiumM_get_frequency(void)
-{
- u32 msr_lo, msr_tmp;
-
- rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
- dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
-
- /* see table B-2 of 24547212.pdf */
- if (msr_lo & 0x00040000) {
- printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp);
- return 0;
- }
-
- msr_tmp = (msr_lo >> 22) & 0x1f;
- dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000));
-
- return (msr_tmp * 100 * 1000);
-}
-
-
-static unsigned int pentium4_get_frequency(void)
-{
- struct cpuinfo_x86 *c = &boot_cpu_data;
- u32 msr_lo, msr_hi, mult;
- unsigned int fsb = 0;
-
- rdmsr(0x2c, msr_lo, msr_hi);
-
- dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
-
- /* decode the FSB: see IA-32 Intel (C) Architecture Software
- * Developer's Manual, Volume 3: System Prgramming Guide,
- * revision #12 in Table B-1: MSRs in the Pentium 4 and
- * Intel Xeon Processors, on page B-4 and B-5.
- */
- if (c->x86_model < 2)
- fsb = 100 * 1000;
- else {
- u8 fsb_code = (msr_lo >> 16) & 0x7;
- switch (fsb_code) {
- case 0:
- fsb = 100 * 1000;
- break;
- case 1:
- fsb = 13333 * 10;
- break;
- case 2:
- fsb = 200 * 1000;
- break;
- }
- }
-
- if (!fsb)
- printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n");
-
- /* Multiplier. */
- if (c->x86_model < 2)
- mult = msr_lo >> 27;
- else
- mult = msr_lo >> 24;
-
- dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult));
-
- return (fsb * mult);
-}
-
-
-unsigned int speedstep_get_processor_frequency(unsigned int processor)
-{
- switch (processor) {
- case SPEEDSTEP_PROCESSOR_PM:
- return pentiumM_get_frequency();
- case SPEEDSTEP_PROCESSOR_P4D:
- case SPEEDSTEP_PROCESSOR_P4M:
- return pentium4_get_frequency();
- case SPEEDSTEP_PROCESSOR_PIII_T:
- case SPEEDSTEP_PROCESSOR_PIII_C:
- case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
- return pentium3_get_frequency(processor);
- default:
- return 0;
- };
- return 0;
-}
-EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency);
-
-
-/*********************************************************************
- * DETECT SPEEDSTEP-CAPABLE PROCESSOR *
- *********************************************************************/
-
-unsigned int speedstep_detect_processor (void)
-{
- struct cpuinfo_x86 *c = cpu_data;
- u32 ebx, msr_lo, msr_hi;
-
- dprintk("x86: %x, model: %x\n", c->x86, c->x86_model);
-
- if ((c->x86_vendor != X86_VENDOR_INTEL) ||
- ((c->x86 != 6) && (c->x86 != 0xF)))
- return 0;
-
- if (c->x86 == 0xF) {
- /* Intel Mobile Pentium 4-M
- * or Intel Mobile Pentium 4 with 533 MHz FSB */
- if (c->x86_model != 2)
- return 0;
-
- ebx = cpuid_ebx(0x00000001);
- ebx &= 0x000000FF;
-
- dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
-
- switch (c->x86_mask) {
- case 4:
- /*
- * B-stepping [M-P4-M]
- * sample has ebx = 0x0f, production has 0x0e.
- */
- if ((ebx == 0x0e) || (ebx == 0x0f))
- return SPEEDSTEP_PROCESSOR_P4M;
- break;
- case 7:
- /*
- * C-stepping [M-P4-M]
- * needs to have ebx=0x0e, else it's a celeron:
- * cf. 25130917.pdf / page 7, footnote 5 even
- * though 25072120.pdf / page 7 doesn't say
- * samples are only of B-stepping...
- */
- if (ebx == 0x0e)
- return SPEEDSTEP_PROCESSOR_P4M;
- break;
- case 9:
- /*
- * D-stepping [M-P4-M or M-P4/533]
- *
- * this is totally strange: CPUID 0x0F29 is
- * used by M-P4-M, M-P4/533 and(!) Celeron CPUs.
- * The latter need to be sorted out as they don't
- * support speedstep.
- * Celerons with CPUID 0x0F29 may have either
- * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything
- * specific.
- * M-P4-Ms may have either ebx=0xe or 0xf [see above]
- * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
- * also, M-P4M HTs have ebx=0x8, too
- * For now, they are distinguished by the model_id string
- */
- if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL))
- return SPEEDSTEP_PROCESSOR_P4M;
- break;
- default:
- break;
- }
- return 0;
- }
-
- switch (c->x86_model) {
- case 0x0B: /* Intel PIII [Tualatin] */
- /* cpuid_ebx(1) is 0x04 for desktop PIII,
- 0x06 for mobile PIII-M */
- ebx = cpuid_ebx(0x00000001);
- dprintk("ebx is %x\n", ebx);
-
- ebx &= 0x000000FF;
-
- if (ebx != 0x06)
- return 0;
-
- /* So far all PIII-M processors support SpeedStep. See
- * Intel's 24540640.pdf of June 2003
- */
-
- return SPEEDSTEP_PROCESSOR_PIII_T;
-
- case 0x08: /* Intel PIII [Coppermine] */
-
- /* all mobile PIII Coppermines have FSB 100 MHz
- * ==> sort out a few desktop PIIIs. */
- rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
- dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi);
- msr_lo &= 0x00c0000;
- if (msr_lo != 0x0080000)
- return 0;
-
- /*
- * If the processor is a mobile version,
- * platform ID has bit 50 set
- * it has SpeedStep technology if either
- * bit 56 or 57 is set
- */
- rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
- dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi);
- if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
- if (c->x86_mask == 0x01) {
- dprintk("early PIII version\n");
- return SPEEDSTEP_PROCESSOR_PIII_C_EARLY;
- } else
- return SPEEDSTEP_PROCESSOR_PIII_C;
- }
-
- default:
- return 0;
- }
-}
-EXPORT_SYMBOL_GPL(speedstep_detect_processor);
-
-
-/*********************************************************************
- * DETECT SPEEDSTEP SPEEDS *
- *********************************************************************/
-
-unsigned int speedstep_get_freqs(unsigned int processor,
- unsigned int *low_speed,
- unsigned int *high_speed,
- void (*set_state) (unsigned int state))
-{
- unsigned int prev_speed;
- unsigned int ret = 0;
- unsigned long flags;
-
- if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
- return -EINVAL;
-
- dprintk("trying to determine both speeds\n");
-
- /* get current speed */
- prev_speed = speedstep_get_processor_frequency(processor);
- if (!prev_speed)
- return -EIO;
-
- dprintk("previous speed is %u\n", prev_speed);
-
- local_irq_save(flags);
-
- /* switch to low state */
- set_state(SPEEDSTEP_LOW);
- *low_speed = speedstep_get_processor_frequency(processor);
- if (!*low_speed) {
- ret = -EIO;
- goto out;
- }
-
- dprintk("low speed is %u\n", *low_speed);
-
- /* switch to high state */
- set_state(SPEEDSTEP_HIGH);
- *high_speed = speedstep_get_processor_frequency(processor);
- if (!*high_speed) {
- ret = -EIO;
- goto out;
- }
-
- dprintk("high speed is %u\n", *high_speed);
-
- if (*low_speed == *high_speed) {
- ret = -ENODEV;
- goto out;
- }
-
- /* switch to previous state, if necessary */
- if (*high_speed != prev_speed)
- set_state(SPEEDSTEP_LOW);
-
- out:
- local_irq_restore(flags);
- return (ret);
-}
-EXPORT_SYMBOL_GPL(speedstep_get_freqs);
-
-#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
-module_param(relaxed_check, int, 0444);
-MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability.");
-#endif
-
-MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
-MODULE_LICENSE ("GPL");
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h
deleted file mode 100644
index 261a2c9b7f6..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- *
- * Licensed under the terms of the GNU GPL License version 2.
- *
- * Library for common functions for Intel SpeedStep v.1 and v.2 support
- *
- * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-
-
-/* processors */
-
-#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */
-#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */
-#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */
-#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */
-
-/* the following processors are not speedstep-capable and are not auto-detected
- * in speedstep_detect_processor(). However, their speed can be detected using
- * the speedstep_get_processor_frequency() call. */
-#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */
-#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */
-
-/* speedstep states -- only two of them */
-
-#define SPEEDSTEP_HIGH 0x00000000
-#define SPEEDSTEP_LOW 0x00000001
-
-
-/* detect a speedstep-capable processor */
-extern unsigned int speedstep_detect_processor (void);
-
-/* detect the current speed (in khz) of the processor */
-extern unsigned int speedstep_get_processor_frequency(unsigned int processor);
-
-
-/* detect the low and high speeds of the processor. The callback
- * set_state"'s first argument is either SPEEDSTEP_HIGH or
- * SPEEDSTEP_LOW; the second argument is zero so that no
- * cpufreq_notify_transition calls are initiated.
- */
-extern unsigned int speedstep_get_freqs(unsigned int processor,
- unsigned int *low_speed,
- unsigned int *high_speed,
- void (*set_state) (unsigned int state));
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c
deleted file mode 100644
index 2718fb6f6ab..00000000000
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- * Intel SpeedStep SMI driver.
- *
- * (C) 2003 Hiroshi Miura <miura@da-cha.org>
- *
- * Licensed under the terms of the GNU GPL License version 2.
- *
- */
-
-
-/*********************************************************************
- * SPEEDSTEP - DEFINITIONS *
- *********************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <asm/ist.h>
-
-#include "speedstep-lib.h"
-
-/* speedstep system management interface port/command.
- *
- * These parameters are got from IST-SMI BIOS call.
- * If user gives it, these are used.
- *
- */
-static int smi_port = 0;
-static int smi_cmd = 0;
-static unsigned int smi_sig = 0;
-
-/* info about the processor */
-static unsigned int speedstep_processor = 0;
-
-/*
- * There are only two frequency states for each processor. Values
- * are in kHz for the time being.
- */
-static struct cpufreq_frequency_table speedstep_freqs[] = {
- {SPEEDSTEP_HIGH, 0},
- {SPEEDSTEP_LOW, 0},
- {0, CPUFREQ_TABLE_END},
-};
-
-#define GET_SPEEDSTEP_OWNER 0
-#define GET_SPEEDSTEP_STATE 1
-#define SET_SPEEDSTEP_STATE 2
-#define GET_SPEEDSTEP_FREQS 4
-
-/* how often shall the SMI call be tried if it failed, e.g. because
- * of DMA activity going on? */
-#define SMI_TRIES 5
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg)
-
-/**
- * speedstep_smi_ownership
- */
-static int speedstep_smi_ownership (void)
-{
- u32 command, result, magic;
- u32 function = GET_SPEEDSTEP_OWNER;
- unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
-
- command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
- magic = virt_to_phys(magic_data);
-
- dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port);
-
- __asm__ __volatile__(
- "out %%al, (%%dx)\n"
- : "=D" (result)
- : "a" (command), "b" (function), "c" (0), "d" (smi_port), "D" (0), "S" (magic)
- );
-
- dprintk("result is %x\n", result);
-
- return result;
-}
-
-/**
- * speedstep_smi_get_freqs - get SpeedStep preferred & current freq.
- * @low: the low frequency value is placed here
- * @high: the high frequency value is placed here
- *
- * Only available on later SpeedStep-enabled systems, returns false results or
- * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
- * shows that the latter occurs if !(ist_info.event & 0xFFFF).
- */
-static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
-{
- u32 command, result = 0, edi, high_mhz, low_mhz;
- u32 state=0;
- u32 function = GET_SPEEDSTEP_FREQS;
-
- if (!(ist_info.event & 0xFFFF)) {
- dprintk("bug #1422 -- can't read freqs from BIOS\n");
- return -ENODEV;
- }
-
- command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
-
- dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port);
-
- __asm__ __volatile__("movl $0, %%edi\n"
- "out %%al, (%%dx)\n"
- : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi)
- : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
- );
-
- dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz);
-
- /* abort if results are obviously incorrect... */
- if ((high_mhz + low_mhz) < 600)
- return -EINVAL;
-
- *high = high_mhz * 1000;
- *low = low_mhz * 1000;
-
- return result;
-}
-
-/**
- * speedstep_get_state - set the SpeedStep state
- * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
- *
- */
-static int speedstep_get_state (void)
-{
- u32 function=GET_SPEEDSTEP_STATE;
- u32 result, state, edi, command;
-
- command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
-
- dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port);
-
- __asm__ __volatile__("movl $0, %%edi\n"
- "out %%al, (%%dx)\n"
- : "=a" (result), "=b" (state), "=D" (edi)
- : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0)
- );
-
- dprintk("state is %x, result is %x\n", state, result);
-
- return (state & 1);
-}
-
-
-/**
- * speedstep_set_state - set the SpeedStep state
- * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
- *
- */
-static void speedstep_set_state (unsigned int state)
-{
- unsigned int result = 0, command, new_state;
- unsigned long flags;
- unsigned int function=SET_SPEEDSTEP_STATE;
- unsigned int retry = 0;
-
- if (state > 0x1)
- return;
-
- /* Disable IRQs */
- local_irq_save(flags);
-
- command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
-
- dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port);
-
- do {
- if (retry) {
- dprintk("retry %u, previous result %u, waiting...\n", retry, result);
- mdelay(retry * 50);
- }
- retry++;
- __asm__ __volatile__(
- "movl $0, %%edi\n"
- "out %%al, (%%dx)\n"
- : "=b" (new_state), "=D" (result)
- : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
- );
- } while ((new_state != state) && (retry <= SMI_TRIES));
-
- /* enable IRQs */
- local_irq_restore(flags);
-
- if (new_state == state) {
- dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result);
- } else {
- printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result);
- }
-
- return;
-}
-
-
-/**
- * speedstep_target - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: new freq
- * @relation:
- *
- * Sets a new CPUFreq policy/freq.
- */
-static int speedstep_target (struct cpufreq_policy *policy,
- unsigned int target_freq, unsigned int relation)
-{
- unsigned int newstate = 0;
- struct cpufreq_freqs freqs;
-
- if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
- return -EINVAL;
-
- freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
- freqs.new = speedstep_freqs[newstate].frequency;
- freqs.cpu = 0; /* speedstep.c is UP only driver */
-
- if (freqs.old == freqs.new)
- return 0;
-
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
- speedstep_set_state(newstate);
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
- return 0;
-}
-
-
-/**
- * speedstep_verify - verifies a new CPUFreq policy
- * @policy: new policy
- *
- * Limit must be within speedstep_low_freq and speedstep_high_freq, with
- * at least one border included.
- */
-static int speedstep_verify (struct cpufreq_policy *policy)
-{
- return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
-}
-
-
-static int speedstep_cpu_init(struct cpufreq_policy *policy)
-{
- int result;
- unsigned int speed,state;
-
- /* capability check */
- if (policy->cpu != 0)
- return -ENODEV;
-
- result = speedstep_smi_ownership();
- if (result) {
- dprintk("fails in aquiring ownership of a SMI interface.\n");
- return -EINVAL;
- }
-
- /* detect low and high frequency */
- result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency,
- &speedstep_freqs[SPEEDSTEP_HIGH].frequency);
- if (result) {
- /* fall back to speedstep_lib.c dection mechanism: try both states out */
- dprintk("could not detect low and high frequencies by SMI call.\n");
- result = speedstep_get_freqs(speedstep_processor,
- &speedstep_freqs[SPEEDSTEP_LOW].frequency,
- &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
- &speedstep_set_state);
-
- if (result) {
- dprintk("could not detect two different speeds -- aborting.\n");
- return result;
- } else
- dprintk("workaround worked.\n");
- }
-
- /* get current speed setting */
- state = speedstep_get_state();
- speed = speedstep_freqs[state].frequency;
-
- dprintk("currently at %s speed setting - %i MHz\n",
- (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
- (speed / 1000));
-
- /* cpuinfo and default policy values */
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- policy->cur = speed;
-
- result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
- if (result)
- return (result);
-
- cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
-
- return 0;
-}
-
-static int speedstep_cpu_exit(struct cpufreq_policy *policy)
-{
- cpufreq_frequency_table_put_attr(policy->cpu);
- return 0;
-}
-
-static unsigned int speedstep_get(unsigned int cpu)
-{
- if (cpu)
- return -ENODEV;
- return speedstep_get_processor_frequency(speedstep_processor);
-}
-
-
-static int speedstep_resume(struct cpufreq_policy *policy)
-{
- int result = speedstep_smi_ownership();
-
- if (result)
- dprintk("fails in re-aquiring ownership of a SMI interface.\n");
-
- return result;
-}
-
-static struct freq_attr* speedstep_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-static struct cpufreq_driver speedstep_driver = {
- .name = "speedstep-smi",
- .verify = speedstep_verify,
- .target = speedstep_target,
- .init = speedstep_cpu_init,
- .exit = speedstep_cpu_exit,
- .get = speedstep_get,
- .resume = speedstep_resume,
- .owner = THIS_MODULE,
- .attr = speedstep_attr,
-};
-
-/**
- * speedstep_init - initializes the SpeedStep CPUFreq driver
- *
- * Initializes the SpeedStep support. Returns -ENODEV on unsupported
- * BIOS, -EINVAL on problems during initiatization, and zero on
- * success.
- */
-static int __init speedstep_init(void)
-{
- speedstep_processor = speedstep_detect_processor();
-
- switch (speedstep_processor) {
- case SPEEDSTEP_PROCESSOR_PIII_T:
- case SPEEDSTEP_PROCESSOR_PIII_C:
- case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
- break;
- case SPEEDSTEP_PROCESSOR_P4M:
- printk(KERN_INFO "speedstep-smi: you're trying to use this cpufreq driver on a Pentium 4-based CPU. Most likely it will not work.\n");
- break;
- default:
- speedstep_processor = 0;
- }
-
- if (!speedstep_processor) {
- dprintk ("No supported Intel CPU detected.\n");
- return -ENODEV;
- }
-
- dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n",
- ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level);
-
-
- /* Error if no IST-SMI BIOS or no PARM
- sig= 'ISGE' aka 'Intel Speedstep Gate E' */
- if ((ist_info.signature != 0x47534943) && (
- (smi_port == 0) || (smi_cmd == 0)))
- return -ENODEV;
-
- if (smi_sig == 1)
- smi_sig = 0x47534943;
- else
- smi_sig = ist_info.signature;
-
- /* setup smi_port from MODLULE_PARM or BIOS */
- if ((smi_port > 0xff) || (smi_port < 0)) {
- return -EINVAL;
- } else if (smi_port == 0) {
- smi_port = ist_info.command & 0xff;
- }
-
- if ((smi_cmd > 0xff) || (smi_cmd < 0)) {
- return -EINVAL;
- } else if (smi_cmd == 0) {
- smi_cmd = (ist_info.command >> 16) & 0xff;
- }
-
- return cpufreq_register_driver(&speedstep_driver);
-}
-
-
-/**
- * speedstep_exit - unregisters SpeedStep support
- *
- * Unregisters SpeedStep support.
- */
-static void __exit speedstep_exit(void)
-{
- cpufreq_unregister_driver(&speedstep_driver);
-}
-
-module_param(smi_port, int, 0444);
-module_param(smi_cmd, int, 0444);
-module_param(smi_sig, uint, 0444);
-
-MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2");
-MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82");
-MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface.");
-
-MODULE_AUTHOR ("Hiroshi Miura");
-MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface.");
-MODULE_LICENSE ("GPL");
-
-module_init(speedstep_init);
-module_exit(speedstep_exit);
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
deleted file mode 100644
index ff87cc22b32..00000000000
--- a/arch/i386/kernel/cpu/cyrix.c
+++ /dev/null
@@ -1,435 +0,0 @@
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <asm/dma.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/timer.h>
-
-#include "cpu.h"
-
-/*
- * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
- */
-static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
-{
- unsigned char ccr2, ccr3;
- unsigned long flags;
-
- /* we test for DEVID by checking whether CCR3 is writable */
- local_irq_save(flags);
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, ccr3 ^ 0x80);
- getCx86(0xc0); /* dummy to change bus */
-
- if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */
- ccr2 = getCx86(CX86_CCR2);
- setCx86(CX86_CCR2, ccr2 ^ 0x04);
- getCx86(0xc0); /* dummy */
-
- if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */
- *dir0 = 0xfd;
- else { /* Cx486S A step */
- setCx86(CX86_CCR2, ccr2);
- *dir0 = 0xfe;
- }
- }
- else {
- setCx86(CX86_CCR3, ccr3); /* restore CCR3 */
-
- /* read DIR0 and DIR1 CPU registers */
- *dir0 = getCx86(CX86_DIR0);
- *dir1 = getCx86(CX86_DIR1);
- }
- local_irq_restore(flags);
-}
-
-/*
- * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
- * order to identify the Cyrix CPU model after we're out of setup.c
- *
- * Actually since bugs.h doesn't even reference this perhaps someone should
- * fix the documentation ???
- */
-static unsigned char Cx86_dir0_msb __initdata = 0;
-
-static char Cx86_model[][9] __initdata = {
- "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
- "M II ", "Unknown"
-};
-static char Cx486_name[][5] __initdata = {
- "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
- "SRx2", "DRx2"
-};
-static char Cx486S_name[][4] __initdata = {
- "S", "S2", "Se", "S2e"
-};
-static char Cx486D_name[][4] __initdata = {
- "DX", "DX2", "?", "?", "?", "DX4"
-};
-static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock";
-static char cyrix_model_mult1[] __initdata = "12??43";
-static char cyrix_model_mult2[] __initdata = "12233445";
-
-/*
- * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
- * BIOSes for compatibility with DOS games. This makes the udelay loop
- * work correctly, and improves performance.
- *
- * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP
- */
-
-extern void calibrate_delay(void) __init;
-
-static void __init check_cx686_slop(struct cpuinfo_x86 *c)
-{
- unsigned long flags;
-
- if (Cx86_dir0_msb == 3) {
- unsigned char ccr3, ccr5;
-
- local_irq_save(flags);
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- ccr5 = getCx86(CX86_CCR5);
- if (ccr5 & 2)
- setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
- local_irq_restore(flags);
-
- if (ccr5 & 2) { /* possible wrong calibration done */
- printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
- calibrate_delay();
- c->loops_per_jiffy = loops_per_jiffy;
- }
- }
-}
-
-
-static void __init set_cx86_reorder(void)
-{
- u8 ccr3;
-
- printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */
-
- /* Load/Store Serialize to mem access disable (=reorder it)  */
- setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
- /* set load/store serialize from 1GB to 4GB */
- ccr3 |= 0xe0;
- setCx86(CX86_CCR3, ccr3);
-}
-
-static void __init set_cx86_memwb(void)
-{
- u32 cr0;
-
- printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
-
- /* CCR2 bit 2: unlock NW bit */
- setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
- /* set 'Not Write-through' */
- cr0 = 0x20000000;
- write_cr0(read_cr0() | cr0);
- /* CCR2 bit 2: lock NW bit and set WT1 */
- setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
-}
-
-static void __init set_cx86_inc(void)
-{
- unsigned char ccr3;
-
- printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n");
-
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */
- /* PCR1 -- Performance Control */
- /* Incrementor on, whatever that is */
- setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
- /* PCR0 -- Performance Control */
- /* Incrementor Margin 10 */
- setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
-}
-
-/*
- * Configure later MediaGX and/or Geode processor.
- */
-
-static void __init geode_configure(void)
-{
- unsigned long flags;
- u8 ccr3, ccr4;
- local_irq_save(flags);
-
- /* Suspend on halt power saving and enable #SUSP pin */
- setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
-
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* Enable */
-
- ccr4 = getCx86(CX86_CCR4);
- ccr4 |= 0x38; /* FPU fast, DTE cache, Mem bypass */
-
- setCx86(CX86_CCR3, ccr3);
-
- set_cx86_memwb();
- set_cx86_reorder();
- set_cx86_inc();
-
- local_irq_restore(flags);
-}
-
-
-#ifdef CONFIG_PCI
-static struct pci_device_id cyrix_55x0[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
- { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
- { },
-};
-#endif
-
-static void __init init_cyrix(struct cpuinfo_x86 *c)
-{
- unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
- char *buf = c->x86_model_id;
- const char *p = NULL;
-
- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, c->x86_capability);
-
- /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */
- if ( test_bit(1*32+24, c->x86_capability) ) {
- clear_bit(1*32+24, c->x86_capability);
- set_bit(X86_FEATURE_CXMMX, c->x86_capability);
- }
-
- do_cyrix_devid(&dir0, &dir1);
-
- check_cx686_slop(c);
-
- Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */
- dir0_lsn = dir0 & 0xf; /* model or clock multiplier */
-
- /* common case step number/rev -- exceptions handled below */
- c->x86_model = (dir1 >> 4) + 1;
- c->x86_mask = dir1 & 0xf;
-
- /* Now cook; the original recipe is by Channing Corn, from Cyrix.
- * We do the same thing for each generation: we work out
- * the model, multiplier and stepping. Black magic included,
- * to make the silicon step/rev numbers match the printed ones.
- */
-
- switch (dir0_msn) {
- unsigned char tmp;
-
- case 0: /* Cx486SLC/DLC/SRx/DRx */
- p = Cx486_name[dir0_lsn & 7];
- break;
-
- case 1: /* Cx486S/DX/DX2/DX4 */
- p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5]
- : Cx486S_name[dir0_lsn & 3];
- break;
-
- case 2: /* 5x86 */
- Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
- p = Cx86_cb+2;
- break;
-
- case 3: /* 6x86/6x86L */
- Cx86_cb[1] = ' ';
- Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
- if (dir1 > 0x21) { /* 686L */
- Cx86_cb[0] = 'L';
- p = Cx86_cb;
- (c->x86_model)++;
- } else /* 686 */
- p = Cx86_cb+1;
- /* Emulate MTRRs using Cyrix's ARRs. */
- set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
- /* 6x86's contain this bug */
- c->coma_bug = 1;
- break;
-
- case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
-#ifdef CONFIG_PCI
- /* It isn't really a PCI quirk directly, but the cure is the
- same. The MediaGX has deep magic SMM stuff that handles the
- SB emulation. It thows away the fifo on disable_dma() which
- is wrong and ruins the audio.
-
- Bug2: VSA1 has a wrap bug so that using maximum sized DMA
- causes bad things. According to NatSemi VSA2 has another
- bug to do with 'hlt'. I've not seen any boards using VSA2
- and X doesn't seem to support it either so who cares 8).
- VSA1 we work around however.
- */
-
- printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
- isa_dma_bridge_buggy = 2;
-#endif
- c->x86_cache_size=16; /* Yep 16K integrated cache thats it */
-
- /*
- * The 5510/5520 companion chips have a funky PIT.
- */
- if (pci_dev_present(cyrix_55x0))
- pit_latch_buggy = 1;
-
- /* GXm supports extended cpuid levels 'ala' AMD */
- if (c->cpuid_level == 2) {
- /* Enable cxMMX extensions (GX1 Datasheet 54) */
- setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
-
- /* GXlv/GXm/GX1 */
- if((dir1 >= 0x50 && dir1 <= 0x54) || dir1 >= 0x63)
- geode_configure();
- get_model_name(c); /* get CPU marketing name */
- return;
- }
- else { /* MediaGX */
- Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
- p = Cx86_cb+2;
- c->x86_model = (dir1 & 0x20) ? 1 : 2;
- }
- break;
-
- case 5: /* 6x86MX/M II */
- if (dir1 > 7)
- {
- dir0_msn++; /* M II */
- /* Enable MMX extensions (App note 108) */
- setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
- }
- else
- {
- c->coma_bug = 1; /* 6x86MX, it has the bug. */
- }
- tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0;
- Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7];
- p = Cx86_cb+tmp;
- if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20))
- (c->x86_model)++;
- /* Emulate MTRRs using Cyrix's ARRs. */
- set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
- break;
-
- case 0xf: /* Cyrix 486 without DEVID registers */
- switch (dir0_lsn) {
- case 0xd: /* either a 486SLC or DLC w/o DEVID */
- dir0_msn = 0;
- p = Cx486_name[(c->hard_math) ? 1 : 0];
- break;
-
- case 0xe: /* a 486S A step */
- dir0_msn = 0;
- p = Cx486S_name[0];
- break;
- }
- break;
-
- default: /* unknown (shouldn't happen, we know everyone ;-) */
- dir0_msn = 7;
- break;
- }
- strcpy(buf, Cx86_model[dir0_msn & 7]);
- if (p) strcat(buf, p);
- return;
-}
-
-/*
- * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected
- * by the fact that they preserve the flags across the division of 5/2.
- * PII and PPro exhibit this behavior too, but they have cpuid available.
- */
-
-/*
- * Perform the Cyrix 5/2 test. A Cyrix won't change
- * the flags, while other 486 chips will.
- */
-static inline int test_cyrix_52div(void)
-{
- unsigned int test;
-
- __asm__ __volatile__(
- "sahf\n\t" /* clear flags (%eax = 0x0005) */
- "div %b2\n\t" /* divide 5 by 2 */
- "lahf" /* store flags into %ah */
- : "=a" (test)
- : "0" (5), "q" (2)
- : "cc");
-
- /* AH is 0x02 on Cyrix after the divide.. */
- return (unsigned char) (test >> 8) == 0x02;
-}
-
-static void cyrix_identify(struct cpuinfo_x86 * c)
-{
- /* Detect Cyrix with disabled CPUID */
- if ( c->x86 == 4 && test_cyrix_52div() ) {
- unsigned char dir0, dir1;
-
- strcpy(c->x86_vendor_id, "CyrixInstead");
- c->x86_vendor = X86_VENDOR_CYRIX;
-
- /* Actually enable cpuid on the older cyrix */
-
- /* Retrieve CPU revisions */
-
- do_cyrix_devid(&dir0, &dir1);
-
- dir0>>=4;
-
- /* Check it is an affected model */
-
- if (dir0 == 5 || dir0 == 3)
- {
- unsigned char ccr3, ccr4;
- unsigned long flags;
- printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
- local_irq_save(flags);
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- ccr4 = getCx86(CX86_CCR4);
- setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
- local_irq_restore(flags);
- }
- }
- generic_identify(c);
-}
-
-static struct cpu_dev cyrix_cpu_dev __initdata = {
- .c_vendor = "Cyrix",
- .c_ident = { "CyrixInstead" },
- .c_init = init_cyrix,
- .c_identify = cyrix_identify,
-};
-
-int __init cyrix_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev;
- return 0;
-}
-
-//early_arch_initcall(cyrix_init_cpu);
-
-static struct cpu_dev nsc_cpu_dev __initdata = {
- .c_vendor = "NSC",
- .c_ident = { "Geode by NSC" },
- .c_init = init_cyrix,
- .c_identify = generic_identify,
-};
-
-int __init nsc_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev;
- return 0;
-}
-
-//early_arch_initcall(nsc_init_cpu);
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
deleted file mode 100644
index 43601de0f63..00000000000
--- a/arch/i386/kernel/cpu/intel.c
+++ /dev/null
@@ -1,268 +0,0 @@
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-
-#include <linux/string.h>
-#include <linux/bitops.h>
-#include <linux/smp.h>
-#include <linux/thread_info.h>
-
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/uaccess.h>
-
-#include "cpu.h"
-
-#ifdef CONFIG_X86_LOCAL_APIC
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#include <mach_apic.h>
-#endif
-
-extern int trap_init_f00f_bug(void);
-
-#ifdef CONFIG_X86_INTEL_USERCOPY
-/*
- * Alignment at which movsl is preferred for bulk memory copies.
- */
-struct movsl_mask movsl_mask __read_mostly;
-#endif
-
-void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
-{
- if (c->x86_vendor != X86_VENDOR_INTEL)
- return;
- /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
- if (c->x86 == 15 && c->x86_cache_alignment == 64)
- c->x86_cache_alignment = 128;
-}
-
-/*
- * Early probe support logic for ppro memory erratum #50
- *
- * This is called before we do cpu ident work
- */
-
-int __devinit ppro_with_ram_bug(void)
-{
- /* Uses data from early_cpu_detect now */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
- boot_cpu_data.x86 == 6 &&
- boot_cpu_data.x86_model == 1 &&
- boot_cpu_data.x86_mask < 8) {
- printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
- return 1;
- }
- return 0;
-}
-
-
-/*
- * P4 Xeon errata 037 workaround.
- * Hardware prefetcher may cause stale data to be loaded into the cache.
- */
-static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
-{
- unsigned long lo, hi;
-
- if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
- rdmsr (MSR_IA32_MISC_ENABLE, lo, hi);
- if ((lo & (1<<9)) == 0) {
- printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
- printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
- lo |= (1<<9); /* Disable hw prefetching */
- wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
- }
- }
-}
-
-
-/*
- * find out the number of processor cores on the die
- */
-static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
-{
- unsigned int eax, ebx, ecx, edx;
-
- if (c->cpuid_level < 4)
- return 1;
-
- /* Intel has a non-standard dependency on %ecx for this CPUID level. */
- cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
- if (eax & 0x1f)
- return ((eax >> 26) + 1);
- else
- return 1;
-}
-
-static void __devinit init_intel(struct cpuinfo_x86 *c)
-{
- unsigned int l2 = 0;
- char *p = NULL;
-
-#ifdef CONFIG_X86_F00F_BUG
- /*
- * All current models of Pentium and Pentium with MMX technology CPUs
- * have the F0 0F bug, which lets nonprivileged users lock up the system.
- * Note that the workaround only should be initialized once...
- */
- c->f00f_bug = 0;
- if ( c->x86 == 5 ) {
- static int f00f_workaround_enabled = 0;
-
- c->f00f_bug = 1;
- if ( !f00f_workaround_enabled ) {
- trap_init_f00f_bug();
- printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
- f00f_workaround_enabled = 1;
- }
- }
-#endif
-
- select_idle_routine(c);
- l2 = init_intel_cacheinfo(c);
-
- /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
- if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
- clear_bit(X86_FEATURE_SEP, c->x86_capability);
-
- /* Names for the Pentium II/Celeron processors
- detectable only by also checking the cache size.
- Dixon is NOT a Celeron. */
- if (c->x86 == 6) {
- switch (c->x86_model) {
- case 5:
- if (c->x86_mask == 0) {
- if (l2 == 0)
- p = "Celeron (Covington)";
- else if (l2 == 256)
- p = "Mobile Pentium II (Dixon)";
- }
- break;
-
- case 6:
- if (l2 == 128)
- p = "Celeron (Mendocino)";
- else if (c->x86_mask == 0 || c->x86_mask == 5)
- p = "Celeron-A";
- break;
-
- case 8:
- if (l2 == 128)
- p = "Celeron (Coppermine)";
- break;
- }
- }
-
- if ( p )
- strcpy(c->x86_model_id, p);
-
- c->x86_num_cores = num_cpu_cores(c);
-
- detect_ht(c);
-
- /* Work around errata */
- Intel_errata_workarounds(c);
-
-#ifdef CONFIG_X86_INTEL_USERCOPY
- /*
- * Set up the preferred alignment for movsl bulk memory moves
- */
- switch (c->x86) {
- case 4: /* 486: untested */
- break;
- case 5: /* Old Pentia: untested */
- break;
- case 6: /* PII/PIII only like movsl with 8-byte alignment */
- movsl_mask.mask = 7;
- break;
- case 15: /* P4 is OK down to 8-byte alignment */
- movsl_mask.mask = 7;
- break;
- }
-#endif
-
- if (c->x86 == 15)
- set_bit(X86_FEATURE_P4, c->x86_capability);
- if (c->x86 == 6)
- set_bit(X86_FEATURE_P3, c->x86_capability);
-}
-
-
-static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
-{
- /* Intel PIII Tualatin. This comes in two flavours.
- * One has 256kb of cache, the other 512. We have no way
- * to determine which, so we use a boottime override
- * for the 512kb model, and assume 256 otherwise.
- */
- if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
- size = 256;
- return size;
-}
-
-static struct cpu_dev intel_cpu_dev __devinitdata = {
- .c_vendor = "Intel",
- .c_ident = { "GenuineIntel" },
- .c_models = {
- { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
- {
- [0] = "486 DX-25/33",
- [1] = "486 DX-50",
- [2] = "486 SX",
- [3] = "486 DX/2",
- [4] = "486 SL",
- [5] = "486 SX/2",
- [7] = "486 DX/2-WB",
- [8] = "486 DX/4",
- [9] = "486 DX/4-WB"
- }
- },
- { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names =
- {
- [0] = "Pentium 60/66 A-step",
- [1] = "Pentium 60/66",
- [2] = "Pentium 75 - 200",
- [3] = "OverDrive PODP5V83",
- [4] = "Pentium MMX",
- [7] = "Mobile Pentium 75 - 200",
- [8] = "Mobile Pentium MMX"
- }
- },
- { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names =
- {
- [0] = "Pentium Pro A-step",
- [1] = "Pentium Pro",
- [3] = "Pentium II (Klamath)",
- [4] = "Pentium II (Deschutes)",
- [5] = "Pentium II (Deschutes)",
- [6] = "Mobile Pentium II",
- [7] = "Pentium III (Katmai)",
- [8] = "Pentium III (Coppermine)",
- [10] = "Pentium III (Cascades)",
- [11] = "Pentium III (Tualatin)",
- }
- },
- { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names =
- {
- [0] = "Pentium 4 (Unknown)",
- [1] = "Pentium 4 (Willamette)",
- [2] = "Pentium 4 (Northwood)",
- [4] = "Pentium 4 (Foster)",
- [5] = "Pentium 4 (Foster)",
- }
- },
- },
- .c_init = init_intel,
- .c_identify = generic_identify,
- .c_size_cache = intel_size_cache,
-};
-
-__init int intel_cpu_init(void)
-{
- cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev;
- return 0;
-}
-
-// arch_initcall(intel_cpu_init);
-
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
deleted file mode 100644
index 4dc42a189ae..00000000000
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ /dev/null
@@ -1,626 +0,0 @@
-/*
- * Routines to indentify caches on Intel CPU.
- *
- * Changes:
- * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
- * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
- */
-
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/compiler.h>
-#include <linux/cpu.h>
-#include <linux/sched.h>
-
-#include <asm/processor.h>
-#include <asm/smp.h>
-
-#define LVL_1_INST 1
-#define LVL_1_DATA 2
-#define LVL_2 3
-#define LVL_3 4
-#define LVL_TRACE 5
-
-struct _cache_table
-{
- unsigned char descriptor;
- char cache_type;
- short size;
-};
-
-/* all the cache descriptor types we care about (no TLB or trace cache entries) */
-static struct _cache_table cache_table[] __cpuinitdata =
-{
- { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
- { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
- { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
- { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
- { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
- { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
- { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
- { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
- { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
- { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
- { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */
- { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */
- { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
- { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
- { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
- { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */
- { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */
- { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
- { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
- { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
- { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */
- { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */
- { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
- { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */
- { 0x00, 0, 0}
-};
-
-
-enum _cache_type
-{
- CACHE_TYPE_NULL = 0,
- CACHE_TYPE_DATA = 1,
- CACHE_TYPE_INST = 2,
- CACHE_TYPE_UNIFIED = 3
-};
-
-union _cpuid4_leaf_eax {
- struct {
- enum _cache_type type:5;
- unsigned int level:3;
- unsigned int is_self_initializing:1;
- unsigned int is_fully_associative:1;
- unsigned int reserved:4;
- unsigned int num_threads_sharing:12;
- unsigned int num_cores_on_die:6;
- } split;
- u32 full;
-};
-
-union _cpuid4_leaf_ebx {
- struct {
- unsigned int coherency_line_size:12;
- unsigned int physical_line_partition:10;
- unsigned int ways_of_associativity:10;
- } split;
- u32 full;
-};
-
-union _cpuid4_leaf_ecx {
- struct {
- unsigned int number_of_sets:32;
- } split;
- u32 full;
-};
-
-struct _cpuid4_info {
- union _cpuid4_leaf_eax eax;
- union _cpuid4_leaf_ebx ebx;
- union _cpuid4_leaf_ecx ecx;
- unsigned long size;
- cpumask_t shared_cpu_map;
-};
-
-static unsigned short num_cache_leaves;
-
-static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
-{
- unsigned int eax, ebx, ecx, edx;
- union _cpuid4_leaf_eax cache_eax;
-
- cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
- cache_eax.full = eax;
- if (cache_eax.split.type == CACHE_TYPE_NULL)
- return -EIO; /* better error ? */
-
- this_leaf->eax.full = eax;
- this_leaf->ebx.full = ebx;
- this_leaf->ecx.full = ecx;
- this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) *
- (this_leaf->ebx.split.coherency_line_size + 1) *
- (this_leaf->ebx.split.physical_line_partition + 1) *
- (this_leaf->ebx.split.ways_of_associativity + 1);
- return 0;
-}
-
-static int __init find_num_cache_leaves(void)
-{
- unsigned int eax, ebx, ecx, edx;
- union _cpuid4_leaf_eax cache_eax;
- int i = -1;
-
- do {
- ++i;
- /* Do cpuid(4) loop to find out num_cache_leaves */
- cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
- cache_eax.full = eax;
- } while (cache_eax.split.type != CACHE_TYPE_NULL);
- return i;
-}
-
-unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
-{
- unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
- unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
- unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
-
- if (c->cpuid_level > 4) {
- static int is_initialized;
-
- if (is_initialized == 0) {
- /* Init num_cache_leaves from boot CPU */
- num_cache_leaves = find_num_cache_leaves();
- is_initialized++;
- }
-
- /*
- * Whenever possible use cpuid(4), deterministic cache
- * parameters cpuid leaf to find the cache details
- */
- for (i = 0; i < num_cache_leaves; i++) {
- struct _cpuid4_info this_leaf;
-
- int retval;
-
- retval = cpuid4_cache_lookup(i, &this_leaf);
- if (retval >= 0) {
- switch(this_leaf.eax.split.level) {
- case 1:
- if (this_leaf.eax.split.type ==
- CACHE_TYPE_DATA)
- new_l1d = this_leaf.size/1024;
- else if (this_leaf.eax.split.type ==
- CACHE_TYPE_INST)
- new_l1i = this_leaf.size/1024;
- break;
- case 2:
- new_l2 = this_leaf.size/1024;
- break;
- case 3:
- new_l3 = this_leaf.size/1024;
- break;
- default:
- break;
- }
- }
- }
- }
- if (c->cpuid_level > 1) {
- /* supports eax=2 call */
- int i, j, n;
- int regs[4];
- unsigned char *dp = (unsigned char *)regs;
-
- /* Number of times to iterate */
- n = cpuid_eax(2) & 0xFF;
-
- for ( i = 0 ; i < n ; i++ ) {
- cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
-
- /* If bit 31 is set, this is an unknown format */
- for ( j = 0 ; j < 3 ; j++ ) {
- if ( regs[j] < 0 ) regs[j] = 0;
- }
-
- /* Byte 0 is level count, not a descriptor */
- for ( j = 1 ; j < 16 ; j++ ) {
- unsigned char des = dp[j];
- unsigned char k = 0;
-
- /* look up this descriptor in the table */
- while (cache_table[k].descriptor != 0)
- {
- if (cache_table[k].descriptor == des) {
- switch (cache_table[k].cache_type) {
- case LVL_1_INST:
- l1i += cache_table[k].size;
- break;
- case LVL_1_DATA:
- l1d += cache_table[k].size;
- break;
- case LVL_2:
- l2 += cache_table[k].size;
- break;
- case LVL_3:
- l3 += cache_table[k].size;
- break;
- case LVL_TRACE:
- trace += cache_table[k].size;
- break;
- }
-
- break;
- }
-
- k++;
- }
- }
- }
-
- if (new_l1d)
- l1d = new_l1d;
-
- if (new_l1i)
- l1i = new_l1i;
-
- if (new_l2)
- l2 = new_l2;
-
- if (new_l3)
- l3 = new_l3;
-
- if ( trace )
- printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
- else if ( l1i )
- printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
- if ( l1d )
- printk(", L1 D cache: %dK\n", l1d);
- else
- printk("\n");
- if ( l2 )
- printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
- if ( l3 )
- printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
-
- c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
- }
-
- return l2;
-}
-
-/* pointer to _cpuid4_info array (for each cache leaf) */
-static struct _cpuid4_info *cpuid4_info[NR_CPUS];
-#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y]))
-
-#ifdef CONFIG_SMP
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
-{
- struct _cpuid4_info *this_leaf;
- unsigned long num_threads_sharing;
-#ifdef CONFIG_X86_HT
- struct cpuinfo_x86 *c = cpu_data + cpu;
-#endif
-
- this_leaf = CPUID4_INFO_IDX(cpu, index);
- num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
-
- if (num_threads_sharing == 1)
- cpu_set(cpu, this_leaf->shared_cpu_map);
-#ifdef CONFIG_X86_HT
- else if (num_threads_sharing == smp_num_siblings)
- this_leaf->shared_cpu_map = cpu_sibling_map[cpu];
- else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings))
- this_leaf->shared_cpu_map = cpu_core_map[cpu];
- else
- printk(KERN_DEBUG "Number of CPUs sharing cache didn't match "
- "any known set of CPUs\n");
-#endif
-}
-#else
-static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
-#endif
-
-static void free_cache_attributes(unsigned int cpu)
-{
- kfree(cpuid4_info[cpu]);
- cpuid4_info[cpu] = NULL;
-}
-
-static int __cpuinit detect_cache_attributes(unsigned int cpu)
-{
- struct _cpuid4_info *this_leaf;
- unsigned long j;
- int retval;
- cpumask_t oldmask;
-
- if (num_cache_leaves == 0)
- return -ENOENT;
-
- cpuid4_info[cpu] = kmalloc(
- sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
- if (unlikely(cpuid4_info[cpu] == NULL))
- return -ENOMEM;
- memset(cpuid4_info[cpu], 0,
- sizeof(struct _cpuid4_info) * num_cache_leaves);
-
- oldmask = current->cpus_allowed;
- retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
- if (retval)
- goto out;
-
- /* Do cpuid and store the results */
- retval = 0;
- for (j = 0; j < num_cache_leaves; j++) {
- this_leaf = CPUID4_INFO_IDX(cpu, j);
- retval = cpuid4_cache_lookup(j, this_leaf);
- if (unlikely(retval < 0))
- break;
- cache_shared_cpu_map_setup(cpu, j);
- }
- set_cpus_allowed(current, oldmask);
-
-out:
- if (retval)
- free_cache_attributes(cpu);
- return retval;
-}
-
-#ifdef CONFIG_SYSFS
-
-#include <linux/kobject.h>
-#include <linux/sysfs.h>
-
-extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
-
-/* pointer to kobject for cpuX/cache */
-static struct kobject * cache_kobject[NR_CPUS];
-
-struct _index_kobject {
- struct kobject kobj;
- unsigned int cpu;
- unsigned short index;
-};
-
-/* pointer to array of kobjects for cpuX/cache/indexY */
-static struct _index_kobject *index_kobject[NR_CPUS];
-#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y]))
-
-#define show_one_plus(file_name, object, val) \
-static ssize_t show_##file_name \
- (struct _cpuid4_info *this_leaf, char *buf) \
-{ \
- return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
-}
-
-show_one_plus(level, eax.split.level, 0);
-show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
-show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
-show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
-show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
-
-static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
-{
- return sprintf (buf, "%luK\n", this_leaf->size / 1024);
-}
-
-static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
-{
- char mask_str[NR_CPUS];
- cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
- return sprintf(buf, "%s\n", mask_str);
-}
-
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
- switch(this_leaf->eax.split.type) {
- case CACHE_TYPE_DATA:
- return sprintf(buf, "Data\n");
- break;
- case CACHE_TYPE_INST:
- return sprintf(buf, "Instruction\n");
- break;
- case CACHE_TYPE_UNIFIED:
- return sprintf(buf, "Unified\n");
- break;
- default:
- return sprintf(buf, "Unknown\n");
- break;
- }
-}
-
-struct _cache_attr {
- struct attribute attr;
- ssize_t (*show)(struct _cpuid4_info *, char *);
- ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
-};
-
-#define define_one_ro(_name) \
-static struct _cache_attr _name = \
- __ATTR(_name, 0444, show_##_name, NULL)
-
-define_one_ro(level);
-define_one_ro(type);
-define_one_ro(coherency_line_size);
-define_one_ro(physical_line_partition);
-define_one_ro(ways_of_associativity);
-define_one_ro(number_of_sets);
-define_one_ro(size);
-define_one_ro(shared_cpu_map);
-
-static struct attribute * default_attrs[] = {
- &type.attr,
- &level.attr,
- &coherency_line_size.attr,
- &physical_line_partition.attr,
- &ways_of_associativity.attr,
- &number_of_sets.attr,
- &size.attr,
- &shared_cpu_map.attr,
- NULL
-};
-
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)
-
-static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
-{
- struct _cache_attr *fattr = to_attr(attr);
- struct _index_kobject *this_leaf = to_object(kobj);
- ssize_t ret;
-
- ret = fattr->show ?
- fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf) :
- 0;
- return ret;
-}
-
-static ssize_t store(struct kobject * kobj, struct attribute * attr,
- const char * buf, size_t count)
-{
- return 0;
-}
-
-static struct sysfs_ops sysfs_ops = {
- .show = show,
- .store = store,
-};
-
-static struct kobj_type ktype_cache = {
- .sysfs_ops = &sysfs_ops,
- .default_attrs = default_attrs,
-};
-
-static struct kobj_type ktype_percpu_entry = {
- .sysfs_ops = &sysfs_ops,
-};
-
-static void cpuid4_cache_sysfs_exit(unsigned int cpu)
-{
- kfree(cache_kobject[cpu]);
- kfree(index_kobject[cpu]);
- cache_kobject[cpu] = NULL;
- index_kobject[cpu] = NULL;
- free_cache_attributes(cpu);
-}
-
-static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
-{
-
- if (num_cache_leaves == 0)
- return -ENOENT;
-
- detect_cache_attributes(cpu);
- if (cpuid4_info[cpu] == NULL)
- return -ENOENT;
-
- /* Allocate all required memory */
- cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL);
- if (unlikely(cache_kobject[cpu] == NULL))
- goto err_out;
- memset(cache_kobject[cpu], 0, sizeof(struct kobject));
-
- index_kobject[cpu] = kmalloc(
- sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
- if (unlikely(index_kobject[cpu] == NULL))
- goto err_out;
- memset(index_kobject[cpu], 0,
- sizeof(struct _index_kobject) * num_cache_leaves);
-
- return 0;
-
-err_out:
- cpuid4_cache_sysfs_exit(cpu);
- return -ENOMEM;
-}
-
-/* Add/Remove cache interface for CPU device */
-static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
-{
- unsigned int cpu = sys_dev->id;
- unsigned long i, j;
- struct _index_kobject *this_object;
- int retval = 0;
-
- retval = cpuid4_cache_sysfs_init(cpu);
- if (unlikely(retval < 0))
- return retval;
-
- cache_kobject[cpu]->parent = &sys_dev->kobj;
- kobject_set_name(cache_kobject[cpu], "%s", "cache");
- cache_kobject[cpu]->ktype = &ktype_percpu_entry;
- retval = kobject_register(cache_kobject[cpu]);
-
- for (i = 0; i < num_cache_leaves; i++) {
- this_object = INDEX_KOBJECT_PTR(cpu,i);
- this_object->cpu = cpu;
- this_object->index = i;
- this_object->kobj.parent = cache_kobject[cpu];
- kobject_set_name(&(this_object->kobj), "index%1lu", i);
- this_object->kobj.ktype = &ktype_cache;
- retval = kobject_register(&(this_object->kobj));
- if (unlikely(retval)) {
- for (j = 0; j < i; j++) {
- kobject_unregister(
- &(INDEX_KOBJECT_PTR(cpu,j)->kobj));
- }
- kobject_unregister(cache_kobject[cpu]);
- cpuid4_cache_sysfs_exit(cpu);
- break;
- }
- }
- return retval;
-}
-
-static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
-{
- unsigned int cpu = sys_dev->id;
- unsigned long i;
-
- for (i = 0; i < num_cache_leaves; i++)
- kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
- kobject_unregister(cache_kobject[cpu]);
- cpuid4_cache_sysfs_exit(cpu);
- return;
-}
-
-static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
- struct sys_device *sys_dev;
-
- sys_dev = get_cpu_sysdev(cpu);
- switch (action) {
- case CPU_ONLINE:
- cache_add_dev(sys_dev);
- break;
- case CPU_DEAD:
- cache_remove_dev(sys_dev);
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block cacheinfo_cpu_notifier =
-{
- .notifier_call = cacheinfo_cpu_callback,
-};
-
-static int __cpuinit cache_sysfs_init(void)
-{
- int i;
-
- if (num_cache_leaves == 0)
- return 0;
-
- register_cpu_notifier(&cacheinfo_cpu_notifier);
-
- for_each_online_cpu(i) {
- cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE,
- (void *)(long)i);
- }
-
- return 0;
-}
-
-device_initcall(cache_sysfs_init);
-
-#endif
diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile
deleted file mode 100644
index 30808f3d671..00000000000
--- a/arch/i386/kernel/cpu/mcheck/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o
-obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
deleted file mode 100644
index fc5d5215e23..00000000000
--- a/arch/i386/kernel/cpu/mcheck/k7.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Athlon/Hammer specific Machine Check Exception Reporting
- * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk>
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/config.h>
-#include <linux/interrupt.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/msr.h>
-
-#include "mce.h"
-
-/* Machine Check Handler For AMD Athlon/Duron */
-static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
-{
- int recover=1;
- u32 alow, ahigh, high, low;
- u32 mcgstl, mcgsth;
- int i;
-
- rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
- if (mcgstl & (1<<0)) /* Recoverable ? */
- recover=0;
-
- printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
- smp_processor_id(), mcgsth, mcgstl);
-
- for (i=1; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
- if (high&(1<<31)) {
- if (high & (1<<29))
- recover |= 1;
- if (high & (1<<25))
- recover |= 2;
- printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
- high &= ~(1<<31);
- if (high & (1<<27)) {
- rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- printk ("[%08x%08x]", ahigh, alow);
- }
- if (high & (1<<26)) {
- rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- printk (" at %08x%08x", ahigh, alow);
- }
- printk ("\n");
- /* Clear it */
- wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
- /* Serialize */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
- }
-
- if (recover&2)
- panic ("CPU context corrupt");
- if (recover&1)
- panic ("Unable to continue");
- printk (KERN_EMERG "Attempting to continue.\n");
- mcgstl &= ~(1<<2);
- wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
-}
-
-
-/* AMD K7 machine check is Intel like */
-void amd_mcheck_init(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- int i;
-
- machine_check_vector = k7_machine_check;
- wmb();
-
- printk (KERN_INFO "Intel machine check architecture supported.\n");
- rdmsr (MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<8)) /* Control register present ? */
- wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
- nr_mce_banks = l & 0xff;
-
- /* Clear status for MC index 0 separately, we don't touch CTL,
- * as some Athlons cause spurious MCEs when its enabled. */
- wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
- for (i=1; i<nr_mce_banks; i++) {
- wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
- wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
- }
-
- set_in_cr4 (X86_CR4_MCE);
- printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
- smp_processor_id());
-}
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
deleted file mode 100644
index 6170af3c271..00000000000
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * mce.c - x86 Machine Check Exception Reporting
- * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/smp.h>
-#include <linux/thread_info.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-
-#include "mce.h"
-
-int mce_disabled = 0;
-int nr_mce_banks;
-
-EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
-
-/* Handle unconfigured int18 (should never happen) */
-static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code)
-{
- printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
-}
-
-/* Call the installed machine check handler for this CPU setup. */
-void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
-
-/* This has to be run for each processor */
-void mcheck_init(struct cpuinfo_x86 *c)
-{
- if (mce_disabled==1)
- return;
-
- switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- if (c->x86==6 || c->x86==15)
- amd_mcheck_init(c);
- break;
-
- case X86_VENDOR_INTEL:
- if (c->x86==5)
- intel_p5_mcheck_init(c);
- if (c->x86==6)
- intel_p6_mcheck_init(c);
- if (c->x86==15)
- intel_p4_mcheck_init(c);
- break;
-
- case X86_VENDOR_CENTAUR:
- if (c->x86==5)
- winchip_mcheck_init(c);
- break;
-
- default:
- break;
- }
-}
-
-static int __init mcheck_disable(char *str)
-{
- mce_disabled = 1;
- return 0;
-}
-
-static int __init mcheck_enable(char *str)
-{
- mce_disabled = -1;
- return 0;
-}
-
-__setup("nomce", mcheck_disable);
-__setup("mce", mcheck_enable);
diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h
deleted file mode 100644
index dc2416dfef1..00000000000
--- a/arch/i386/kernel/cpu/mcheck/mce.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <linux/init.h>
-
-void amd_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
-void winchip_mcheck_init(struct cpuinfo_x86 *c);
-
-/* Call the installed machine check handler for this CPU setup. */
-extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code);
-
-extern int mce_disabled __initdata;
-extern int nr_mce_banks;
-
diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c
deleted file mode 100644
index 82dffe0d495..00000000000
--- a/arch/i386/kernel/cpu/mcheck/non-fatal.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Non Fatal Machine Check Exception Reporting
- *
- * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
- *
- * This file contains routines to check for non-fatal MCEs every 15s
- *
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/config.h>
-#include <linux/workqueue.h>
-#include <linux/interrupt.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/msr.h>
-
-#include "mce.h"
-
-static int firstbank;
-
-#define MCE_RATE 15*HZ /* timer rate is 15s */
-
-static void mce_checkregs (void *info)
-{
- u32 low, high;
- int i;
-
- for (i=firstbank; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high);
-
- if (high & (1<<31)) {
- printk(KERN_INFO "MCE: The hardware reports a non "
- "fatal, correctable incident occurred on "
- "CPU %d.\n",
- smp_processor_id());
- printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
-
- /* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
- wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-
- /* Serialize */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
- }
-}
-
-static void mce_work_fn(void *data);
-static DECLARE_WORK(mce_work, mce_work_fn, NULL);
-
-static void mce_work_fn(void *data)
-{
- on_each_cpu(mce_checkregs, NULL, 1, 1);
- schedule_delayed_work(&mce_work, MCE_RATE);
-}
-
-static int __init init_nonfatal_mce_checker(void)
-{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- /* Check for MCE support */
- if (!cpu_has(c, X86_FEATURE_MCE))
- return -ENODEV;
-
- /* Check for PPro style MCA */
- if (!cpu_has(c, X86_FEATURE_MCA))
- return -ENODEV;
-
- /* Some Athlons misbehave when we frob bank 0 */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 6)
- firstbank = 1;
- else
- firstbank = 0;
-
- /*
- * Check for non-fatal errors every MCE_RATE s
- */
- schedule_delayed_work(&mce_work, MCE_RATE);
- printk(KERN_INFO "Machine check exception polling timer started.\n");
- return 0;
-}
-module_init(init_nonfatal_mce_checker);
-
-MODULE_LICENSE("GPL");
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
deleted file mode 100644
index fd2c459a31e..00000000000
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * P4 specific Machine Check Exception Reporting
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/config.h>
-#include <linux/interrupt.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/msr.h>
-#include <asm/apic.h>
-
-#include "mce.h"
-
-/* as supported by the P4/Xeon family */
-struct intel_mce_extended_msrs {
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 esi;
- u32 edi;
- u32 ebp;
- u32 esp;
- u32 eflags;
- u32 eip;
- /* u32 *reserved[]; */
-};
-
-static int mce_num_extended_msrs = 0;
-
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
-static void unexpected_thermal_interrupt(struct pt_regs *regs)
-{
- printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
- smp_processor_id());
- add_taint(TAINT_MACHINE_CHECK);
-}
-
-/* P4/Xeon Thermal transition interrupt handler */
-static void intel_thermal_interrupt(struct pt_regs *regs)
-{
- u32 l, h;
- unsigned int cpu = smp_processor_id();
- static unsigned long next[NR_CPUS];
-
- ack_APIC_irq();
-
- if (time_after(next[cpu], jiffies))
- return;
-
- next[cpu] = jiffies + HZ*5;
- rdmsr(MSR_IA32_THERM_STATUS, l, h);
- if (l & 0x1) {
- printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
- printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
- cpu);
- add_taint(TAINT_MACHINE_CHECK);
- } else {
- printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
- }
-}
-
-/* Thermal interrupt handler for this CPU setup */
-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
-
-fastcall void smp_thermal_interrupt(struct pt_regs *regs)
-{
- irq_enter();
- vendor_thermal_interrupt(regs);
- irq_exit();
-}
-
-/* P4/Xeon Thermal regulation detect and init */
-static void intel_init_thermal(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- unsigned int cpu = smp_processor_id();
-
- /* Thermal monitoring */
- if (!cpu_has(c, X86_FEATURE_ACPI))
- return; /* -ENODEV */
-
- /* Clock modulation */
- if (!cpu_has(c, X86_FEATURE_ACC))
- return; /* -ENODEV */
-
- /* first check if its enabled already, in which case there might
- * be some SMM goo which handles it, so we can't even put a handler
- * since it might be delivered via SMI already -zwanem.
- */
- rdmsr (MSR_IA32_MISC_ENABLE, l, h);
- h = apic_read(APIC_LVTTHMR);
- if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
- printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
- cpu);
- return; /* -EBUSY */
- }
-
- /* check whether a vector already exists, temporarily masked? */
- if (h & APIC_VECTOR_MASK) {
- printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
- "installed\n",
- cpu, (h & APIC_VECTOR_MASK));
- return; /* -EBUSY */
- }
-
- /* The temperature transition interrupt handler setup */
- h = THERMAL_APIC_VECTOR; /* our delivery vector */
- h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
- apic_write_around(APIC_LVTTHMR, h);
-
- rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
- wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
-
- /* ok we're good to go... */
- vendor_thermal_interrupt = intel_thermal_interrupt;
-
- rdmsr (MSR_IA32_MISC_ENABLE, l, h);
- wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
-
- l = apic_read (APIC_LVTTHMR);
- apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
- printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
- return;
-}
-#endif /* CONFIG_X86_MCE_P4THERMAL */
-
-
-/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
-{
- u32 h;
-
- if (mce_num_extended_msrs == 0)
- goto done;
-
- rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
- rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
- rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
- rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
- rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
- rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
- rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
- rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
- rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
- rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
-
- /* can we rely on kmalloc to do a dynamic
- * allocation for the reserved registers?
- */
-done:
- return mce_num_extended_msrs;
-}
-
-static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
-{
- int recover=1;
- u32 alow, ahigh, high, low;
- u32 mcgstl, mcgsth;
- int i;
- struct intel_mce_extended_msrs dbg;
-
- rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
- if (mcgstl & (1<<0)) /* Recoverable ? */
- recover=0;
-
- printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
- smp_processor_id(), mcgsth, mcgstl);
-
- if (intel_get_extended_msrs(&dbg)) {
- printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
- smp_processor_id(), dbg.eip, dbg.eflags);
- printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
- dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
- printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
- dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
- }
-
- for (i=0; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
- if (high & (1<<31)) {
- if (high & (1<<29))
- recover |= 1;
- if (high & (1<<25))
- recover |= 2;
- printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
- high &= ~(1<<31);
- if (high & (1<<27)) {
- rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- printk ("[%08x%08x]", ahigh, alow);
- }
- if (high & (1<<26)) {
- rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- printk (" at %08x%08x", ahigh, alow);
- }
- printk ("\n");
- }
- }
-
- if (recover & 2)
- panic ("CPU context corrupt");
- if (recover & 1)
- panic ("Unable to continue");
-
- printk(KERN_EMERG "Attempting to continue.\n");
- /*
- * Do not clear the MSR_IA32_MCi_STATUS if the error is not
- * recoverable/continuable.This will allow BIOS to look at the MSRs
- * for errors if the OS could not log the error.
- */
- for (i=0; i<nr_mce_banks; i++) {
- u32 msr;
- msr = MSR_IA32_MC0_STATUS+i*4;
- rdmsr (msr, low, high);
- if (high&(1<<31)) {
- /* Clear it */
- wrmsr(msr, 0UL, 0UL);
- /* Serialize */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
- }
- mcgstl &= ~(1<<2);
- wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
-}
-
-
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- int i;
-
- machine_check_vector = intel_machine_check;
- wmb();
-
- printk (KERN_INFO "Intel machine check architecture supported.\n");
- rdmsr (MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<8)) /* Control register present ? */
- wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
- nr_mce_banks = l & 0xff;
-
- for (i=0; i<nr_mce_banks; i++) {
- wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
- wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
- }
-
- set_in_cr4 (X86_CR4_MCE);
- printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
- smp_processor_id());
-
- /* Check for P4/Xeon extended MCE MSRs */
- rdmsr (MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<9)) {/* MCG_EXT_P */
- mce_num_extended_msrs = (l >> 16) & 0xff;
- printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
- " available\n",
- smp_processor_id(), mce_num_extended_msrs);
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
- /* Check for P4/Xeon Thermal monitor */
- intel_init_thermal(c);
-#endif
- }
-}
diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c
deleted file mode 100644
index 94bc43d950c..00000000000
--- a/arch/i386/kernel/cpu/mcheck/p5.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * P5 specific Machine Check Exception Reporting
- * (C) Copyright 2002 Alan Cox <alan@redhat.com>
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/msr.h>
-
-#include "mce.h"
-
-/* Machine check handler for Pentium class Intel */
-static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code)
-{
- u32 loaddr, hi, lotype;
- rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
- rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
- printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
- if(lotype&(1<<5))
- printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
- add_taint(TAINT_MACHINE_CHECK);
-}
-
-/* Set up machine check reporting for processors with Intel style MCE */
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
-{
- u32 l, h;
-
- /*Check for MCE support */
- if( !cpu_has(c, X86_FEATURE_MCE) )
- return;
-
- /* Default P5 to off as its often misconnected */
- if(mce_disabled != -1)
- return;
- machine_check_vector = pentium_machine_check;
- wmb();
-
- /* Read registers before enabling */
- rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
- rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
- printk(KERN_INFO "Intel old style machine check architecture supported.\n");
-
- /* Enable MCE */
- set_in_cr4(X86_CR4_MCE);
- printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
-}
diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c
deleted file mode 100644
index deeae42ce19..00000000000
--- a/arch/i386/kernel/cpu/mcheck/p6.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * P6 specific Machine Check Exception Reporting
- * (C) Copyright 2002 Alan Cox <alan@redhat.com>
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/msr.h>
-
-#include "mce.h"
-
-/* Machine Check Handler For PII/PIII */
-static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
-{
- int recover=1;
- u32 alow, ahigh, high, low;
- u32 mcgstl, mcgsth;
- int i;
-
- rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
- if (mcgstl & (1<<0)) /* Recoverable ? */
- recover=0;
-
- printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
- smp_processor_id(), mcgsth, mcgstl);
-
- for (i=0; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
- if (high & (1<<31)) {
- if (high & (1<<29))
- recover |= 1;
- if (high & (1<<25))
- recover |= 2;
- printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
- high &= ~(1<<31);
- if (high & (1<<27)) {
- rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- printk ("[%08x%08x]", ahigh, alow);
- }
- if (high & (1<<26)) {
- rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- printk (" at %08x%08x", ahigh, alow);
- }
- printk ("\n");
- }
- }
-
- if (recover & 2)
- panic ("CPU context corrupt");
- if (recover & 1)
- panic ("Unable to continue");
-
- printk (KERN_EMERG "Attempting to continue.\n");
- /*
- * Do not clear the MSR_IA32_MCi_STATUS if the error is not
- * recoverable/continuable.This will allow BIOS to look at the MSRs
- * for errors if the OS could not log the error.
- */
- for (i=0; i<nr_mce_banks; i++) {
- unsigned int msr;
- msr = MSR_IA32_MC0_STATUS+i*4;
- rdmsr (msr,low, high);
- if (high & (1<<31)) {
- /* Clear it */
- wrmsr (msr, 0UL, 0UL);
- /* Serialize */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
- }
- mcgstl &= ~(1<<2);
- wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
-}
-
-/* Set up machine check reporting for processors with Intel style MCE */
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- int i;
-
- /* Check for MCE support */
- if (!cpu_has(c, X86_FEATURE_MCE))
- return;
-
- /* Check for PPro style MCA */
- if (!cpu_has(c, X86_FEATURE_MCA))
- return;
-
- /* Ok machine check is available */
- machine_check_vector = intel_machine_check;
- wmb();
-
- printk (KERN_INFO "Intel machine check architecture supported.\n");
- rdmsr (MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<8)) /* Control register present ? */
- wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
- nr_mce_banks = l & 0xff;
-
- /*
- * Following the example in IA-32 SDM Vol 3:
- * - MC0_CTL should not be written
- * - Status registers on all banks should be cleared on reset
- */
- for (i=1; i<nr_mce_banks; i++)
- wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
-
- for (i=0; i<nr_mce_banks; i++)
- wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
-
- set_in_cr4 (X86_CR4_MCE);
- printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
- smp_processor_id());
-}
diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c
deleted file mode 100644
index 9e424b6c293..00000000000
--- a/arch/i386/kernel/cpu/mcheck/winchip.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * IDT Winchip specific Machine Check Exception Reporting
- * (C) Copyright 2002 Alan Cox <alan@redhat.com>
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/msr.h>
-
-#include "mce.h"
-
-/* Machine check handler for WinChip C6 */
-static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code)
-{
- printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
- add_taint(TAINT_MACHINE_CHECK);
-}
-
-/* Set up machine check reporting on the Winchip C6 series */
-void winchip_mcheck_init(struct cpuinfo_x86 *c)
-{
- u32 lo, hi;
- machine_check_vector = winchip_machine_check;
- wmb();
- rdmsr(MSR_IDT_FCR1, lo, hi);
- lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */
- lo&= ~(1<<4); /* Enable MCE */
- wrmsr(MSR_IDT_FCR1, lo, hi);
- set_in_cr4(X86_CR4_MCE);
- printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
-}
diff --git a/arch/i386/kernel/cpu/mtrr/Makefile b/arch/i386/kernel/cpu/mtrr/Makefile
deleted file mode 100644
index a25b701ab84..00000000000
--- a/arch/i386/kernel/cpu/mtrr/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-obj-y := main.o if.o generic.o state.o
-obj-y += amd.o
-obj-y += cyrix.o
-obj-y += centaur.o
-
diff --git a/arch/i386/kernel/cpu/mtrr/amd.c b/arch/i386/kernel/cpu/mtrr/amd.c
deleted file mode 100644
index 1a1e04b6fd0..00000000000
--- a/arch/i386/kernel/cpu/mtrr/amd.c
+++ /dev/null
@@ -1,121 +0,0 @@
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-
-#include "mtrr.h"
-
-static void
-amd_get_mtrr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
-{
- unsigned long low, high;
-
- rdmsr(MSR_K6_UWCCR, low, high);
- /* Upper dword is region 1, lower is region 0 */
- if (reg == 1)
- low = high;
- /* The base masks off on the right alignment */
- *base = (low & 0xFFFE0000) >> PAGE_SHIFT;
- *type = 0;
- if (low & 1)
- *type = MTRR_TYPE_UNCACHABLE;
- if (low & 2)
- *type = MTRR_TYPE_WRCOMB;
- if (!(low & 3)) {
- *size = 0;
- return;
- }
- /*
- * This needs a little explaining. The size is stored as an
- * inverted mask of bits of 128K granularity 15 bits long offset
- * 2 bits
- *
- * So to get a size we do invert the mask and add 1 to the lowest
- * mask bit (4 as its 2 bits in). This gives us a size we then shift
- * to turn into 128K blocks
- *
- * eg 111 1111 1111 1100 is 512K
- *
- * invert 000 0000 0000 0011
- * +1 000 0000 0000 0100
- * *128K ...
- */
- low = (~low) & 0x1FFFC;
- *size = (low + 4) << (15 - PAGE_SHIFT);
- return;
-}
-
-static void amd_set_mtrr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
-/* [SUMMARY] Set variable MTRR register on the local CPU.
- <reg> The register to set.
- <base> The base address of the region.
- <size> The size of the region. If this is 0 the region is disabled.
- <type> The type of the region.
- <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
- be done externally.
- [RETURNS] Nothing.
-*/
-{
- u32 regs[2];
-
- /*
- * Low is MTRR0 , High MTRR 1
- */
- rdmsr(MSR_K6_UWCCR, regs[0], regs[1]);
- /*
- * Blank to disable
- */
- if (size == 0)
- regs[reg] = 0;
- else
- /* Set the register to the base, the type (off by one) and an
- inverted bitmask of the size The size is the only odd
- bit. We are fed say 512K We invert this and we get 111 1111
- 1111 1011 but if you subtract one and invert you get the
- desired 111 1111 1111 1100 mask
-
- But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */
- regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC)
- | (base << PAGE_SHIFT) | (type + 1);
-
- /*
- * The writeback rule is quite specific. See the manual. Its
- * disable local interrupts, write back the cache, set the mtrr
- */
- wbinvd();
- wrmsr(MSR_K6_UWCCR, regs[0], regs[1]);
-}
-
-static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
-{
- /* Apply the K6 block alignment and size rules
- In order
- o Uncached or gathering only
- o 128K or bigger block
- o Power of 2 block
- o base suitably aligned to the power
- */
- if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT))
- || (size & ~(size - 1)) - size || (base & (size - 1)))
- return -EINVAL;
- return 0;
-}
-
-static struct mtrr_ops amd_mtrr_ops = {
- .vendor = X86_VENDOR_AMD,
- .set = amd_set_mtrr,
- .get = amd_get_mtrr,
- .get_free_region = generic_get_free_region,
- .validate_add_page = amd_validate_add_page,
- .have_wrcomb = positive_have_wrcomb,
-};
-
-int __init amd_init_mtrr(void)
-{
- set_mtrr_ops(&amd_mtrr_ops);
- return 0;
-}
-
-//arch_initcall(amd_mtrr_init);
diff --git a/arch/i386/kernel/cpu/mtrr/centaur.c b/arch/i386/kernel/cpu/mtrr/centaur.c
deleted file mode 100644
index 33f00ac314e..00000000000
--- a/arch/i386/kernel/cpu/mtrr/centaur.c
+++ /dev/null
@@ -1,223 +0,0 @@
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-#include "mtrr.h"
-
-static struct {
- unsigned long high;
- unsigned long low;
-} centaur_mcr[8];
-
-static u8 centaur_mcr_reserved;
-static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */
-
-/*
- * Report boot time MCR setups
- */
-
-static int
-centaur_get_free_region(unsigned long base, unsigned long size)
-/* [SUMMARY] Get a free MTRR.
- <base> The starting (base) address of the region.
- <size> The size (in bytes) of the region.
- [RETURNS] The index of the region on success, else -1 on error.
-*/
-{
- int i, max;
- mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
-
- max = num_var_ranges;
- for (i = 0; i < max; ++i) {
- if (centaur_mcr_reserved & (1 << i))
- continue;
- mtrr_if->get(i, &lbase, &lsize, &ltype);
- if (lsize == 0)
- return i;
- }
- return -ENOSPC;
-}
-
-void
-mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
-{
- centaur_mcr[mcr].low = lo;
- centaur_mcr[mcr].high = hi;
-}
-
-static void
-centaur_get_mcr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
-{
- *base = centaur_mcr[reg].high >> PAGE_SHIFT;
- *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
- *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */
- if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2))
- *type = MTRR_TYPE_UNCACHABLE;
- if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25)
- *type = MTRR_TYPE_WRBACK;
- if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31)
- *type = MTRR_TYPE_WRBACK;
-
-}
-
-static void centaur_set_mcr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
-{
- unsigned long low, high;
-
- if (size == 0) {
- /* Disable */
- high = low = 0;
- } else {
- high = base << PAGE_SHIFT;
- if (centaur_mcr_type == 0)
- low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */
- else {
- if (type == MTRR_TYPE_UNCACHABLE)
- low = -size << PAGE_SHIFT | 0x02; /* NC */
- else
- low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */
- }
- }
- centaur_mcr[reg].high = high;
- centaur_mcr[reg].low = low;
- wrmsr(MSR_IDT_MCR0 + reg, low, high);
-}
-
-#if 0
-/*
- * Initialise the later (saner) Winchip MCR variant. In this version
- * the BIOS can pass us the registers it has used (but not their values)
- * and the control register is read/write
- */
-
-static void __init
-centaur_mcr1_init(void)
-{
- unsigned i;
- u32 lo, hi;
-
- /* Unfortunately, MCR's are read-only, so there is no way to
- * find out what the bios might have done.
- */
-
- rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */
- lo &= ~0x1C0; /* clear key */
- lo |= 0x040; /* set key to 1 */
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */
- }
-
- centaur_mcr_type = 1;
-
- /*
- * Clear any unconfigured MCR's.
- */
-
- for (i = 0; i < 8; ++i) {
- if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) {
- if (!(lo & (1 << (9 + i))))
- wrmsr(MSR_IDT_MCR0 + i, 0, 0);
- else
- /*
- * If the BIOS set up an MCR we cannot see it
- * but we don't wish to obliterate it
- */
- centaur_mcr_reserved |= (1 << i);
- }
- }
- /*
- * Throw the main write-combining switch...
- * However if OOSTORE is enabled then people have already done far
- * cleverer things and we should behave.
- */
-
- lo |= 15; /* Write combine enables */
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-}
-
-/*
- * Initialise the original winchip with read only MCR registers
- * no used bitmask for the BIOS to pass on and write only control
- */
-
-static void __init
-centaur_mcr0_init(void)
-{
- unsigned i;
-
- /* Unfortunately, MCR's are read-only, so there is no way to
- * find out what the bios might have done.
- */
-
- /* Clear any unconfigured MCR's.
- * This way we are sure that the centaur_mcr array contains the actual
- * values. The disadvantage is that any BIOS tweaks are thus undone.
- *
- */
- for (i = 0; i < 8; ++i) {
- if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0)
- wrmsr(MSR_IDT_MCR0 + i, 0, 0);
- }
-
- wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */
-}
-
-/*
- * Initialise Winchip series MCR registers
- */
-
-static void __init
-centaur_mcr_init(void)
-{
- struct set_mtrr_context ctxt;
-
- set_mtrr_prepare_save(&ctxt);
- set_mtrr_cache_disable(&ctxt);
-
- if (boot_cpu_data.x86_model == 4)
- centaur_mcr0_init();
- else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9)
- centaur_mcr1_init();
-
- set_mtrr_done(&ctxt);
-}
-#endif
-
-static int centaur_validate_add_page(unsigned long base,
- unsigned long size, unsigned int type)
-{
- /*
- * FIXME: Winchip2 supports uncached
- */
- if (type != MTRR_TYPE_WRCOMB &&
- (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
- printk(KERN_WARNING
- "mtrr: only write-combining%s supported\n",
- centaur_mcr_type ? " and uncacheable are"
- : " is");
- return -EINVAL;
- }
- return 0;
-}
-
-static struct mtrr_ops centaur_mtrr_ops = {
- .vendor = X86_VENDOR_CENTAUR,
-// .init = centaur_mcr_init,
- .set = centaur_set_mcr,
- .get = centaur_get_mcr,
- .get_free_region = centaur_get_free_region,
- .validate_add_page = centaur_validate_add_page,
- .have_wrcomb = positive_have_wrcomb,
-};
-
-int __init centaur_init_mtrr(void)
-{
- set_mtrr_ops(&centaur_mtrr_ops);
- return 0;
-}
-
-//arch_initcall(centaur_init_mtrr);
diff --git a/arch/i386/kernel/cpu/mtrr/changelog b/arch/i386/kernel/cpu/mtrr/changelog
deleted file mode 100644
index af136853595..00000000000
--- a/arch/i386/kernel/cpu/mtrr/changelog
+++ /dev/null
@@ -1,229 +0,0 @@
- ChangeLog
-
- Prehistory Martin Tischhäuser <martin@ikcbarka.fzk.de>
- Initial register-setting code (from proform-1.0).
- 19971216 Richard Gooch <rgooch@atnf.csiro.au>
- Original version for /proc/mtrr interface, SMP-safe.
- v1.0
- 19971217 Richard Gooch <rgooch@atnf.csiro.au>
- Bug fix for ioctls()'s.
- Added sample code in Documentation/mtrr.txt
- v1.1
- 19971218 Richard Gooch <rgooch@atnf.csiro.au>
- Disallow overlapping regions.
- 19971219 Jens Maurer <jmaurer@menuett.rhein-main.de>
- Register-setting fixups.
- v1.2
- 19971222 Richard Gooch <rgooch@atnf.csiro.au>
- Fixups for kernel 2.1.75.
- v1.3
- 19971229 David Wragg <dpw@doc.ic.ac.uk>
- Register-setting fixups and conformity with Intel conventions.
- 19971229 Richard Gooch <rgooch@atnf.csiro.au>
- Cosmetic changes and wrote this ChangeLog ;-)
- 19980106 Richard Gooch <rgooch@atnf.csiro.au>
- Fixups for kernel 2.1.78.
- v1.4
- 19980119 David Wragg <dpw@doc.ic.ac.uk>
- Included passive-release enable code (elsewhere in PCI setup).
- v1.5
- 19980131 Richard Gooch <rgooch@atnf.csiro.au>
- Replaced global kernel lock with private spinlock.
- v1.6
- 19980201 Richard Gooch <rgooch@atnf.csiro.au>
- Added wait for other CPUs to complete changes.
- v1.7
- 19980202 Richard Gooch <rgooch@atnf.csiro.au>
- Bug fix in definition of <set_mtrr> for UP.
- v1.8
- 19980319 Richard Gooch <rgooch@atnf.csiro.au>
- Fixups for kernel 2.1.90.
- 19980323 Richard Gooch <rgooch@atnf.csiro.au>
- Move SMP BIOS fixup before secondary CPUs call <calibrate_delay>
- v1.9
- 19980325 Richard Gooch <rgooch@atnf.csiro.au>
- Fixed test for overlapping regions: confused by adjacent regions
- 19980326 Richard Gooch <rgooch@atnf.csiro.au>
- Added wbinvd in <set_mtrr_prepare>.
- 19980401 Richard Gooch <rgooch@atnf.csiro.au>
- Bug fix for non-SMP compilation.
- 19980418 David Wragg <dpw@doc.ic.ac.uk>
- Fixed-MTRR synchronisation for SMP and use atomic operations
- instead of spinlocks.
- 19980418 Richard Gooch <rgooch@atnf.csiro.au>
- Differentiate different MTRR register classes for BIOS fixup.
- v1.10
- 19980419 David Wragg <dpw@doc.ic.ac.uk>
- Bug fix in variable MTRR synchronisation.
- v1.11
- 19980419 Richard Gooch <rgooch@atnf.csiro.au>
- Fixups for kernel 2.1.97.
- v1.12
- 19980421 Richard Gooch <rgooch@atnf.csiro.au>
- Safer synchronisation across CPUs when changing MTRRs.
- v1.13
- 19980423 Richard Gooch <rgooch@atnf.csiro.au>
- Bugfix for SMP systems without MTRR support.
- v1.14
- 19980427 Richard Gooch <rgooch@atnf.csiro.au>
- Trap calls to <mtrr_add> and <mtrr_del> on non-MTRR machines.
- v1.15
- 19980427 Richard Gooch <rgooch@atnf.csiro.au>
- Use atomic bitops for setting SMP change mask.
- v1.16
- 19980428 Richard Gooch <rgooch@atnf.csiro.au>
- Removed spurious diagnostic message.
- v1.17
- 19980429 Richard Gooch <rgooch@atnf.csiro.au>
- Moved register-setting macros into this file.
- Moved setup code from init/main.c to i386-specific areas.
- v1.18
- 19980502 Richard Gooch <rgooch@atnf.csiro.au>
- Moved MTRR detection outside conditionals in <mtrr_init>.
- v1.19
- 19980502 Richard Gooch <rgooch@atnf.csiro.au>
- Documentation improvement: mention Pentium II and AGP.
- v1.20
- 19980521 Richard Gooch <rgooch@atnf.csiro.au>
- Only manipulate interrupt enable flag on local CPU.
- Allow enclosed uncachable regions.
- v1.21
- 19980611 Richard Gooch <rgooch@atnf.csiro.au>
- Always define <main_lock>.
- v1.22
- 19980901 Richard Gooch <rgooch@atnf.csiro.au>
- Removed module support in order to tidy up code.
- Added sanity check for <mtrr_add>/<mtrr_del> before <mtrr_init>.
- Created addition queue for prior to SMP commence.
- v1.23
- 19980902 Richard Gooch <rgooch@atnf.csiro.au>
- Ported patch to kernel 2.1.120-pre3.
- v1.24
- 19980910 Richard Gooch <rgooch@atnf.csiro.au>
- Removed sanity checks and addition queue: Linus prefers an OOPS.
- v1.25
- 19981001 Richard Gooch <rgooch@atnf.csiro.au>
- Fixed harmless compiler warning in include/asm-i386/mtrr.h
- Fixed version numbering and history for v1.23 -> v1.24.
- v1.26
- 19990118 Richard Gooch <rgooch@atnf.csiro.au>
- Added devfs support.
- v1.27
- 19990123 Richard Gooch <rgooch@atnf.csiro.au>
- Changed locking to spin with reschedule.
- Made use of new <smp_call_function>.
- v1.28
- 19990201 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Extended the driver to be able to use Cyrix style ARRs.
- 19990204 Richard Gooch <rgooch@atnf.csiro.au>
- Restructured Cyrix support.
- v1.29
- 19990204 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Refined ARR support: enable MAPEN in set_mtrr_prepare()
- and disable MAPEN in set_mtrr_done().
- 19990205 Richard Gooch <rgooch@atnf.csiro.au>
- Minor cleanups.
- v1.30
- 19990208 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Protect plain 6x86s (and other processors without the
- Page Global Enable feature) against accessing CR4 in
- set_mtrr_prepare() and set_mtrr_done().
- 19990210 Richard Gooch <rgooch@atnf.csiro.au>
- Turned <set_mtrr_up> and <get_mtrr> into function pointers.
- v1.31
- 19990212 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Major rewrite of cyrix_arr_init(): do not touch ARRs,
- leave them as the BIOS have set them up.
- Enable usage of all 8 ARRs.
- Avoid multiplications by 3 everywhere and other
- code clean ups/speed ups.
- 19990213 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Set up other Cyrix processors identical to the boot cpu.
- Since Cyrix don't support Intel APIC, this is l'art pour l'art.
- Weigh ARRs by size:
- If size <= 32M is given, set up ARR# we were given.
- If size > 32M is given, set up ARR7 only if it is free,
- fail otherwise.
- 19990214 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Also check for size >= 256K if we are to set up ARR7,
- mtrr_add() returns the value it gets from set_mtrr()
- 19990218 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Remove Cyrix "coma bug" workaround from here.
- Moved to linux/arch/i386/kernel/setup.c and
- linux/include/asm-i386/bugs.h
- 19990228 Richard Gooch <rgooch@atnf.csiro.au>
- Added MTRRIOC_KILL_ENTRY ioctl(2)
- Trap for counter underflow in <mtrr_file_del>.
- Trap for 4 MiB aligned regions for PPro, stepping <= 7.
- 19990301 Richard Gooch <rgooch@atnf.csiro.au>
- Created <get_free_region> hook.
- 19990305 Richard Gooch <rgooch@atnf.csiro.au>
- Temporarily disable AMD support now MTRR capability flag is set.
- v1.32
- 19990308 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Adjust my changes (19990212-19990218) to Richard Gooch's
- latest changes. (19990228-19990305)
- v1.33
- 19990309 Richard Gooch <rgooch@atnf.csiro.au>
- Fixed typo in <printk> message.
- 19990310 Richard Gooch <rgooch@atnf.csiro.au>
- Support K6-II/III based on Alan Cox's <alan@redhat.com> patches.
- v1.34
- 19990511 Bart Hartgers <bart@etpmod.phys.tue.nl>
- Support Centaur C6 MCR's.
- 19990512 Richard Gooch <rgooch@atnf.csiro.au>
- Minor cleanups.
- v1.35
- 19990707 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Check whether ARR3 is protected in cyrix_get_free_region()
- and mtrr_del(). The code won't attempt to delete or change it
- from now on if the BIOS protected ARR3. It silently skips ARR3
- in cyrix_get_free_region() or returns with an error code from
- mtrr_del().
- 19990711 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Reset some bits in the CCRs in cyrix_arr_init() to disable SMM
- if ARR3 isn't protected. This is needed because if SMM is active
- and ARR3 isn't protected then deleting and setting ARR3 again
- may lock up the processor. With SMM entirely disabled, it does
- not happen.
- 19990812 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Rearrange switch() statements so the driver accomodates to
- the fact that the AMD Athlon handles its MTRRs the same way
- as Intel does.
- 19990814 Zoltán Böszörményi <zboszor@mail.externet.hu>
- Double check for Intel in mtrr_add()'s big switch() because
- that revision check is only valid for Intel CPUs.
- 19990819 Alan Cox <alan@redhat.com>
- Tested Zoltan's changes on a pre production Athlon - 100%
- success.
- 19991008 Manfred Spraul <manfreds@colorfullife.com>
- replaced spin_lock_reschedule() with a normal semaphore.
- v1.36
- 20000221 Richard Gooch <rgooch@atnf.csiro.au>
- Compile fix if procfs and devfs not enabled.
- Formatting changes.
- v1.37
- 20001109 H. Peter Anvin <hpa@zytor.com>
- Use the new centralized CPU feature detects.
-
- v1.38
- 20010309 Dave Jones <davej@suse.de>
- Add support for Cyrix III.
-
- v1.39
- 20010312 Dave Jones <davej@suse.de>
- Ugh, I broke AMD support.
- Reworked fix by Troels Walsted Hansen <troels@thule.no>
-
- v1.40
- 20010327 Dave Jones <davej@suse.de>
- Adapted Cyrix III support to include VIA C3.
-
- v2.0
- 20020306 Patrick Mochel <mochel@osdl.org>
- Split mtrr.c -> mtrr/*.c
- Converted to Linux Kernel Coding Style
- Fixed several minor nits in form
- Moved some SMP-only functions out, so they can be used
- for power management in the future.
- TODO: Fix user interface cruft.
diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c
deleted file mode 100644
index 9027a987006..00000000000
--- a/arch/i386/kernel/cpu/mtrr/cyrix.c
+++ /dev/null
@@ -1,364 +0,0 @@
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-#include <asm/io.h>
-#include "mtrr.h"
-
-int arr3_protected;
-
-static void
-cyrix_get_arr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
-{
- unsigned long flags;
- unsigned char arr, ccr3, rcr, shift;
-
- arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
-
- /* Save flags and disable interrupts */
- local_irq_save(flags);
-
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- ((unsigned char *) base)[3] = getCx86(arr);
- ((unsigned char *) base)[2] = getCx86(arr + 1);
- ((unsigned char *) base)[1] = getCx86(arr + 2);
- rcr = getCx86(CX86_RCR_BASE + reg);
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
-
- /* Enable interrupts if it was enabled previously */
- local_irq_restore(flags);
- shift = ((unsigned char *) base)[1] & 0x0f;
- *base >>= PAGE_SHIFT;
-
- /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
- * Note: shift==0xf means 4G, this is unsupported.
- */
- if (shift)
- *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1);
- else
- *size = 0;
-
- /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */
- if (reg < 7) {
- switch (rcr) {
- case 1:
- *type = MTRR_TYPE_UNCACHABLE;
- break;
- case 8:
- *type = MTRR_TYPE_WRBACK;
- break;
- case 9:
- *type = MTRR_TYPE_WRCOMB;
- break;
- case 24:
- default:
- *type = MTRR_TYPE_WRTHROUGH;
- break;
- }
- } else {
- switch (rcr) {
- case 0:
- *type = MTRR_TYPE_UNCACHABLE;
- break;
- case 8:
- *type = MTRR_TYPE_WRCOMB;
- break;
- case 9:
- *type = MTRR_TYPE_WRBACK;
- break;
- case 25:
- default:
- *type = MTRR_TYPE_WRTHROUGH;
- break;
- }
- }
-}
-
-static int
-cyrix_get_free_region(unsigned long base, unsigned long size)
-/* [SUMMARY] Get a free ARR.
- <base> The starting (base) address of the region.
- <size> The size (in bytes) of the region.
- [RETURNS] The index of the region on success, else -1 on error.
-*/
-{
- int i;
- mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
-
- /* If we are to set up a region >32M then look at ARR7 immediately */
- if (size > 0x2000) {
- cyrix_get_arr(7, &lbase, &lsize, &ltype);
- if (lsize == 0)
- return 7;
- /* Else try ARR0-ARR6 first */
- } else {
- for (i = 0; i < 7; i++) {
- cyrix_get_arr(i, &lbase, &lsize, &ltype);
- if ((i == 3) && arr3_protected)
- continue;
- if (lsize == 0)
- return i;
- }
- /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */
- cyrix_get_arr(i, &lbase, &lsize, &ltype);
- if ((lsize == 0) && (size >= 0x40))
- return i;
- }
- return -ENOSPC;
-}
-
-static u32 cr4 = 0;
-static u32 ccr3;
-
-static void prepare_set(void)
-{
- u32 cr0;
-
- /* Save value of CR4 and clear Page Global Enable (bit 7) */
- if ( cpu_has_pge ) {
- cr4 = read_cr4();
- write_cr4(cr4 & (unsigned char) ~(1 << 7));
- }
-
- /* Disable and flush caches. Note that wbinvd flushes the TLBs as
- a side-effect */
- cr0 = read_cr0() | 0x40000000;
- wbinvd();
- write_cr0(cr0);
- wbinvd();
-
- /* Cyrix ARRs - everything else were excluded at the top */
- ccr3 = getCx86(CX86_CCR3);
-
- /* Cyrix ARRs - everything else were excluded at the top */
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
-
-}
-
-static void post_set(void)
-{
- /* Flush caches and TLBs */
- wbinvd();
-
- /* Cyrix ARRs - everything else was excluded at the top */
- setCx86(CX86_CCR3, ccr3);
-
- /* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
-
- /* Restore value of CR4 */
- if ( cpu_has_pge )
- write_cr4(cr4);
-}
-
-static void cyrix_set_arr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
-{
- unsigned char arr, arr_type, arr_size;
-
- arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
-
- /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */
- if (reg >= 7)
- size >>= 6;
-
- size &= 0x7fff; /* make sure arr_size <= 14 */
- for (arr_size = 0; size; arr_size++, size >>= 1) ;
-
- if (reg < 7) {
- switch (type) {
- case MTRR_TYPE_UNCACHABLE:
- arr_type = 1;
- break;
- case MTRR_TYPE_WRCOMB:
- arr_type = 9;
- break;
- case MTRR_TYPE_WRTHROUGH:
- arr_type = 24;
- break;
- default:
- arr_type = 8;
- break;
- }
- } else {
- switch (type) {
- case MTRR_TYPE_UNCACHABLE:
- arr_type = 0;
- break;
- case MTRR_TYPE_WRCOMB:
- arr_type = 8;
- break;
- case MTRR_TYPE_WRTHROUGH:
- arr_type = 25;
- break;
- default:
- arr_type = 9;
- break;
- }
- }
-
- prepare_set();
-
- base <<= PAGE_SHIFT;
- setCx86(arr, ((unsigned char *) &base)[3]);
- setCx86(arr + 1, ((unsigned char *) &base)[2]);
- setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size);
- setCx86(CX86_RCR_BASE + reg, arr_type);
-
- post_set();
-}
-
-typedef struct {
- unsigned long base;
- unsigned int size;
- mtrr_type type;
-} arr_state_t;
-
-static arr_state_t arr_state[8] __devinitdata = {
- {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL},
- {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}
-};
-
-static unsigned char ccr_state[7] __devinitdata = { 0, 0, 0, 0, 0, 0, 0 };
-
-static void cyrix_set_all(void)
-{
- int i;
-
- prepare_set();
-
- /* the CCRs are not contiguous */
- for (i = 0; i < 4; i++)
- setCx86(CX86_CCR0 + i, ccr_state[i]);
- for (; i < 7; i++)
- setCx86(CX86_CCR4 + i, ccr_state[i]);
- for (i = 0; i < 8; i++)
- cyrix_set_arr(i, arr_state[i].base,
- arr_state[i].size, arr_state[i].type);
-
- post_set();
-}
-
-#if 0
-/*
- * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
- * with the SMM (System Management Mode) mode. So we need the following:
- * Check whether SMI_LOCK (CCR3 bit 0) is set
- * if it is set, write a warning message: ARR3 cannot be changed!
- * (it cannot be changed until the next processor reset)
- * if it is reset, then we can change it, set all the needed bits:
- * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
- * - disable access to SMM memory (CCR1 bit 2 reset)
- * - disable SMM mode (CCR1 bit 1 reset)
- * - disable write protection of ARR3 (CCR6 bit 1 reset)
- * - (maybe) disable ARR3
- * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
- */
-static void __init
-cyrix_arr_init(void)
-{
- struct set_mtrr_context ctxt;
- unsigned char ccr[7];
- int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
-#ifdef CONFIG_SMP
- int i;
-#endif
-
- /* flush cache and enable MAPEN */
- set_mtrr_prepare_save(&ctxt);
- set_mtrr_cache_disable(&ctxt);
-
- /* Save all CCRs locally */
- ccr[0] = getCx86(CX86_CCR0);
- ccr[1] = getCx86(CX86_CCR1);
- ccr[2] = getCx86(CX86_CCR2);
- ccr[3] = ctxt.ccr3;
- ccr[4] = getCx86(CX86_CCR4);
- ccr[5] = getCx86(CX86_CCR5);
- ccr[6] = getCx86(CX86_CCR6);
-
- if (ccr[3] & 1) {
- ccrc[3] = 1;
- arr3_protected = 1;
- } else {
- /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
- * access to SMM memory through ARR3 (bit 7).
- */
- if (ccr[1] & 0x80) {
- ccr[1] &= 0x7f;
- ccrc[1] |= 0x80;
- }
- if (ccr[1] & 0x04) {
- ccr[1] &= 0xfb;
- ccrc[1] |= 0x04;
- }
- if (ccr[1] & 0x02) {
- ccr[1] &= 0xfd;
- ccrc[1] |= 0x02;
- }
- arr3_protected = 0;
- if (ccr[6] & 0x02) {
- ccr[6] &= 0xfd;
- ccrc[6] = 1; /* Disable write protection of ARR3 */
- setCx86(CX86_CCR6, ccr[6]);
- }
- /* Disable ARR3. This is safe now that we disabled SMM. */
- /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
- }
- /* If we changed CCR1 in memory, change it in the processor, too. */
- if (ccrc[1])
- setCx86(CX86_CCR1, ccr[1]);
-
- /* Enable ARR usage by the processor */
- if (!(ccr[5] & 0x20)) {
- ccr[5] |= 0x20;
- ccrc[5] = 1;
- setCx86(CX86_CCR5, ccr[5]);
- }
-#ifdef CONFIG_SMP
- for (i = 0; i < 7; i++)
- ccr_state[i] = ccr[i];
- for (i = 0; i < 8; i++)
- cyrix_get_arr(i,
- &arr_state[i].base, &arr_state[i].size,
- &arr_state[i].type);
-#endif
-
- set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */
-
- if (ccrc[5])
- printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n");
- if (ccrc[3])
- printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
-/*
- if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n");
- if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
- if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
-*/
- if (ccrc[6])
- printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n");
-}
-#endif
-
-static struct mtrr_ops cyrix_mtrr_ops = {
- .vendor = X86_VENDOR_CYRIX,
-// .init = cyrix_arr_init,
- .set_all = cyrix_set_all,
- .set = cyrix_set_arr,
- .get = cyrix_get_arr,
- .get_free_region = cyrix_get_free_region,
- .validate_add_page = generic_validate_add_page,
- .have_wrcomb = positive_have_wrcomb,
-};
-
-int __init cyrix_init_mtrr(void)
-{
- set_mtrr_ops(&cyrix_mtrr_ops);
- return 0;
-}
-
-//arch_initcall(cyrix_init_mtrr);
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
deleted file mode 100644
index 169ac8e0db6..00000000000
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ /dev/null
@@ -1,418 +0,0 @@
-/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
- because MTRRs can span upto 40 bits (36bits on most modern x86) */
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <asm/io.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-#include <asm/system.h>
-#include <asm/cpufeature.h>
-#include <asm/tlbflush.h>
-#include "mtrr.h"
-
-struct mtrr_state {
- struct mtrr_var_range *var_ranges;
- mtrr_type fixed_ranges[NUM_FIXED_RANGES];
- unsigned char enabled;
- mtrr_type def_type;
-};
-
-static unsigned long smp_changes_mask;
-static struct mtrr_state mtrr_state = {};
-
-/* Get the MSR pair relating to a var range */
-static void __init
-get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
-{
- rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
- rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
-}
-
-static void __init
-get_fixed_ranges(mtrr_type * frs)
-{
- unsigned int *p = (unsigned int *) frs;
- int i;
-
- rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
-
- for (i = 0; i < 2; i++)
- rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
- for (i = 0; i < 8; i++)
- rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
-}
-
-/* Grab all of the MTRR state for this CPU into *state */
-void __init get_mtrr_state(void)
-{
- unsigned int i;
- struct mtrr_var_range *vrs;
- unsigned lo, dummy;
-
- if (!mtrr_state.var_ranges) {
- mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range),
- GFP_KERNEL);
- if (!mtrr_state.var_ranges)
- return;
- }
- vrs = mtrr_state.var_ranges;
-
- for (i = 0; i < num_var_ranges; i++)
- get_mtrr_var_range(i, &vrs[i]);
- get_fixed_ranges(mtrr_state.fixed_ranges);
-
- rdmsr(MTRRdefType_MSR, lo, dummy);
- mtrr_state.def_type = (lo & 0xff);
- mtrr_state.enabled = (lo & 0xc00) >> 10;
-}
-
-/* Some BIOS's are fucked and don't set all MTRRs the same! */
-void __init mtrr_state_warn(void)
-{
- unsigned long mask = smp_changes_mask;
-
- if (!mask)
- return;
- if (mask & MTRR_CHANGE_MASK_FIXED)
- printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n");
- if (mask & MTRR_CHANGE_MASK_VARIABLE)
- printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n");
- if (mask & MTRR_CHANGE_MASK_DEFTYPE)
- printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n");
- printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
- printk(KERN_INFO "mtrr: corrected configuration.\n");
-}
-
-/* Doesn't attempt to pass an error out to MTRR users
- because it's quite complicated in some cases and probably not
- worth it because the best error handling is to ignore it. */
-void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
-{
- if (wrmsr_safe(msr, a, b) < 0)
- printk(KERN_ERR
- "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
- smp_processor_id(), msr, a, b);
-}
-
-int generic_get_free_region(unsigned long base, unsigned long size)
-/* [SUMMARY] Get a free MTRR.
- <base> The starting (base) address of the region.
- <size> The size (in bytes) of the region.
- [RETURNS] The index of the region on success, else -1 on error.
-*/
-{
- int i, max;
- mtrr_type ltype;
- unsigned long lbase;
- unsigned lsize;
-
- max = num_var_ranges;
- for (i = 0; i < max; ++i) {
- mtrr_if->get(i, &lbase, &lsize, &ltype);
- if (lsize == 0)
- return i;
- }
- return -ENOSPC;
-}
-
-static void generic_get_mtrr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
-{
- unsigned int mask_lo, mask_hi, base_lo, base_hi;
-
- rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
- if ((mask_lo & 0x800) == 0) {
- /* Invalid (i.e. free) range */
- *base = 0;
- *size = 0;
- *type = 0;
- return;
- }
-
- rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
-
- /* Work out the shifted address mask. */
- mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT)
- | mask_lo >> PAGE_SHIFT;
-
- /* This works correctly if size is a power of two, i.e. a
- contiguous range. */
- *size = -mask_lo;
- *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
- *type = base_lo & 0xff;
-}
-
-static int set_fixed_ranges(mtrr_type * frs)
-{
- unsigned int *p = (unsigned int *) frs;
- int changed = FALSE;
- int i;
- unsigned int lo, hi;
-
- rdmsr(MTRRfix64K_00000_MSR, lo, hi);
- if (p[0] != lo || p[1] != hi) {
- mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
- changed = TRUE;
- }
-
- for (i = 0; i < 2; i++) {
- rdmsr(MTRRfix16K_80000_MSR + i, lo, hi);
- if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
- mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2],
- p[3 + i * 2]);
- changed = TRUE;
- }
- }
-
- for (i = 0; i < 8; i++) {
- rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi);
- if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
- mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2],
- p[7 + i * 2]);
- changed = TRUE;
- }
- }
- return changed;
-}
-
-/* Set the MSR pair relating to a var range. Returns TRUE if
- changes are made */
-static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
-{
- unsigned int lo, hi;
- int changed = FALSE;
-
- rdmsr(MTRRphysBase_MSR(index), lo, hi);
- if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
- || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
- (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
- mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
- changed = TRUE;
- }
-
- rdmsr(MTRRphysMask_MSR(index), lo, hi);
-
- if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL)
- || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
- (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
- mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
- changed = TRUE;
- }
- return changed;
-}
-
-static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi)
-/* [SUMMARY] Set the MTRR state for this CPU.
- <state> The MTRR state information to read.
- <ctxt> Some relevant CPU context.
- [NOTE] The CPU must already be in a safe state for MTRR changes.
- [RETURNS] 0 if no changes made, else a mask indication what was changed.
-*/
-{
- unsigned int i;
- unsigned long change_mask = 0;
-
- for (i = 0; i < num_var_ranges; i++)
- if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
- change_mask |= MTRR_CHANGE_MASK_VARIABLE;
-
- if (set_fixed_ranges(mtrr_state.fixed_ranges))
- change_mask |= MTRR_CHANGE_MASK_FIXED;
-
- /* Set_mtrr_restore restores the old value of MTRRdefType,
- so to set it we fiddle with the saved value */
- if ((deftype_lo & 0xff) != mtrr_state.def_type
- || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
- deftype_lo |= (mtrr_state.def_type | mtrr_state.enabled << 10);
- change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
- }
-
- return change_mask;
-}
-
-
-static unsigned long cr4 = 0;
-static u32 deftype_lo, deftype_hi;
-static DEFINE_SPINLOCK(set_atomicity_lock);
-
-/*
- * Since we are disabling the cache don't allow any interrupts - they
- * would run extremely slow and would only increase the pain. The caller must
- * ensure that local interrupts are disabled and are reenabled after post_set()
- * has been called.
- */
-
-static void prepare_set(void)
-{
- unsigned long cr0;
-
- /* Note that this is not ideal, since the cache is only flushed/disabled
- for this CPU while the MTRRs are changed, but changing this requires
- more invasive changes to the way the kernel boots */
-
- spin_lock(&set_atomicity_lock);
-
- /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
- cr0 = read_cr0() | 0x40000000; /* set CD flag */
- write_cr0(cr0);
- wbinvd();
-
- /* Save value of CR4 and clear Page Global Enable (bit 7) */
- if ( cpu_has_pge ) {
- cr4 = read_cr4();
- write_cr4(cr4 & ~X86_CR4_PGE);
- }
-
- /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
- __flush_tlb();
-
- /* Save MTRR state */
- rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
-
- /* Disable MTRRs, and set the default type to uncached */
- mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi);
-}
-
-static void post_set(void)
-{
- /* Flush TLBs (no need to flush caches - they are disabled) */
- __flush_tlb();
-
- /* Intel (P6) standard MTRRs */
- mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
-
- /* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
-
- /* Restore value of CR4 */
- if ( cpu_has_pge )
- write_cr4(cr4);
- spin_unlock(&set_atomicity_lock);
-}
-
-static void generic_set_all(void)
-{
- unsigned long mask, count;
- unsigned long flags;
-
- local_irq_save(flags);
- prepare_set();
-
- /* Actually set the state */
- mask = set_mtrr_state(deftype_lo,deftype_hi);
-
- post_set();
- local_irq_restore(flags);
-
- /* Use the atomic bitops to update the global mask */
- for (count = 0; count < sizeof mask * 8; ++count) {
- if (mask & 0x01)
- set_bit(count, &smp_changes_mask);
- mask >>= 1;
- }
-
-}
-
-static void generic_set_mtrr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
-/* [SUMMARY] Set variable MTRR register on the local CPU.
- <reg> The register to set.
- <base> The base address of the region.
- <size> The size of the region. If this is 0 the region is disabled.
- <type> The type of the region.
- <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
- be done externally.
- [RETURNS] Nothing.
-*/
-{
- unsigned long flags;
- struct mtrr_var_range *vr;
-
- vr = &mtrr_state.var_ranges[reg];
-
- local_irq_save(flags);
- prepare_set();
-
- if (size == 0) {
- /* The invalid bit is kept in the mask, so we simply clear the
- relevant mask register to disable a range. */
- mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
- memset(vr, 0, sizeof(struct mtrr_var_range));
- } else {
- vr->base_lo = base << PAGE_SHIFT | type;
- vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
- vr->mask_lo = -size << PAGE_SHIFT | 0x800;
- vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
-
- mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi);
- mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
- }
-
- post_set();
- local_irq_restore(flags);
-}
-
-int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
-{
- unsigned long lbase, last;
-
- /* For Intel PPro stepping <= 7, must be 4 MiB aligned
- and not touch 0x70000000->0x7003FFFF */
- if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
- boot_cpu_data.x86_model == 1 &&
- boot_cpu_data.x86_mask <= 7) {
- if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
- printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
- return -EINVAL;
- }
- if (!(base + size < 0x70000000 || base > 0x7003FFFF) &&
- (type == MTRR_TYPE_WRCOMB
- || type == MTRR_TYPE_WRBACK)) {
- printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
- return -EINVAL;
- }
- }
-
- if (base + size < 0x100) {
- printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n",
- base, size);
- return -EINVAL;
- }
- /* Check upper bits of base and last are equal and lower bits are 0
- for base and 1 for last */
- last = base + size - 1;
- for (lbase = base; !(lbase & 1) && (last & 1);
- lbase = lbase >> 1, last = last >> 1) ;
- if (lbase != last) {
- printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n",
- base, size);
- return -EINVAL;
- }
- return 0;
-}
-
-
-static int generic_have_wrcomb(void)
-{
- unsigned long config, dummy;
- rdmsr(MTRRcap_MSR, config, dummy);
- return (config & (1 << 10));
-}
-
-int positive_have_wrcomb(void)
-{
- return 1;
-}
-
-/* generic structure...
- */
-struct mtrr_ops generic_mtrr_ops = {
- .use_intel_if = 1,
- .set_all = generic_set_all,
- .get = generic_get_mtrr,
- .get_free_region = generic_get_free_region,
- .set = generic_set_mtrr,
- .validate_add_page = generic_validate_add_page,
- .have_wrcomb = generic_have_wrcomb,
-};
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
deleted file mode 100644
index cf39e205d33..00000000000
--- a/arch/i386/kernel/cpu/mtrr/if.c
+++ /dev/null
@@ -1,403 +0,0 @@
-#include <linux/init.h>
-#include <linux/proc_fs.h>
-#include <linux/ctype.h>
-#include <linux/module.h>
-#include <linux/seq_file.h>
-#include <asm/uaccess.h>
-
-#define LINE_SIZE 80
-
-#include <asm/mtrr.h>
-#include "mtrr.h"
-
-/* RED-PEN: this is accessed without any locking */
-extern unsigned int *usage_table;
-
-
-#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
-
-static char *mtrr_strings[MTRR_NUM_TYPES] =
-{
- "uncachable", /* 0 */
- "write-combining", /* 1 */
- "?", /* 2 */
- "?", /* 3 */
- "write-through", /* 4 */
- "write-protect", /* 5 */
- "write-back", /* 6 */
-};
-
-char *mtrr_attrib_to_str(int x)
-{
- return (x <= 6) ? mtrr_strings[x] : "?";
-}
-
-#ifdef CONFIG_PROC_FS
-
-static int
-mtrr_file_add(unsigned long base, unsigned long size,
- unsigned int type, char increment, struct file *file, int page)
-{
- int reg, max;
- unsigned int *fcount = FILE_FCOUNT(file);
-
- max = num_var_ranges;
- if (fcount == NULL) {
- fcount = kmalloc(max * sizeof *fcount, GFP_KERNEL);
- if (!fcount)
- return -ENOMEM;
- memset(fcount, 0, max * sizeof *fcount);
- FILE_FCOUNT(file) = fcount;
- }
- if (!page) {
- if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
- return -EINVAL;
- base >>= PAGE_SHIFT;
- size >>= PAGE_SHIFT;
- }
- reg = mtrr_add_page(base, size, type, 1);
- if (reg >= 0)
- ++fcount[reg];
- return reg;
-}
-
-static int
-mtrr_file_del(unsigned long base, unsigned long size,
- struct file *file, int page)
-{
- int reg;
- unsigned int *fcount = FILE_FCOUNT(file);
-
- if (!page) {
- if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
- return -EINVAL;
- base >>= PAGE_SHIFT;
- size >>= PAGE_SHIFT;
- }
- reg = mtrr_del_page(-1, base, size);
- if (reg < 0)
- return reg;
- if (fcount == NULL)
- return reg;
- if (fcount[reg] < 1)
- return -EINVAL;
- --fcount[reg];
- return reg;
-}
-
-/* RED-PEN: seq_file can seek now. this is ignored. */
-static ssize_t
-mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
-/* Format of control line:
- "base=%Lx size=%Lx type=%s" OR:
- "disable=%d"
-*/
-{
- int i, err;
- unsigned long reg;
- unsigned long long base, size;
- char *ptr;
- char line[LINE_SIZE];
- size_t linelen;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (!len)
- return -EINVAL;
- memset(line, 0, LINE_SIZE);
- if (len > LINE_SIZE)
- len = LINE_SIZE;
- if (copy_from_user(line, buf, len - 1))
- return -EFAULT;
- linelen = strlen(line);
- ptr = line + linelen - 1;
- if (linelen && *ptr == '\n')
- *ptr = '\0';
- if (!strncmp(line, "disable=", 8)) {
- reg = simple_strtoul(line + 8, &ptr, 0);
- err = mtrr_del_page(reg, 0, 0);
- if (err < 0)
- return err;
- return len;
- }
- if (strncmp(line, "base=", 5))
- return -EINVAL;
- base = simple_strtoull(line + 5, &ptr, 0);
- for (; isspace(*ptr); ++ptr) ;
- if (strncmp(ptr, "size=", 5))
- return -EINVAL;
- size = simple_strtoull(ptr + 5, &ptr, 0);
- if ((base & 0xfff) || (size & 0xfff))
- return -EINVAL;
- for (; isspace(*ptr); ++ptr) ;
- if (strncmp(ptr, "type=", 5))
- return -EINVAL;
- ptr += 5;
- for (; isspace(*ptr); ++ptr) ;
- for (i = 0; i < MTRR_NUM_TYPES; ++i) {
- if (strcmp(ptr, mtrr_strings[i]))
- continue;
- base >>= PAGE_SHIFT;
- size >>= PAGE_SHIFT;
- err =
- mtrr_add_page((unsigned long) base, (unsigned long) size, i,
- 1);
- if (err < 0)
- return err;
- return len;
- }
- return -EINVAL;
-}
-
-static long
-mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
-{
- int err = 0;
- mtrr_type type;
- struct mtrr_sentry sentry;
- struct mtrr_gentry gentry;
- void __user *arg = (void __user *) __arg;
-
- switch (cmd) {
- case MTRRIOC_ADD_ENTRY:
- case MTRRIOC_SET_ENTRY:
- case MTRRIOC_DEL_ENTRY:
- case MTRRIOC_KILL_ENTRY:
- case MTRRIOC_ADD_PAGE_ENTRY:
- case MTRRIOC_SET_PAGE_ENTRY:
- case MTRRIOC_DEL_PAGE_ENTRY:
- case MTRRIOC_KILL_PAGE_ENTRY:
- if (copy_from_user(&sentry, arg, sizeof sentry))
- return -EFAULT;
- break;
- case MTRRIOC_GET_ENTRY:
- case MTRRIOC_GET_PAGE_ENTRY:
- if (copy_from_user(&gentry, arg, sizeof gentry))
- return -EFAULT;
- break;
-#ifdef CONFIG_COMPAT
- case MTRRIOC32_ADD_ENTRY:
- case MTRRIOC32_SET_ENTRY:
- case MTRRIOC32_DEL_ENTRY:
- case MTRRIOC32_KILL_ENTRY:
- case MTRRIOC32_ADD_PAGE_ENTRY:
- case MTRRIOC32_SET_PAGE_ENTRY:
- case MTRRIOC32_DEL_PAGE_ENTRY:
- case MTRRIOC32_KILL_PAGE_ENTRY: {
- struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg;
- err = get_user(sentry.base, &s32->base);
- err |= get_user(sentry.size, &s32->size);
- err |= get_user(sentry.type, &s32->type);
- if (err)
- return err;
- break;
- }
- case MTRRIOC32_GET_ENTRY:
- case MTRRIOC32_GET_PAGE_ENTRY: {
- struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg;
- err = get_user(gentry.regnum, &g32->regnum);
- err |= get_user(gentry.base, &g32->base);
- err |= get_user(gentry.size, &g32->size);
- err |= get_user(gentry.type, &g32->type);
- if (err)
- return err;
- break;
- }
-#endif
- }
-
- switch (cmd) {
- default:
- return -ENOTTY;
- case MTRRIOC_ADD_ENTRY:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- err =
- mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
- file, 0);
- break;
- case MTRRIOC_SET_ENTRY:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- err = mtrr_add(sentry.base, sentry.size, sentry.type, 0);
- break;
- case MTRRIOC_DEL_ENTRY:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- err = mtrr_file_del(sentry.base, sentry.size, file, 0);
- break;
- case MTRRIOC_KILL_ENTRY:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- err = mtrr_del(-1, sentry.base, sentry.size);
- break;
- case MTRRIOC_GET_ENTRY:
- if (gentry.regnum >= num_var_ranges)
- return -EINVAL;
- mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type);
-
- /* Hide entries that go above 4GB */
- if (gentry.base + gentry.size > 0x100000
- || gentry.size == 0x100000)
- gentry.base = gentry.size = gentry.type = 0;
- else {
- gentry.base <<= PAGE_SHIFT;
- gentry.size <<= PAGE_SHIFT;
- gentry.type = type;
- }
-
- break;
- case MTRRIOC_ADD_PAGE_ENTRY:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- err =
- mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
- file, 1);
- break;
- case MTRRIOC_SET_PAGE_ENTRY:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0);
- break;
- case MTRRIOC_DEL_PAGE_ENTRY:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- err = mtrr_file_del(sentry.base, sentry.size, file, 1);
- break;
- case MTRRIOC_KILL_PAGE_ENTRY:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- err = mtrr_del_page(-1, sentry.base, sentry.size);
- break;
- case MTRRIOC_GET_PAGE_ENTRY:
- if (gentry.regnum >= num_var_ranges)
- return -EINVAL;
- mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type);
- gentry.type = type;
- break;
- }
-
- if (err)
- return err;
-
- switch(cmd) {
- case MTRRIOC_GET_ENTRY:
- case MTRRIOC_GET_PAGE_ENTRY:
- if (copy_to_user(arg, &gentry, sizeof gentry))
- err = -EFAULT;
- break;
-#ifdef CONFIG_COMPAT
- case MTRRIOC32_GET_ENTRY:
- case MTRRIOC32_GET_PAGE_ENTRY: {
- struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg;
- err = put_user(gentry.base, &g32->base);
- err |= put_user(gentry.size, &g32->size);
- err |= put_user(gentry.regnum, &g32->regnum);
- err |= put_user(gentry.type, &g32->type);
- break;
- }
-#endif
- }
- return err;
-}
-
-static int
-mtrr_close(struct inode *ino, struct file *file)
-{
- int i, max;
- unsigned int *fcount = FILE_FCOUNT(file);
-
- if (fcount != NULL) {
- max = num_var_ranges;
- for (i = 0; i < max; ++i) {
- while (fcount[i] > 0) {
- mtrr_del(i, 0, 0);
- --fcount[i];
- }
- }
- kfree(fcount);
- FILE_FCOUNT(file) = NULL;
- }
- return single_release(ino, file);
-}
-
-static int mtrr_seq_show(struct seq_file *seq, void *offset);
-
-static int mtrr_open(struct inode *inode, struct file *file)
-{
- if (!mtrr_if)
- return -EIO;
- if (!mtrr_if->get)
- return -ENXIO;
- return single_open(file, mtrr_seq_show, NULL);
-}
-
-static struct file_operations mtrr_fops = {
- .owner = THIS_MODULE,
- .open = mtrr_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = mtrr_write,
- .unlocked_ioctl = mtrr_ioctl,
- .compat_ioctl = mtrr_ioctl,
- .release = mtrr_close,
-};
-
-
-static struct proc_dir_entry *proc_root_mtrr;
-
-
-static int mtrr_seq_show(struct seq_file *seq, void *offset)
-{
- char factor;
- int i, max, len;
- mtrr_type type;
- unsigned long base;
- unsigned int size;
-
- len = 0;
- max = num_var_ranges;
- for (i = 0; i < max; i++) {
- mtrr_if->get(i, &base, &size, &type);
- if (size == 0)
- usage_table[i] = 0;
- else {
- if (size < (0x100000 >> PAGE_SHIFT)) {
- /* less than 1MB */
- factor = 'K';
- size <<= PAGE_SHIFT - 10;
- } else {
- factor = 'M';
- size >>= 20 - PAGE_SHIFT;
- }
- /* RED-PEN: base can be > 32bit */
- len += seq_printf(seq,
- "reg%02i: base=0x%05lx000 (%4liMB), size=%4i%cB: %s, count=%d\n",
- i, base, base >> (20 - PAGE_SHIFT), size, factor,
- mtrr_attrib_to_str(type), usage_table[i]);
- }
- }
- return 0;
-}
-
-static int __init mtrr_if_init(void)
-{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
- (!cpu_has(c, X86_FEATURE_K6_MTRR)) &&
- (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) &&
- (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
- return -ENODEV;
-
- proc_root_mtrr =
- create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
- if (proc_root_mtrr) {
- proc_root_mtrr->owner = THIS_MODULE;
- proc_root_mtrr->proc_fops = &mtrr_fops;
- }
- return 0;
-}
-
-arch_initcall(mtrr_if_init);
-#endif /* CONFIG_PROC_FS */
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
deleted file mode 100644
index dd4ebd6af7e..00000000000
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ /dev/null
@@ -1,715 +0,0 @@
-/* Generic MTRR (Memory Type Range Register) driver.
-
- Copyright (C) 1997-2000 Richard Gooch
- Copyright (c) 2002 Patrick Mochel
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
- Richard Gooch may be reached by email at rgooch@atnf.csiro.au
- The postal address is:
- Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
-
- Source: "Pentium Pro Family Developer's Manual, Volume 3:
- Operating System Writer's Guide" (Intel document number 242692),
- section 11.11.7
-
- This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
- on 6-7 March 2002.
- Source: Intel Architecture Software Developers Manual, Volume 3:
- System Programming Guide; Section 9.11. (1997 edition - PPro).
-*/
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/smp.h>
-#include <linux/cpu.h>
-
-#include <asm/mtrr.h>
-
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include "mtrr.h"
-
-#define MTRR_VERSION "2.0 (20020519)"
-
-u32 num_var_ranges = 0;
-
-unsigned int *usage_table;
-static DECLARE_MUTEX(main_lock);
-
-u32 size_or_mask, size_and_mask;
-
-static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
-
-struct mtrr_ops * mtrr_if = NULL;
-
-static void set_mtrr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type);
-
-extern int arr3_protected;
-
-void set_mtrr_ops(struct mtrr_ops * ops)
-{
- if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
- mtrr_ops[ops->vendor] = ops;
-}
-
-/* Returns non-zero if we have the write-combining memory type */
-static int have_wrcomb(void)
-{
- struct pci_dev *dev;
- u8 rev;
-
- if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) {
- /* ServerWorks LE chipsets < rev 6 have problems with write-combining
- Don't allow it and leave room for other chipsets to be tagged */
- if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
- dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
- pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
- if (rev <= 5) {
- printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
- pci_dev_put(dev);
- return 0;
- }
- }
- /* Intel 450NX errata # 23. Non ascending cacheline evictions to
- write combining memory may resulting in data corruption */
- if (dev->vendor == PCI_VENDOR_ID_INTEL &&
- dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
- printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
- pci_dev_put(dev);
- return 0;
- }
- pci_dev_put(dev);
- }
- return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
-}
-
-/* This function returns the number of variable MTRRs */
-static void __init set_num_var_ranges(void)
-{
- unsigned long config = 0, dummy;
-
- if (use_intel()) {
- rdmsr(MTRRcap_MSR, config, dummy);
- } else if (is_cpu(AMD))
- config = 2;
- else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
- config = 8;
- num_var_ranges = config & 0xff;
-}
-
-static void __init init_table(void)
-{
- int i, max;
-
- max = num_var_ranges;
- if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
- == NULL) {
- printk(KERN_ERR "mtrr: could not allocate\n");
- return;
- }
- for (i = 0; i < max; i++)
- usage_table[i] = 1;
-}
-
-struct set_mtrr_data {
- atomic_t count;
- atomic_t gate;
- unsigned long smp_base;
- unsigned long smp_size;
- unsigned int smp_reg;
- mtrr_type smp_type;
-};
-
-#ifdef CONFIG_SMP
-
-static void ipi_handler(void *info)
-/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
- [RETURNS] Nothing.
-*/
-{
- struct set_mtrr_data *data = info;
- unsigned long flags;
-
- local_irq_save(flags);
-
- atomic_dec(&data->count);
- while(!atomic_read(&data->gate))
- cpu_relax();
-
- /* The master has cleared me to execute */
- if (data->smp_reg != ~0U)
- mtrr_if->set(data->smp_reg, data->smp_base,
- data->smp_size, data->smp_type);
- else
- mtrr_if->set_all();
-
- atomic_dec(&data->count);
- while(atomic_read(&data->gate))
- cpu_relax();
-
- atomic_dec(&data->count);
- local_irq_restore(flags);
-}
-
-#endif
-
-/**
- * set_mtrr - update mtrrs on all processors
- * @reg: mtrr in question
- * @base: mtrr base
- * @size: mtrr size
- * @type: mtrr type
- *
- * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
- *
- * 1. Send IPI to do the following:
- * 2. Disable Interrupts
- * 3. Wait for all procs to do so
- * 4. Enter no-fill cache mode
- * 5. Flush caches
- * 6. Clear PGE bit
- * 7. Flush all TLBs
- * 8. Disable all range registers
- * 9. Update the MTRRs
- * 10. Enable all range registers
- * 11. Flush all TLBs and caches again
- * 12. Enter normal cache mode and reenable caching
- * 13. Set PGE
- * 14. Wait for buddies to catch up
- * 15. Enable interrupts.
- *
- * What does that mean for us? Well, first we set data.count to the number
- * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
- * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
- * Meanwhile, they are waiting for that flag to be set. Once it's set, each
- * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it
- * differently, so we call mtrr_if->set() callback and let them take care of it.
- * When they're done, they again decrement data->count and wait for data.gate to
- * be reset.
- * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag.
- * Everyone then enables interrupts and we all continue on.
- *
- * Note that the mechanism is the same for UP systems, too; all the SMP stuff
- * becomes nops.
- */
-static void set_mtrr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
-{
- struct set_mtrr_data data;
- unsigned long flags;
-
- data.smp_reg = reg;
- data.smp_base = base;
- data.smp_size = size;
- data.smp_type = type;
- atomic_set(&data.count, num_booting_cpus() - 1);
- atomic_set(&data.gate,0);
-
- /* Start the ball rolling on other CPUs */
- if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
- panic("mtrr: timed out waiting for other CPUs\n");
-
- local_irq_save(flags);
-
- while(atomic_read(&data.count))
- cpu_relax();
-
- /* ok, reset count and toggle gate */
- atomic_set(&data.count, num_booting_cpus() - 1);
- atomic_set(&data.gate,1);
-
- /* do our MTRR business */
-
- /* HACK!
- * We use this same function to initialize the mtrrs on boot.
- * The state of the boot cpu's mtrrs has been saved, and we want
- * to replicate across all the APs.
- * If we're doing that @reg is set to something special...
- */
- if (reg != ~0U)
- mtrr_if->set(reg,base,size,type);
-
- /* wait for the others */
- while(atomic_read(&data.count))
- cpu_relax();
-
- atomic_set(&data.count, num_booting_cpus() - 1);
- atomic_set(&data.gate,0);
-
- /*
- * Wait here for everyone to have seen the gate change
- * So we're the last ones to touch 'data'
- */
- while(atomic_read(&data.count))
- cpu_relax();
-
- local_irq_restore(flags);
-}
-
-/**
- * mtrr_add_page - Add a memory type region
- * @base: Physical base address of region in pages (4 KB)
- * @size: Physical size of region in pages (4 KB)
- * @type: Type of MTRR desired
- * @increment: If this is true do usage counting on the region
- *
- * Memory type region registers control the caching on newer Intel and
- * non Intel processors. This function allows drivers to request an
- * MTRR is added. The details and hardware specifics of each processor's
- * implementation are hidden from the caller, but nevertheless the
- * caller should expect to need to provide a power of two size on an
- * equivalent power of two boundary.
- *
- * If the region cannot be added either because all regions are in use
- * or the CPU cannot support it a negative value is returned. On success
- * the register number for this entry is returned, but should be treated
- * as a cookie only.
- *
- * On a multiprocessor machine the changes are made to all processors.
- * This is required on x86 by the Intel processors.
- *
- * The available types are
- *
- * %MTRR_TYPE_UNCACHABLE - No caching
- *
- * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
- *
- * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
- *
- * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
- *
- * BUGS: Needs a quiet flag for the cases where drivers do not mind
- * failures and do not wish system log messages to be sent.
- */
-
-int mtrr_add_page(unsigned long base, unsigned long size,
- unsigned int type, char increment)
-{
- int i;
- mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
- int error;
-
- if (!mtrr_if)
- return -ENXIO;
-
- if ((error = mtrr_if->validate_add_page(base,size,type)))
- return error;
-
- if (type >= MTRR_NUM_TYPES) {
- printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
- return -EINVAL;
- }
-
- /* If the type is WC, check that this processor supports it */
- if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
- printk(KERN_WARNING
- "mtrr: your processor doesn't support write-combining\n");
- return -ENOSYS;
- }
-
- if (base & size_or_mask || size & size_or_mask) {
- printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n");
- return -EINVAL;
- }
-
- error = -EINVAL;
-
- /* No CPU hotplug when we change MTRR entries */
- lock_cpu_hotplug();
- /* Search for existing MTRR */
- down(&main_lock);
- for (i = 0; i < num_var_ranges; ++i) {
- mtrr_if->get(i, &lbase, &lsize, &ltype);
- if (base >= lbase + lsize)
- continue;
- if ((base < lbase) && (base + size <= lbase))
- continue;
- /* At this point we know there is some kind of overlap/enclosure */
- if ((base < lbase) || (base + size > lbase + lsize)) {
- printk(KERN_WARNING
- "mtrr: 0x%lx000,0x%lx000 overlaps existing"
- " 0x%lx000,0x%x000\n", base, size, lbase,
- lsize);
- goto out;
- }
- /* New region is enclosed by an existing region */
- if (ltype != type) {
- if (type == MTRR_TYPE_UNCACHABLE)
- continue;
- printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
- base, size, mtrr_attrib_to_str(ltype),
- mtrr_attrib_to_str(type));
- goto out;
- }
- if (increment)
- ++usage_table[i];
- error = i;
- goto out;
- }
- /* Search for an empty MTRR */
- i = mtrr_if->get_free_region(base, size);
- if (i >= 0) {
- set_mtrr(i, base, size, type);
- usage_table[i] = 1;
- } else
- printk(KERN_INFO "mtrr: no more MTRRs available\n");
- error = i;
- out:
- up(&main_lock);
- unlock_cpu_hotplug();
- return error;
-}
-
-static int mtrr_check(unsigned long base, unsigned long size)
-{
- if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
- printk(KERN_WARNING
- "mtrr: size and base must be multiples of 4 kiB\n");
- printk(KERN_DEBUG
- "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
- dump_stack();
- return -1;
- }
- return 0;
-}
-
-/**
- * mtrr_add - Add a memory type region
- * @base: Physical base address of region
- * @size: Physical size of region
- * @type: Type of MTRR desired
- * @increment: If this is true do usage counting on the region
- *
- * Memory type region registers control the caching on newer Intel and
- * non Intel processors. This function allows drivers to request an
- * MTRR is added. The details and hardware specifics of each processor's
- * implementation are hidden from the caller, but nevertheless the
- * caller should expect to need to provide a power of two size on an
- * equivalent power of two boundary.
- *
- * If the region cannot be added either because all regions are in use
- * or the CPU cannot support it a negative value is returned. On success
- * the register number for this entry is returned, but should be treated
- * as a cookie only.
- *
- * On a multiprocessor machine the changes are made to all processors.
- * This is required on x86 by the Intel processors.
- *
- * The available types are
- *
- * %MTRR_TYPE_UNCACHABLE - No caching
- *
- * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
- *
- * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
- *
- * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
- *
- * BUGS: Needs a quiet flag for the cases where drivers do not mind
- * failures and do not wish system log messages to be sent.
- */
-
-int
-mtrr_add(unsigned long base, unsigned long size, unsigned int type,
- char increment)
-{
- if (mtrr_check(base, size))
- return -EINVAL;
- return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
- increment);
-}
-
-/**
- * mtrr_del_page - delete a memory type region
- * @reg: Register returned by mtrr_add
- * @base: Physical base address
- * @size: Size of region
- *
- * If register is supplied then base and size are ignored. This is
- * how drivers should call it.
- *
- * Releases an MTRR region. If the usage count drops to zero the
- * register is freed and the region returns to default state.
- * On success the register is returned, on failure a negative error
- * code.
- */
-
-int mtrr_del_page(int reg, unsigned long base, unsigned long size)
-{
- int i, max;
- mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
- int error = -EINVAL;
-
- if (!mtrr_if)
- return -ENXIO;
-
- max = num_var_ranges;
- /* No CPU hotplug when we change MTRR entries */
- lock_cpu_hotplug();
- down(&main_lock);
- if (reg < 0) {
- /* Search for existing MTRR */
- for (i = 0; i < max; ++i) {
- mtrr_if->get(i, &lbase, &lsize, &ltype);
- if (lbase == base && lsize == size) {
- reg = i;
- break;
- }
- }
- if (reg < 0) {
- printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
- size);
- goto out;
- }
- }
- if (reg >= max) {
- printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
- goto out;
- }
- if (is_cpu(CYRIX) && !use_intel()) {
- if ((reg == 3) && arr3_protected) {
- printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
- goto out;
- }
- }
- mtrr_if->get(reg, &lbase, &lsize, &ltype);
- if (lsize < 1) {
- printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
- goto out;
- }
- if (usage_table[reg] < 1) {
- printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
- goto out;
- }
- if (--usage_table[reg] < 1)
- set_mtrr(reg, 0, 0, 0);
- error = reg;
- out:
- up(&main_lock);
- unlock_cpu_hotplug();
- return error;
-}
-/**
- * mtrr_del - delete a memory type region
- * @reg: Register returned by mtrr_add
- * @base: Physical base address
- * @size: Size of region
- *
- * If register is supplied then base and size are ignored. This is
- * how drivers should call it.
- *
- * Releases an MTRR region. If the usage count drops to zero the
- * register is freed and the region returns to default state.
- * On success the register is returned, on failure a negative error
- * code.
- */
-
-int
-mtrr_del(int reg, unsigned long base, unsigned long size)
-{
- if (mtrr_check(base, size))
- return -EINVAL;
- return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
-}
-
-EXPORT_SYMBOL(mtrr_add);
-EXPORT_SYMBOL(mtrr_del);
-
-/* HACK ALERT!
- * These should be called implicitly, but we can't yet until all the initcall
- * stuff is done...
- */
-extern void amd_init_mtrr(void);
-extern void cyrix_init_mtrr(void);
-extern void centaur_init_mtrr(void);
-
-static void __init init_ifs(void)
-{
- amd_init_mtrr();
- cyrix_init_mtrr();
- centaur_init_mtrr();
-}
-
-/* The suspend/resume methods are only for CPU without MTRR. CPU using generic
- * MTRR driver doesn't require this
- */
-struct mtrr_value {
- mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
-};
-
-static struct mtrr_value * mtrr_state;
-
-static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
-{
- int i;
- int size = num_var_ranges * sizeof(struct mtrr_value);
-
- mtrr_state = kmalloc(size,GFP_ATOMIC);
- if (mtrr_state)
- memset(mtrr_state,0,size);
- else
- return -ENOMEM;
-
- for (i = 0; i < num_var_ranges; i++) {
- mtrr_if->get(i,
- &mtrr_state[i].lbase,
- &mtrr_state[i].lsize,
- &mtrr_state[i].ltype);
- }
- return 0;
-}
-
-static int mtrr_restore(struct sys_device * sysdev)
-{
- int i;
-
- for (i = 0; i < num_var_ranges; i++) {
- if (mtrr_state[i].lsize)
- set_mtrr(i,
- mtrr_state[i].lbase,
- mtrr_state[i].lsize,
- mtrr_state[i].ltype);
- }
- kfree(mtrr_state);
- return 0;
-}
-
-
-
-static struct sysdev_driver mtrr_sysdev_driver = {
- .suspend = mtrr_save,
- .resume = mtrr_restore,
-};
-
-
-/**
- * mtrr_bp_init - initialize mtrrs on the boot CPU
- *
- * This needs to be called early; before any of the other CPUs are
- * initialized (i.e. before smp_init()).
- *
- */
-void __init mtrr_bp_init(void)
-{
- init_ifs();
-
- if (cpu_has_mtrr) {
- mtrr_if = &generic_mtrr_ops;
- size_or_mask = 0xff000000; /* 36 bits */
- size_and_mask = 0x00f00000;
-
- /* This is an AMD specific MSR, but we assume(hope?) that
- Intel will implement it to when they extend the address
- bus of the Xeon. */
- if (cpuid_eax(0x80000000) >= 0x80000008) {
- u32 phys_addr;
- phys_addr = cpuid_eax(0x80000008) & 0xff;
- size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
- size_and_mask = ~size_or_mask & 0xfff00000;
- } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
- boot_cpu_data.x86 == 6) {
- /* VIA C* family have Intel style MTRRs, but
- don't support PAE */
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
- } else {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (cpu_has_k6_mtrr) {
- /* Pre-Athlon (K6) AMD CPU MTRRs */
- mtrr_if = mtrr_ops[X86_VENDOR_AMD];
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
- break;
- case X86_VENDOR_CENTAUR:
- if (cpu_has_centaur_mcr) {
- mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
- break;
- case X86_VENDOR_CYRIX:
- if (cpu_has_cyrix_arr) {
- mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
- break;
- default:
- break;
- }
- }
- printk(KERN_INFO "mtrr: v%s\n",MTRR_VERSION);
-
- if (mtrr_if) {
- set_num_var_ranges();
- init_table();
- if (use_intel())
- get_mtrr_state();
- }
-}
-
-void mtrr_ap_init(void)
-{
- unsigned long flags;
-
- if (!mtrr_if || !use_intel())
- return;
- /*
- * Ideally we should hold main_lock here to avoid mtrr entries changed,
- * but this routine will be called in cpu boot time, holding the lock
- * breaks it. This routine is called in two cases: 1.very earily time
- * of software resume, when there absolutely isn't mtrr entry changes;
- * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to
- * prevent mtrr entry changes
- */
- local_irq_save(flags);
-
- mtrr_if->set_all();
-
- local_irq_restore(flags);
-}
-
-static int __init mtrr_init_finialize(void)
-{
- if (!mtrr_if)
- return 0;
- if (use_intel())
- mtrr_state_warn();
- else {
- /* The CPUs haven't MTRR and seemes not support SMP. They have
- * specific drivers, we use a tricky method to support
- * suspend/resume for them.
- * TBD: is there any system with such CPU which supports
- * suspend/resume? if no, we should remove the code.
- */
- sysdev_driver_register(&cpu_sysdev_class,
- &mtrr_sysdev_driver);
- }
- return 0;
-}
-subsys_initcall(mtrr_init_finialize);
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
deleted file mode 100644
index 99c9f268204..00000000000
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * local mtrr defines.
- */
-
-#ifndef TRUE
-#define TRUE 1
-#define FALSE 0
-#endif
-
-#define MTRRcap_MSR 0x0fe
-#define MTRRdefType_MSR 0x2ff
-
-#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
-#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
-
-#define NUM_FIXED_RANGES 88
-#define MTRRfix64K_00000_MSR 0x250
-#define MTRRfix16K_80000_MSR 0x258
-#define MTRRfix16K_A0000_MSR 0x259
-#define MTRRfix4K_C0000_MSR 0x268
-#define MTRRfix4K_C8000_MSR 0x269
-#define MTRRfix4K_D0000_MSR 0x26a
-#define MTRRfix4K_D8000_MSR 0x26b
-#define MTRRfix4K_E0000_MSR 0x26c
-#define MTRRfix4K_E8000_MSR 0x26d
-#define MTRRfix4K_F0000_MSR 0x26e
-#define MTRRfix4K_F8000_MSR 0x26f
-
-#define MTRR_CHANGE_MASK_FIXED 0x01
-#define MTRR_CHANGE_MASK_VARIABLE 0x02
-#define MTRR_CHANGE_MASK_DEFTYPE 0x04
-
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
- an 8 bit field: */
-typedef u8 mtrr_type;
-
-struct mtrr_ops {
- u32 vendor;
- u32 use_intel_if;
-// void (*init)(void);
- void (*set)(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type);
- void (*set_all)(void);
-
- void (*get)(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type);
- int (*get_free_region) (unsigned long base, unsigned long size);
-
- int (*validate_add_page)(unsigned long base, unsigned long size,
- unsigned int type);
- int (*have_wrcomb)(void);
-};
-
-extern int generic_get_free_region(unsigned long base, unsigned long size);
-extern int generic_validate_add_page(unsigned long base, unsigned long size,
- unsigned int type);
-
-extern struct mtrr_ops generic_mtrr_ops;
-
-extern int positive_have_wrcomb(void);
-
-/* library functions for processor-specific routines */
-struct set_mtrr_context {
- unsigned long flags;
- unsigned long deftype_lo;
- unsigned long deftype_hi;
- unsigned long cr4val;
- unsigned long ccr3;
-};
-
-struct mtrr_var_range {
- unsigned long base_lo;
- unsigned long base_hi;
- unsigned long mask_lo;
- unsigned long mask_hi;
-};
-
-void set_mtrr_done(struct set_mtrr_context *ctxt);
-void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
-void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
-
-void get_mtrr_state(void);
-
-extern void set_mtrr_ops(struct mtrr_ops * ops);
-
-extern u32 size_or_mask, size_and_mask;
-extern struct mtrr_ops * mtrr_if;
-
-#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
-#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
-
-extern unsigned int num_var_ranges;
-
-void mtrr_state_warn(void);
-char *mtrr_attrib_to_str(int x);
-void mtrr_wrmsr(unsigned, unsigned, unsigned);
-
diff --git a/arch/i386/kernel/cpu/mtrr/state.c b/arch/i386/kernel/cpu/mtrr/state.c
deleted file mode 100644
index f62ecd15811..00000000000
--- a/arch/i386/kernel/cpu/mtrr/state.c
+++ /dev/null
@@ -1,78 +0,0 @@
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <asm/io.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-#include "mtrr.h"
-
-
-/* Put the processor into a state where MTRRs can be safely set */
-void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
-{
- unsigned int cr0;
-
- /* Disable interrupts locally */
- local_irq_save(ctxt->flags);
-
- if (use_intel() || is_cpu(CYRIX)) {
-
- /* Save value of CR4 and clear Page Global Enable (bit 7) */
- if ( cpu_has_pge ) {
- ctxt->cr4val = read_cr4();
- write_cr4(ctxt->cr4val & (unsigned char) ~(1 << 7));
- }
-
- /* Disable and flush caches. Note that wbinvd flushes the TLBs as
- a side-effect */
- cr0 = read_cr0() | 0x40000000;
- wbinvd();
- write_cr0(cr0);
- wbinvd();
-
- if (use_intel())
- /* Save MTRR state */
- rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
- else
- /* Cyrix ARRs - everything else were excluded at the top */
- ctxt->ccr3 = getCx86(CX86_CCR3);
- }
-}
-
-void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
-{
- if (use_intel())
- /* Disable MTRRs, and set the default type to uncached */
- mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
- ctxt->deftype_hi);
- else if (is_cpu(CYRIX))
- /* Cyrix ARRs - everything else were excluded at the top */
- setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
-}
-
-/* Restore the processor after a set_mtrr_prepare */
-void set_mtrr_done(struct set_mtrr_context *ctxt)
-{
- if (use_intel() || is_cpu(CYRIX)) {
-
- /* Flush caches and TLBs */
- wbinvd();
-
- /* Restore MTRRdefType */
- if (use_intel())
- /* Intel (P6) standard MTRRs */
- mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
- else
- /* Cyrix ARRs - everything else was excluded at the top */
- setCx86(CX86_CCR3, ctxt->ccr3);
-
- /* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
-
- /* Restore value of CR4 */
- if ( cpu_has_pge )
- write_cr4(ctxt->cr4val);
- }
- /* Re-enable interrupts locally (if enabled previously) */
- local_irq_restore(ctxt->flags);
-}
-
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c
deleted file mode 100644
index 30898a260a5..00000000000
--- a/arch/i386/kernel/cpu/nexgen.c
+++ /dev/null
@@ -1,63 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <asm/processor.h>
-
-#include "cpu.h"
-
-/*
- * Detect a NexGen CPU running without BIOS hypercode new enough
- * to have CPUID. (Thanks to Herbert Oppmann)
- */
-
-static int __init deep_magic_nexgen_probe(void)
-{
- int ret;
-
- __asm__ __volatile__ (
- " movw $0x5555, %%ax\n"
- " xorw %%dx,%%dx\n"
- " movw $2, %%cx\n"
- " divw %%cx\n"
- " movl $0, %%eax\n"
- " jnz 1f\n"
- " movl $1, %%eax\n"
- "1:\n"
- : "=a" (ret) : : "cx", "dx" );
- return ret;
-}
-
-static void __init init_nexgen(struct cpuinfo_x86 * c)
-{
- c->x86_cache_size = 256; /* A few had 1 MB... */
-}
-
-static void __init nexgen_identify(struct cpuinfo_x86 * c)
-{
- /* Detect NexGen with old hypercode */
- if ( deep_magic_nexgen_probe() ) {
- strcpy(c->x86_vendor_id, "NexGenDriven");
- }
- generic_identify(c);
-}
-
-static struct cpu_dev nexgen_cpu_dev __initdata = {
- .c_vendor = "Nexgen",
- .c_ident = { "NexGenDriven" },
- .c_models = {
- { .vendor = X86_VENDOR_NEXGEN,
- .family = 5,
- .model_names = { [1] = "Nx586" }
- },
- },
- .c_init = init_nexgen,
- .c_identify = nexgen_identify,
-};
-
-int __init nexgen_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
- return 0;
-}
-
-//early_arch_initcall(nexgen_init_cpu);
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
deleted file mode 100644
index 41b871ecf4b..00000000000
--- a/arch/i386/kernel/cpu/proc.c
+++ /dev/null
@@ -1,155 +0,0 @@
-#include <linux/smp.h>
-#include <linux/timex.h>
-#include <linux/string.h>
-#include <asm/semaphore.h>
-#include <linux/seq_file.h>
-
-/*
- * Get CPU information for use by the procfs.
- */
-static int show_cpuinfo(struct seq_file *m, void *v)
-{
- /*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned. Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
- static char *x86_cap_flags[] = {
- /* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
- /* AMD-defined */
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow",
-
- /* Transmeta-defined */
- "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Other (Linux-defined) */
- "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
- NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
- "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- };
- struct cpuinfo_x86 *c = v;
- int i, n = c - cpu_data;
- int fpu_exception;
-
-#ifdef CONFIG_SMP
- if (!cpu_online(n))
- return 0;
-#endif
- seq_printf(m, "processor\t: %d\n"
- "vendor_id\t: %s\n"
- "cpu family\t: %d\n"
- "model\t\t: %d\n"
- "model name\t: %s\n",
- n,
- c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
- c->x86,
- c->x86_model,
- c->x86_model_id[0] ? c->x86_model_id : "unknown");
-
- if (c->x86_mask || c->cpuid_level >= 0)
- seq_printf(m, "stepping\t: %d\n", c->x86_mask);
- else
- seq_printf(m, "stepping\t: unknown\n");
-
- if ( cpu_has(c, X86_FEATURE_TSC) ) {
- seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
- cpu_khz / 1000, (cpu_khz % 1000));
- }
-
- /* Cache size */
- if (c->x86_cache_size >= 0)
- seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
-#ifdef CONFIG_X86_HT
- if (c->x86_num_cores * smp_num_siblings > 1) {
- seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]);
- seq_printf(m, "siblings\t: %d\n",
- c->x86_num_cores * smp_num_siblings);
- seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]);
- seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
- }
-#endif
-
- /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */
- fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
- seq_printf(m, "fdiv_bug\t: %s\n"
- "hlt_bug\t\t: %s\n"
- "f00f_bug\t: %s\n"
- "coma_bug\t: %s\n"
- "fpu\t\t: %s\n"
- "fpu_exception\t: %s\n"
- "cpuid level\t: %d\n"
- "wp\t\t: %s\n"
- "flags\t\t:",
- c->fdiv_bug ? "yes" : "no",
- c->hlt_works_ok ? "no" : "yes",
- c->f00f_bug ? "yes" : "no",
- c->coma_bug ? "yes" : "no",
- c->hard_math ? "yes" : "no",
- fpu_exception ? "yes" : "no",
- c->cpuid_level,
- c->wp_works_ok ? "yes" : "no");
-
- for ( i = 0 ; i < 32*NCAPINTS ; i++ )
- if ( test_bit(i, c->x86_capability) &&
- x86_cap_flags[i] != NULL )
- seq_printf(m, " %s", x86_cap_flags[i]);
-
- seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
- c->loops_per_jiffy/(500000/HZ),
- (c->loops_per_jiffy/(5000/HZ)) % 100);
-
- return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
- return *pos < NR_CPUS ? cpu_data + *pos : NULL;
-}
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return c_start(m, pos);
-}
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-struct seq_operations cpuinfo_op = {
- .start = c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
-};
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c
deleted file mode 100644
index 8602425628c..00000000000
--- a/arch/i386/kernel/cpu/rise.c
+++ /dev/null
@@ -1,53 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <asm/processor.h>
-
-#include "cpu.h"
-
-static void __init init_rise(struct cpuinfo_x86 *c)
-{
- printk("CPU: Rise iDragon");
- if (c->x86_model > 2)
- printk(" II");
- printk("\n");
-
- /* Unhide possibly hidden capability flags
- The mp6 iDragon family don't have MSRs.
- We switch on extra features with this cpuid weirdness: */
- __asm__ (
- "movl $0x6363452a, %%eax\n\t"
- "movl $0x3231206c, %%ecx\n\t"
- "movl $0x2a32313a, %%edx\n\t"
- "cpuid\n\t"
- "movl $0x63634523, %%eax\n\t"
- "movl $0x32315f6c, %%ecx\n\t"
- "movl $0x2333313a, %%edx\n\t"
- "cpuid\n\t" : : : "eax", "ebx", "ecx", "edx"
- );
- set_bit(X86_FEATURE_CX8, c->x86_capability);
-}
-
-static struct cpu_dev rise_cpu_dev __initdata = {
- .c_vendor = "Rise",
- .c_ident = { "RiseRiseRise" },
- .c_models = {
- { .vendor = X86_VENDOR_RISE, .family = 5, .model_names =
- {
- [0] = "iDragon",
- [2] = "iDragon",
- [8] = "iDragon II",
- [9] = "iDragon II"
- }
- },
- },
- .c_init = init_rise,
-};
-
-int __init rise_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_RISE] = &rise_cpu_dev;
- return 0;
-}
-
-//early_arch_initcall(rise_init_cpu);
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
deleted file mode 100644
index fc426380366..00000000000
--- a/arch/i386/kernel/cpu/transmeta.c
+++ /dev/null
@@ -1,113 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include "cpu.h"
-
-static void __init init_transmeta(struct cpuinfo_x86 *c)
-{
- unsigned int cap_mask, uk, max, dummy;
- unsigned int cms_rev1, cms_rev2;
- unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev;
- char cpu_info[65];
-
- get_model_name(c); /* Same as AMD/Cyrix */
- display_cacheinfo(c);
-
- /* Print CMS and CPU revision */
- max = cpuid_eax(0x80860000);
- cpu_rev = 0;
- if ( max >= 0x80860001 ) {
- cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
- if (cpu_rev != 0x02000000) {
- printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
- (cpu_rev >> 24) & 0xff,
- (cpu_rev >> 16) & 0xff,
- (cpu_rev >> 8) & 0xff,
- cpu_rev & 0xff,
- cpu_freq);
- }
- }
- if ( max >= 0x80860002 ) {
- cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
- if (cpu_rev == 0x02000000) {
- printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
- new_cpu_rev, cpu_freq);
- }
- printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
- (cms_rev1 >> 24) & 0xff,
- (cms_rev1 >> 16) & 0xff,
- (cms_rev1 >> 8) & 0xff,
- cms_rev1 & 0xff,
- cms_rev2);
- }
- if ( max >= 0x80860006 ) {
- cpuid(0x80860003,
- (void *)&cpu_info[0],
- (void *)&cpu_info[4],
- (void *)&cpu_info[8],
- (void *)&cpu_info[12]);
- cpuid(0x80860004,
- (void *)&cpu_info[16],
- (void *)&cpu_info[20],
- (void *)&cpu_info[24],
- (void *)&cpu_info[28]);
- cpuid(0x80860005,
- (void *)&cpu_info[32],
- (void *)&cpu_info[36],
- (void *)&cpu_info[40],
- (void *)&cpu_info[44]);
- cpuid(0x80860006,
- (void *)&cpu_info[48],
- (void *)&cpu_info[52],
- (void *)&cpu_info[56],
- (void *)&cpu_info[60]);
- cpu_info[64] = '\0';
- printk(KERN_INFO "CPU: %s\n", cpu_info);
- }
-
- /* Unhide possibly hidden capability flags */
- rdmsr(0x80860004, cap_mask, uk);
- wrmsr(0x80860004, ~0, uk);
- c->x86_capability[0] = cpuid_edx(0x00000001);
- wrmsr(0x80860004, cap_mask, uk);
-
- /* If we can run i686 user-space code, call us an i686 */
-#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV)
- if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 )
- c->x86 = 6;
-
-#ifdef CONFIG_SYSCTL
- /* randomize_va_space slows us down enormously;
- it probably triggers retranslation of x86->native bytecode */
- randomize_va_space = 0;
-#endif
-}
-
-static void transmeta_identify(struct cpuinfo_x86 * c)
-{
- u32 xlvl;
- generic_identify(c);
-
- /* Transmeta-defined flags: level 0x80860001 */
- xlvl = cpuid_eax(0x80860000);
- if ( (xlvl & 0xffff0000) == 0x80860000 ) {
- if ( xlvl >= 0x80860001 )
- c->x86_capability[2] = cpuid_edx(0x80860001);
- }
-}
-
-static struct cpu_dev transmeta_cpu_dev __initdata = {
- .c_vendor = "Transmeta",
- .c_ident = { "GenuineTMx86", "TransmetaCPU" },
- .c_init = init_transmeta,
- .c_identify = transmeta_identify,
-};
-
-int __init transmeta_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev;
- return 0;
-}
-
-//early_arch_initcall(transmeta_init_cpu);
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c
deleted file mode 100644
index 264fcad559d..00000000000
--- a/arch/i386/kernel/cpu/umc.c
+++ /dev/null
@@ -1,33 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <asm/processor.h>
-#include "cpu.h"
-
-/* UMC chips appear to be only either 386 or 486, so no special init takes place.
- */
-static void __init init_umc(struct cpuinfo_x86 * c)
-{
-
-}
-
-static struct cpu_dev umc_cpu_dev __initdata = {
- .c_vendor = "UMC",
- .c_ident = { "UMC UMC UMC" },
- .c_models = {
- { .vendor = X86_VENDOR_UMC, .family = 4, .model_names =
- {
- [1] = "U5D",
- [2] = "U5S",
- }
- },
- },
- .c_init = init_umc,
-};
-
-int __init umc_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev;
- return 0;
-}
-
-//early_arch_initcall(umc_init_cpu);
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
deleted file mode 100644
index 13bae799e62..00000000000
--- a/arch/i386/kernel/cpuid.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/* ----------------------------------------------------------------------- *
- *
- * Copyright 2000 H. Peter Anvin - All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
- * USA; either version 2 of the License, or (at your option) any later
- * version; incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-/*
- * cpuid.c
- *
- * x86 CPUID access device
- *
- * This device is accessed by lseek() to the appropriate CPUID level
- * and then read in chunks of 16 bytes. A larger size means multiple
- * reads of consecutive levels.
- *
- * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on
- * an SMP box will direct the access to CPU %d.
- */
-
-#include <linux/module.h>
-#include <linux/config.h>
-
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/smp.h>
-#include <linux/major.h>
-#include <linux/fs.h>
-#include <linux/smp_lock.h>
-#include <linux/fs.h>
-#include <linux/device.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-static struct class *cpuid_class;
-
-#ifdef CONFIG_SMP
-
-struct cpuid_command {
- int cpu;
- u32 reg;
- u32 *data;
-};
-
-static void cpuid_smp_cpuid(void *cmd_block)
-{
- struct cpuid_command *cmd = (struct cpuid_command *)cmd_block;
-
- if (cmd->cpu == smp_processor_id())
- cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
- &cmd->data[3]);
-}
-
-static inline void do_cpuid(int cpu, u32 reg, u32 * data)
-{
- struct cpuid_command cmd;
-
- preempt_disable();
- if (cpu == smp_processor_id()) {
- cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
- } else {
- cmd.cpu = cpu;
- cmd.reg = reg;
- cmd.data = data;
-
- smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1);
- }
- preempt_enable();
-}
-#else /* ! CONFIG_SMP */
-
-static inline void do_cpuid(int cpu, u32 reg, u32 * data)
-{
- cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
-}
-
-#endif /* ! CONFIG_SMP */
-
-static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
-{
- loff_t ret;
-
- lock_kernel();
-
- switch (orig) {
- case 0:
- file->f_pos = offset;
- ret = file->f_pos;
- break;
- case 1:
- file->f_pos += offset;
- ret = file->f_pos;
- break;
- default:
- ret = -EINVAL;
- }
-
- unlock_kernel();
- return ret;
-}
-
-static ssize_t cpuid_read(struct file *file, char __user *buf,
- size_t count, loff_t * ppos)
-{
- char __user *tmp = buf;
- u32 data[4];
- size_t rv;
- u32 reg = *ppos;
- int cpu = iminor(file->f_dentry->d_inode);
-
- if (count % 16)
- return -EINVAL; /* Invalid chunk size */
-
- for (rv = 0; count; count -= 16) {
- do_cpuid(cpu, reg, data);
- if (copy_to_user(tmp, &data, 16))
- return -EFAULT;
- tmp += 16;
- *ppos = reg++;
- }
-
- return tmp - buf;
-}
-
-static int cpuid_open(struct inode *inode, struct file *file)
-{
- unsigned int cpu = iminor(file->f_dentry->d_inode);
- struct cpuinfo_x86 *c = &(cpu_data)[cpu];
-
- if (cpu >= NR_CPUS || !cpu_online(cpu))
- return -ENXIO; /* No such CPU */
- if (c->cpuid_level < 0)
- return -EIO; /* CPUID not supported */
-
- return 0;
-}
-
-/*
- * File operations we support
- */
-static struct file_operations cpuid_fops = {
- .owner = THIS_MODULE,
- .llseek = cpuid_seek,
- .read = cpuid_read,
- .open = cpuid_open,
-};
-
-static int cpuid_class_device_create(int i)
-{
- int err = 0;
- struct class_device *class_err;
-
- class_err = class_device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i);
- if (IS_ERR(class_err))
- err = PTR_ERR(class_err);
- return err;
-}
-
-static int __devinit cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action) {
- case CPU_ONLINE:
- cpuid_class_device_create(cpu);
- break;
- case CPU_DEAD:
- class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block cpuid_class_cpu_notifier =
-{
- .notifier_call = cpuid_class_cpu_callback,
-};
-
-static int __init cpuid_init(void)
-{
- int i, err = 0;
- i = 0;
-
- if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) {
- printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n",
- CPUID_MAJOR);
- err = -EBUSY;
- goto out;
- }
- cpuid_class = class_create(THIS_MODULE, "cpuid");
- if (IS_ERR(cpuid_class)) {
- err = PTR_ERR(cpuid_class);
- goto out_chrdev;
- }
- for_each_online_cpu(i) {
- err = cpuid_class_device_create(i);
- if (err != 0)
- goto out_class;
- }
- register_cpu_notifier(&cpuid_class_cpu_notifier);
-
- err = 0;
- goto out;
-
-out_class:
- i = 0;
- for_each_online_cpu(i) {
- class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, i));
- }
- class_destroy(cpuid_class);
-out_chrdev:
- unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
-out:
- return err;
-}
-
-static void __exit cpuid_exit(void)
-{
- int cpu = 0;
-
- for_each_online_cpu(cpu)
- class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
- class_destroy(cpuid_class);
- unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
- unregister_cpu_notifier(&cpuid_class_cpu_notifier);
-}
-
-module_init(cpuid_init);
-module_exit(cpuid_exit);
-
-MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
-MODULE_DESCRIPTION("x86 generic CPUID driver");
-MODULE_LICENSE("GPL");
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
deleted file mode 100644
index af809ccf5fb..00000000000
--- a/arch/i386/kernel/crash.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Architecture specific (i386) functions for kexec based crash dumps.
- *
- * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
- *
- * Copyright (C) IBM Corporation, 2004. All rights reserved.
- *
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/reboot.h>
-#include <linux/kexec.h>
-#include <linux/delay.h>
-#include <linux/elf.h>
-#include <linux/elfcore.h>
-
-#include <asm/processor.h>
-#include <asm/hardirq.h>
-#include <asm/nmi.h>
-#include <asm/hw_irq.h>
-#include <mach_ipi.h>
-
-
-note_buf_t crash_notes[NR_CPUS];
-/* This keeps a track of which one is crashing cpu. */
-static int crashing_cpu;
-
-static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
- size_t data_len)
-{
- struct elf_note note;
-
- note.n_namesz = strlen(name) + 1;
- note.n_descsz = data_len;
- note.n_type = type;
- memcpy(buf, &note, sizeof(note));
- buf += (sizeof(note) +3)/4;
- memcpy(buf, name, note.n_namesz);
- buf += (note.n_namesz + 3)/4;
- memcpy(buf, data, note.n_descsz);
- buf += (note.n_descsz + 3)/4;
-
- return buf;
-}
-
-static void final_note(u32 *buf)
-{
- struct elf_note note;
-
- note.n_namesz = 0;
- note.n_descsz = 0;
- note.n_type = 0;
- memcpy(buf, &note, sizeof(note));
-}
-
-static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
-{
- struct elf_prstatus prstatus;
- u32 *buf;
-
- if ((cpu < 0) || (cpu >= NR_CPUS))
- return;
-
- /* Using ELF notes here is opportunistic.
- * I need a well defined structure format
- * for the data I pass, and I need tags
- * on the data to indicate what information I have
- * squirrelled away. ELF notes happen to provide
- * all of that that no need to invent something new.
- */
- buf = &crash_notes[cpu][0];
- memset(&prstatus, 0, sizeof(prstatus));
- prstatus.pr_pid = current->pid;
- elf_core_copy_regs(&prstatus.pr_reg, regs);
- buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
- sizeof(prstatus));
- final_note(buf);
-}
-
-static void crash_get_current_regs(struct pt_regs *regs)
-{
- __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx));
- __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx));
- __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx));
- __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi));
- __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi));
- __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp));
- __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax));
- __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp));
- __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss));
- __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs));
- __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds));
- __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes));
- __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags));
-
- regs->eip = (unsigned long)current_text_addr();
-}
-
-/* CPU does not save ss and esp on stack if execution is already
- * running in kernel mode at the time of NMI occurrence. This code
- * fixes it.
- */
-static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
-{
- memcpy(newregs, oldregs, sizeof(*newregs));
- newregs->esp = (unsigned long)&(oldregs->esp);
- __asm__ __volatile__("xorl %eax, %eax;");
- __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss));
-}
-
-/* We may have saved_regs from where the error came from
- * or it is NULL if via a direct panic().
- */
-static void crash_save_self(struct pt_regs *saved_regs)
-{
- struct pt_regs regs;
- int cpu;
-
- cpu = smp_processor_id();
- if (saved_regs)
- crash_setup_regs(&regs, saved_regs);
- else
- crash_get_current_regs(&regs);
- crash_save_this_cpu(&regs, cpu);
-}
-
-#ifdef CONFIG_SMP
-static atomic_t waiting_for_crash_ipi;
-
-static int crash_nmi_callback(struct pt_regs *regs, int cpu)
-{
- struct pt_regs fixed_regs;
-
- /* Don't do anything if this handler is invoked on crashing cpu.
- * Otherwise, system will completely hang. Crashing cpu can get
- * an NMI if system was initially booted with nmi_watchdog parameter.
- */
- if (cpu == crashing_cpu)
- return 1;
- local_irq_disable();
-
- if (!user_mode(regs)) {
- crash_setup_regs(&fixed_regs, regs);
- regs = &fixed_regs;
- }
- crash_save_this_cpu(regs, cpu);
- atomic_dec(&waiting_for_crash_ipi);
- /* Assume hlt works */
- halt();
- for(;;);
-
- return 1;
-}
-
-/*
- * By using the NMI code instead of a vector we just sneak thru the
- * word generator coming out with just what we want. AND it does
- * not matter if clustered_apic_mode is set or not.
- */
-static void smp_send_nmi_allbutself(void)
-{
- send_IPI_allbutself(APIC_DM_NMI);
-}
-
-static void nmi_shootdown_cpus(void)
-{
- unsigned long msecs;
-
- atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- /* Would it be better to replace the trap vector here? */
- set_nmi_callback(crash_nmi_callback);
- /* Ensure the new callback function is set before sending
- * out the NMI
- */
- wmb();
-
- smp_send_nmi_allbutself();
-
- msecs = 1000; /* Wait at most a second for the other cpus to stop */
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
- mdelay(1);
- msecs--;
- }
-
- /* Leave the nmi callback set */
-}
-#else
-static void nmi_shootdown_cpus(void)
-{
- /* There are no cpus to shootdown */
-}
-#endif
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
- /* This function is only called after the system
- * has paniced or is otherwise in a critical state.
- * The minimum amount of code to allow a kexec'd kernel
- * to run successfully needs to happen here.
- *
- * In practice this means shooting down the other cpus in
- * an SMP system.
- */
- /* The kernel is broken so disable interrupts */
- local_irq_disable();
-
- /* Make a note of crashing cpu. Will be used in NMI callback.*/
- crashing_cpu = smp_processor_id();
- nmi_shootdown_cpus();
- crash_save_self(regs);
-}
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
deleted file mode 100644
index 58516e2ac17..00000000000
--- a/arch/i386/kernel/dmi_scan.c
+++ /dev/null
@@ -1,301 +0,0 @@
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/dmi.h>
-#include <linux/bootmem.h>
-
-
-static char * __init dmi_string(struct dmi_header *dm, u8 s)
-{
- u8 *bp = ((u8 *) dm) + dm->length;
- char *str = "";
-
- if (s) {
- s--;
- while (s > 0 && *bp) {
- bp += strlen(bp) + 1;
- s--;
- }
-
- if (*bp != 0) {
- str = alloc_bootmem(strlen(bp) + 1);
- if (str != NULL)
- strcpy(str, bp);
- else
- printk(KERN_ERR "dmi_string: out of memory.\n");
- }
- }
-
- return str;
-}
-
-/*
- * We have to be cautious here. We have seen BIOSes with DMI pointers
- * pointing to completely the wrong place for example
- */
-static int __init dmi_table(u32 base, int len, int num,
- void (*decode)(struct dmi_header *))
-{
- u8 *buf, *data;
- int i = 0;
-
- buf = bt_ioremap(base, len);
- if (buf == NULL)
- return -1;
-
- data = buf;
-
- /*
- * Stop when we see all the items the table claimed to have
- * OR we run off the end of the table (also happens)
- */
- while ((i < num) && (data - buf + sizeof(struct dmi_header)) <= len) {
- struct dmi_header *dm = (struct dmi_header *)data;
- /*
- * We want to know the total length (formated area and strings)
- * before decoding to make sure we won't run off the table in
- * dmi_decode or dmi_string
- */
- data += dm->length;
- while ((data - buf < len - 1) && (data[0] || data[1]))
- data++;
- if (data - buf < len - 1)
- decode(dm);
- data += 2;
- i++;
- }
- bt_iounmap(buf, len);
- return 0;
-}
-
-static int __init dmi_checksum(u8 *buf)
-{
- u8 sum = 0;
- int a;
-
- for (a = 0; a < 15; a++)
- sum += buf[a];
-
- return sum == 0;
-}
-
-static char *dmi_ident[DMI_STRING_MAX];
-static LIST_HEAD(dmi_devices);
-
-/*
- * Save a DMI string
- */
-static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
-{
- char *p, *d = (char*) dm;
-
- if (dmi_ident[slot])
- return;
-
- p = dmi_string(dm, d[string]);
- if (p == NULL)
- return;
-
- dmi_ident[slot] = p;
-}
-
-static void __init dmi_save_devices(struct dmi_header *dm)
-{
- int i, count = (dm->length - sizeof(struct dmi_header)) / 2;
- struct dmi_device *dev;
-
- for (i = 0; i < count; i++) {
- char *d = ((char *) dm) + (i * 2);
-
- /* Skip disabled device */
- if ((*d & 0x80) == 0)
- continue;
-
- dev = alloc_bootmem(sizeof(*dev));
- if (!dev) {
- printk(KERN_ERR "dmi_save_devices: out of memory.\n");
- break;
- }
-
- dev->type = *d++ & 0x7f;
- dev->name = dmi_string(dm, *d);
- dev->device_data = NULL;
-
- list_add(&dev->list, &dmi_devices);
- }
-}
-
-static void __init dmi_save_ipmi_device(struct dmi_header *dm)
-{
- struct dmi_device *dev;
- void * data;
-
- data = alloc_bootmem(dm->length);
- if (data == NULL) {
- printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
- return;
- }
-
- memcpy(data, dm, dm->length);
-
- dev = alloc_bootmem(sizeof(*dev));
- if (!dev) {
- printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
- return;
- }
-
- dev->type = DMI_DEV_TYPE_IPMI;
- dev->name = "IPMI controller";
- dev->device_data = data;
-
- list_add(&dev->list, &dmi_devices);
-}
-
-/*
- * Process a DMI table entry. Right now all we care about are the BIOS
- * and machine entries. For 2.5 we should pull the smbus controller info
- * out of here.
- */
-static void __init dmi_decode(struct dmi_header *dm)
-{
- switch(dm->type) {
- case 0: /* BIOS Information */
- dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
- dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
- dmi_save_ident(dm, DMI_BIOS_DATE, 8);
- break;
- case 1: /* System Information */
- dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
- dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
- dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
- dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7);
- break;
- case 2: /* Base Board Information */
- dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
- dmi_save_ident(dm, DMI_BOARD_NAME, 5);
- dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
- break;
- case 10: /* Onboard Devices Information */
- dmi_save_devices(dm);
- break;
- case 38: /* IPMI Device Information */
- dmi_save_ipmi_device(dm);
- }
-}
-
-void __init dmi_scan_machine(void)
-{
- u8 buf[15];
- char __iomem *p, *q;
-
- /*
- * no iounmap() for that ioremap(); it would be a no-op, but it's
- * so early in setup that sucker gets confused into doing what
- * it shouldn't if we actually call it.
- */
- p = ioremap(0xF0000, 0x10000);
- if (p == NULL)
- goto out;
-
- for (q = p; q < p + 0x10000; q += 16) {
- memcpy_fromio(buf, q, 15);
- if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
- u16 num = (buf[13] << 8) | buf[12];
- u16 len = (buf[7] << 8) | buf[6];
- u32 base = (buf[11] << 24) | (buf[10] << 16) |
- (buf[9] << 8) | buf[8];
-
- /*
- * DMI version 0.0 means that the real version is taken from
- * the SMBIOS version, which we don't know at this point.
- */
- if (buf[14] != 0)
- printk(KERN_INFO "DMI %d.%d present.\n",
- buf[14] >> 4, buf[14] & 0xF);
- else
- printk(KERN_INFO "DMI present.\n");
-
- if (dmi_table(base,len, num, dmi_decode) == 0)
- return;
- }
- }
-
-out: printk(KERN_INFO "DMI not present.\n");
-}
-
-
-/**
- * dmi_check_system - check system DMI data
- * @list: array of dmi_system_id structures to match against
- *
- * Walk the blacklist table running matching functions until someone
- * returns non zero or we hit the end. Callback function is called for
- * each successfull match. Returns the number of matches.
- */
-int dmi_check_system(struct dmi_system_id *list)
-{
- int i, count = 0;
- struct dmi_system_id *d = list;
-
- while (d->ident) {
- for (i = 0; i < ARRAY_SIZE(d->matches); i++) {
- int s = d->matches[i].slot;
- if (s == DMI_NONE)
- continue;
- if (dmi_ident[s] && strstr(dmi_ident[s], d->matches[i].substr))
- continue;
- /* No match */
- goto fail;
- }
- count++;
- if (d->callback && d->callback(d))
- break;
-fail: d++;
- }
-
- return count;
-}
-EXPORT_SYMBOL(dmi_check_system);
-
-/**
- * dmi_get_system_info - return DMI data value
- * @field: data index (see enum dmi_filed)
- *
- * Returns one DMI data value, can be used to perform
- * complex DMI data checks.
- */
-char *dmi_get_system_info(int field)
-{
- return dmi_ident[field];
-}
-EXPORT_SYMBOL(dmi_get_system_info);
-
-/**
- * dmi_find_device - find onboard device by type/name
- * @type: device type or %DMI_DEV_TYPE_ANY to match all device types
- * @desc: device name string or %NULL to match all
- * @from: previous device found in search, or %NULL for new search.
- *
- * Iterates through the list of known onboard devices. If a device is
- * found with a matching @vendor and @device, a pointer to its device
- * structure is returned. Otherwise, %NULL is returned.
- * A new search is initiated by passing %NULL to the @from argument.
- * If @from is not %NULL, searches continue from next device.
- */
-struct dmi_device * dmi_find_device(int type, const char *name,
- struct dmi_device *from)
-{
- struct list_head *d, *head = from ? &from->list : &dmi_devices;
-
- for(d = head->next; d != &dmi_devices; d = d->next) {
- struct dmi_device *dev = list_entry(d, struct dmi_device, list);
-
- if (((type == DMI_DEV_TYPE_ANY) || (dev->type == type)) &&
- ((name == NULL) || (strcmp(dev->name, name) == 0)))
- return dev;
- }
-
- return NULL;
-}
-EXPORT_SYMBOL(dmi_find_device);
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
deleted file mode 100644
index 5edb1d379ad..00000000000
--- a/arch/i386/kernel/doublefault.c
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-
-#define DOUBLEFAULT_STACKSIZE (1024)
-static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
-#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
-
-#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000)
-
-static void doublefault_fn(void)
-{
- struct Xgt_desc_struct gdt_desc = {0, 0};
- unsigned long gdt, tss;
-
- store_gdt(&gdt_desc);
- gdt = gdt_desc.address;
-
- printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
-
- if (ptr_ok(gdt)) {
- gdt += GDT_ENTRY_TSS << 3;
- tss = *(u16 *)(gdt+2);
- tss += *(u8 *)(gdt+4) << 16;
- tss += *(u8 *)(gdt+7) << 24;
- printk("double fault, tss at %08lx\n", tss);
-
- if (ptr_ok(tss)) {
- struct tss_struct *t = (struct tss_struct *)tss;
-
- printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
-
- printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
- t->eax, t->ebx, t->ecx, t->edx);
- printk("esi = %08lx, edi = %08lx\n",
- t->esi, t->edi);
- }
- }
-
- for (;;) /* nothing */;
-}
-
-struct tss_struct doublefault_tss __cacheline_aligned = {
- .esp0 = STACK_START,
- .ss0 = __KERNEL_DS,
- .ldt = 0,
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
-
- .eip = (unsigned long) doublefault_fn,
- .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */
- .esp = STACK_START,
- .es = __USER_DS,
- .cs = __KERNEL_CS,
- .ss = __KERNEL_DS,
- .ds = __USER_DS,
-
- .__cr3 = __pa(swapper_pg_dir)
-};
diff --git a/arch/i386/kernel/early_printk.c b/arch/i386/kernel/early_printk.c
deleted file mode 100644
index 92f812ba275..00000000000
--- a/arch/i386/kernel/early_printk.c
+++ /dev/null
@@ -1,2 +0,0 @@
-
-#include "../../x86_64/kernel/early_printk.c"
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
deleted file mode 100644
index ecad519fd39..00000000000
--- a/arch/i386/kernel/efi.c
+++ /dev/null
@@ -1,637 +0,0 @@
-/*
- * Extensible Firmware Interface
- *
- * Based on Extensible Firmware Interface Specification version 1.0
- *
- * Copyright (C) 1999 VA Linux Systems
- * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
- * Copyright (C) 1999-2002 Hewlett-Packard Co.
- * David Mosberger-Tang <davidm@hpl.hp.com>
- * Stephane Eranian <eranian@hpl.hp.com>
- *
- * All EFI Runtime Services are not implemented yet as EFI only
- * supports physical mode addressing on SoftSDV. This is to be fixed
- * in a future version. --drummond 1999-07-20
- *
- * Implemented EFI runtime services and virtual mode calls. --davidm
- *
- * Goutham Rao: <goutham.rao@intel.com>
- * Skip non-WB memory and ignore empty memory ranges.
- */
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <linux/time.h>
-#include <linux/spinlock.h>
-#include <linux/bootmem.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/efi.h>
-#include <linux/kexec.h>
-
-#include <asm/setup.h>
-#include <asm/io.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-#include <asm/tlbflush.h>
-
-#define EFI_DEBUG 0
-#define PFX "EFI: "
-
-extern efi_status_t asmlinkage efi_call_phys(void *, ...);
-
-struct efi efi;
-EXPORT_SYMBOL(efi);
-static struct efi efi_phys;
-struct efi_memory_map memmap;
-
-/*
- * We require an early boot_ioremap mapping mechanism initially
- */
-extern void * boot_ioremap(unsigned long, unsigned long);
-
-/*
- * To make EFI call EFI runtime service in physical addressing mode we need
- * prelog/epilog before/after the invocation to disable interrupt, to
- * claim EFI runtime service handler exclusively and to duplicate a memory in
- * low memory space say 0 - 3G.
- */
-
-static unsigned long efi_rt_eflags;
-static DEFINE_SPINLOCK(efi_rt_lock);
-static pgd_t efi_bak_pg_dir_pointer[2];
-
-static void efi_call_phys_prelog(void)
-{
- unsigned long cr4;
- unsigned long temp;
-
- spin_lock(&efi_rt_lock);
- local_irq_save(efi_rt_eflags);
-
- /*
- * If I don't have PSE, I should just duplicate two entries in page
- * directory. If I have PSE, I just need to duplicate one entry in
- * page directory.
- */
- cr4 = read_cr4();
-
- if (cr4 & X86_CR4_PSE) {
- efi_bak_pg_dir_pointer[0].pgd =
- swapper_pg_dir[pgd_index(0)].pgd;
- swapper_pg_dir[0].pgd =
- swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
- } else {
- efi_bak_pg_dir_pointer[0].pgd =
- swapper_pg_dir[pgd_index(0)].pgd;
- efi_bak_pg_dir_pointer[1].pgd =
- swapper_pg_dir[pgd_index(0x400000)].pgd;
- swapper_pg_dir[pgd_index(0)].pgd =
- swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
- temp = PAGE_OFFSET + 0x400000;
- swapper_pg_dir[pgd_index(0x400000)].pgd =
- swapper_pg_dir[pgd_index(temp)].pgd;
- }
-
- /*
- * After the lock is released, the original page table is restored.
- */
- local_flush_tlb();
-
- cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address);
- load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]));
-}
-
-static void efi_call_phys_epilog(void)
-{
- unsigned long cr4;
-
- cpu_gdt_descr[0].address =
- (unsigned long) __va(cpu_gdt_descr[0].address);
- load_gdt(&cpu_gdt_descr[0]);
- cr4 = read_cr4();
-
- if (cr4 & X86_CR4_PSE) {
- swapper_pg_dir[pgd_index(0)].pgd =
- efi_bak_pg_dir_pointer[0].pgd;
- } else {
- swapper_pg_dir[pgd_index(0)].pgd =
- efi_bak_pg_dir_pointer[0].pgd;
- swapper_pg_dir[pgd_index(0x400000)].pgd =
- efi_bak_pg_dir_pointer[1].pgd;
- }
-
- /*
- * After the lock is released, the original page table is restored.
- */
- local_flush_tlb();
-
- local_irq_restore(efi_rt_eflags);
- spin_unlock(&efi_rt_lock);
-}
-
-static efi_status_t
-phys_efi_set_virtual_address_map(unsigned long memory_map_size,
- unsigned long descriptor_size,
- u32 descriptor_version,
- efi_memory_desc_t *virtual_map)
-{
- efi_status_t status;
-
- efi_call_phys_prelog();
- status = efi_call_phys(efi_phys.set_virtual_address_map,
- memory_map_size, descriptor_size,
- descriptor_version, virtual_map);
- efi_call_phys_epilog();
- return status;
-}
-
-static efi_status_t
-phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
-{
- efi_status_t status;
-
- efi_call_phys_prelog();
- status = efi_call_phys(efi_phys.get_time, tm, tc);
- efi_call_phys_epilog();
- return status;
-}
-
-inline int efi_set_rtc_mmss(unsigned long nowtime)
-{
- int real_seconds, real_minutes;
- efi_status_t status;
- efi_time_t eft;
- efi_time_cap_t cap;
-
- spin_lock(&efi_rt_lock);
- status = efi.get_time(&eft, &cap);
- spin_unlock(&efi_rt_lock);
- if (status != EFI_SUCCESS)
- panic("Ooops, efitime: can't read time!\n");
- real_seconds = nowtime % 60;
- real_minutes = nowtime / 60;
-
- if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
- real_minutes += 30;
- real_minutes %= 60;
-
- eft.minute = real_minutes;
- eft.second = real_seconds;
-
- if (status != EFI_SUCCESS) {
- printk("Ooops: efitime: can't read time!\n");
- return -1;
- }
- return 0;
-}
-/*
- * This should only be used during kernel init and before runtime
- * services have been remapped, therefore, we'll need to call in physical
- * mode. Note, this call isn't used later, so mark it __init.
- */
-inline unsigned long __init efi_get_time(void)
-{
- efi_status_t status;
- efi_time_t eft;
- efi_time_cap_t cap;
-
- status = phys_efi_get_time(&eft, &cap);
- if (status != EFI_SUCCESS)
- printk("Oops: efitime: can't read time status: 0x%lx\n",status);
-
- return mktime(eft.year, eft.month, eft.day, eft.hour,
- eft.minute, eft.second);
-}
-
-int is_available_memory(efi_memory_desc_t * md)
-{
- if (!(md->attribute & EFI_MEMORY_WB))
- return 0;
-
- switch (md->type) {
- case EFI_LOADER_CODE:
- case EFI_LOADER_DATA:
- case EFI_BOOT_SERVICES_CODE:
- case EFI_BOOT_SERVICES_DATA:
- case EFI_CONVENTIONAL_MEMORY:
- return 1;
- }
- return 0;
-}
-
-/*
- * We need to map the EFI memory map again after paging_init().
- */
-void __init efi_map_memmap(void)
-{
- memmap.map = NULL;
-
- memmap.map = bt_ioremap((unsigned long) memmap.phys_map,
- (memmap.nr_map * memmap.desc_size));
- if (memmap.map == NULL)
- printk(KERN_ERR PFX "Could not remap the EFI memmap!\n");
-
- memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
-}
-
-#if EFI_DEBUG
-static void __init print_efi_memmap(void)
-{
- efi_memory_desc_t *md;
- void *p;
- int i;
-
- for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
- md = p;
- printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, "
- "range=[0x%016llx-0x%016llx) (%lluMB)\n",
- i, md->type, md->attribute, md->phys_addr,
- md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
- (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
- }
-}
-#endif /* EFI_DEBUG */
-
-/*
- * Walks the EFI memory map and calls CALLBACK once for each EFI
- * memory descriptor that has memory that is available for kernel use.
- */
-void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
-{
- int prev_valid = 0;
- struct range {
- unsigned long start;
- unsigned long end;
- } prev, curr;
- efi_memory_desc_t *md;
- unsigned long start, end;
- void *p;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
-
- if ((md->num_pages == 0) || (!is_available_memory(md)))
- continue;
-
- curr.start = md->phys_addr;
- curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
-
- if (!prev_valid) {
- prev = curr;
- prev_valid = 1;
- } else {
- if (curr.start < prev.start)
- printk(KERN_INFO PFX "Unordered memory map\n");
- if (prev.end == curr.start)
- prev.end = curr.end;
- else {
- start =
- (unsigned long) (PAGE_ALIGN(prev.start));
- end = (unsigned long) (prev.end & PAGE_MASK);
- if ((end > start)
- && (*callback) (start, end, arg) < 0)
- return;
- prev = curr;
- }
- }
- }
- if (prev_valid) {
- start = (unsigned long) PAGE_ALIGN(prev.start);
- end = (unsigned long) (prev.end & PAGE_MASK);
- if (end > start)
- (*callback) (start, end, arg);
- }
-}
-
-void __init efi_init(void)
-{
- efi_config_table_t *config_tables;
- efi_runtime_services_t *runtime;
- efi_char16_t *c16;
- char vendor[100] = "unknown";
- unsigned long num_config_tables;
- int i = 0;
-
- memset(&efi, 0, sizeof(efi) );
- memset(&efi_phys, 0, sizeof(efi_phys));
-
- efi_phys.systab = EFI_SYSTAB;
- memmap.phys_map = EFI_MEMMAP;
- memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE;
- memmap.desc_version = EFI_MEMDESC_VERSION;
- memmap.desc_size = EFI_MEMDESC_SIZE;
-
- efi.systab = (efi_system_table_t *)
- boot_ioremap((unsigned long) efi_phys.systab,
- sizeof(efi_system_table_t));
- /*
- * Verify the EFI Table
- */
- if (efi.systab == NULL)
- printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n");
- if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
- printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n");
- if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
- printk(KERN_ERR PFX
- "Warning: EFI system table major version mismatch: "
- "got %d.%02d, expected %d.%02d\n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff,
- EFI_SYSTEM_TABLE_REVISION >> 16,
- EFI_SYSTEM_TABLE_REVISION & 0xffff);
- /*
- * Grab some details from the system table
- */
- num_config_tables = efi.systab->nr_tables;
- config_tables = (efi_config_table_t *)efi.systab->tables;
- runtime = efi.systab->runtime;
-
- /*
- * Show what we know for posterity
- */
- c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2);
- if (c16) {
- for (i = 0; i < sizeof(vendor) && *c16; ++i)
- vendor[i] = *c16++;
- vendor[i] = '\0';
- } else
- printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
-
- printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff, vendor);
-
- /*
- * Let's see what config tables the firmware passed to us.
- */
- config_tables = (efi_config_table_t *)
- boot_ioremap((unsigned long) config_tables,
- num_config_tables * sizeof(efi_config_table_t));
-
- if (config_tables == NULL)
- printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n");
-
- for (i = 0; i < num_config_tables; i++) {
- if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
- efi.mps = (void *)config_tables[i].table;
- printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table);
- } else
- if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
- efi.acpi20 = __va(config_tables[i].table);
- printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table);
- } else
- if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
- efi.acpi = __va(config_tables[i].table);
- printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table);
- } else
- if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
- efi.smbios = (void *) config_tables[i].table;
- printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table);
- } else
- if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
- efi.hcdp = (void *)config_tables[i].table;
- printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table);
- } else
- if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) {
- efi.uga = (void *)config_tables[i].table;
- printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table);
- }
- }
- printk("\n");
-
- /*
- * Check out the runtime services table. We need to map
- * the runtime services table so that we can grab the physical
- * address of several of the EFI runtime functions, needed to
- * set the firmware into virtual mode.
- */
-
- runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long)
- runtime,
- sizeof(efi_runtime_services_t));
- if (runtime != NULL) {
- /*
- * We will only need *early* access to the following
- * two EFI runtime services before set_virtual_address_map
- * is invoked.
- */
- efi_phys.get_time = (efi_get_time_t *) runtime->get_time;
- efi_phys.set_virtual_address_map =
- (efi_set_virtual_address_map_t *)
- runtime->set_virtual_address_map;
- } else
- printk(KERN_ERR PFX "Could not map the runtime service table!\n");
-
- /* Map the EFI memory map for use until paging_init() */
- memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
- if (memmap.map == NULL)
- printk(KERN_ERR PFX "Could not map the EFI memory map!\n");
-
- memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
-
-#if EFI_DEBUG
- print_efi_memmap();
-#endif
-}
-
-static inline void __init check_range_for_systab(efi_memory_desc_t *md)
-{
- if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) &&
- ((unsigned long)efi_phys.systab < md->phys_addr +
- ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) {
- unsigned long addr;
-
- addr = md->virt_addr - md->phys_addr +
- (unsigned long)efi_phys.systab;
- efi.systab = (efi_system_table_t *)addr;
- }
-}
-
-/*
- * This function will switch the EFI runtime services to virtual mode.
- * Essentially, look through the EFI memmap and map every region that
- * has the runtime attribute bit set in its memory descriptor and update
- * that memory descriptor with the virtual address obtained from ioremap().
- * This enables the runtime services to be called without having to
- * thunk back into physical mode for every invocation.
- */
-
-void __init efi_enter_virtual_mode(void)
-{
- efi_memory_desc_t *md;
- efi_status_t status;
- void *p;
-
- efi.systab = NULL;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
-
- if (!(md->attribute & EFI_MEMORY_RUNTIME))
- continue;
-
- md->virt_addr = (unsigned long)ioremap(md->phys_addr,
- md->num_pages << EFI_PAGE_SHIFT);
- if (!(unsigned long)md->virt_addr) {
- printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
- (unsigned long)md->phys_addr);
- }
- /* update the virtual address of the EFI system table */
- check_range_for_systab(md);
- }
-
- if (!efi.systab)
- BUG();
-
- status = phys_efi_set_virtual_address_map(
- memmap.desc_size * memmap.nr_map,
- memmap.desc_size,
- memmap.desc_version,
- memmap.phys_map);
-
- if (status != EFI_SUCCESS) {
- printk (KERN_ALERT "You are screwed! "
- "Unable to switch EFI into virtual mode "
- "(status=%lx)\n", status);
- panic("EFI call to SetVirtualAddressMap() failed!");
- }
-
- /*
- * Now that EFI is in virtual mode, update the function
- * pointers in the runtime service table to the new virtual addresses.
- */
-
- efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time;
- efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time;
- efi.get_wakeup_time = (efi_get_wakeup_time_t *)
- efi.systab->runtime->get_wakeup_time;
- efi.set_wakeup_time = (efi_set_wakeup_time_t *)
- efi.systab->runtime->set_wakeup_time;
- efi.get_variable = (efi_get_variable_t *)
- efi.systab->runtime->get_variable;
- efi.get_next_variable = (efi_get_next_variable_t *)
- efi.systab->runtime->get_next_variable;
- efi.set_variable = (efi_set_variable_t *)
- efi.systab->runtime->set_variable;
- efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *)
- efi.systab->runtime->get_next_high_mono_count;
- efi.reset_system = (efi_reset_system_t *)
- efi.systab->runtime->reset_system;
-}
-
-void __init
-efi_initialize_iomem_resources(struct resource *code_resource,
- struct resource *data_resource)
-{
- struct resource *res;
- efi_memory_desc_t *md;
- void *p;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
-
- if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >
- 0x100000000ULL)
- continue;
- res = alloc_bootmem_low(sizeof(struct resource));
- switch (md->type) {
- case EFI_RESERVED_TYPE:
- res->name = "Reserved Memory";
- break;
- case EFI_LOADER_CODE:
- res->name = "Loader Code";
- break;
- case EFI_LOADER_DATA:
- res->name = "Loader Data";
- break;
- case EFI_BOOT_SERVICES_DATA:
- res->name = "BootServices Data";
- break;
- case EFI_BOOT_SERVICES_CODE:
- res->name = "BootServices Code";
- break;
- case EFI_RUNTIME_SERVICES_CODE:
- res->name = "Runtime Service Code";
- break;
- case EFI_RUNTIME_SERVICES_DATA:
- res->name = "Runtime Service Data";
- break;
- case EFI_CONVENTIONAL_MEMORY:
- res->name = "Conventional Memory";
- break;
- case EFI_UNUSABLE_MEMORY:
- res->name = "Unusable Memory";
- break;
- case EFI_ACPI_RECLAIM_MEMORY:
- res->name = "ACPI Reclaim";
- break;
- case EFI_ACPI_MEMORY_NVS:
- res->name = "ACPI NVS";
- break;
- case EFI_MEMORY_MAPPED_IO:
- res->name = "Memory Mapped IO";
- break;
- case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
- res->name = "Memory Mapped IO Port Space";
- break;
- default:
- res->name = "Reserved";
- break;
- }
- res->start = md->phys_addr;
- res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1);
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- if (request_resource(&iomem_resource, res) < 0)
- printk(KERN_ERR PFX "Failed to allocate res %s : 0x%lx-0x%lx\n",
- res->name, res->start, res->end);
- /*
- * We don't know which region contains kernel data so we try
- * it repeatedly and let the resource manager test it.
- */
- if (md->type == EFI_CONVENTIONAL_MEMORY) {
- request_resource(res, code_resource);
- request_resource(res, data_resource);
-#ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
-#endif
- }
- }
-}
-
-/*
- * Convenience functions to obtain memory types and attributes
- */
-
-u32 efi_mem_type(unsigned long phys_addr)
-{
- efi_memory_desc_t *md;
- void *p;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- if ((md->phys_addr <= phys_addr) && (phys_addr <
- (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
- return md->type;
- }
- return 0;
-}
-
-u64 efi_mem_attributes(unsigned long phys_addr)
-{
- efi_memory_desc_t *md;
- void *p;
-
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
- if ((md->phys_addr <= phys_addr) && (phys_addr <
- (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
- return md->attribute;
- }
- return 0;
-}
diff --git a/arch/i386/kernel/efi_stub.S b/arch/i386/kernel/efi_stub.S
deleted file mode 100644
index 08c0312d9b6..00000000000
--- a/arch/i386/kernel/efi_stub.S
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * EFI call stub for IA32.
- *
- * This stub allows us to make EFI calls in physical mode with interrupts
- * turned off.
- */
-
-#include <linux/config.h>
-#include <linux/linkage.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-
-/*
- * efi_call_phys(void *, ...) is a function with variable parameters.
- * All the callers of this function assure that all the parameters are 4-bytes.
- */
-
-/*
- * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
- * So we'd better save all of them at the beginning of this function and restore
- * at the end no matter how many we use, because we can not assure EFI runtime
- * service functions will comply with gcc calling convention, too.
- */
-
-.text
-ENTRY(efi_call_phys)
- /*
- * 0. The function can only be called in Linux kernel. So CS has been
- * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
- * the values of these registers are the same. And, the corresponding
- * GDT entries are identical. So I will do nothing about segment reg
- * and GDT, but change GDT base register in prelog and epilog.
- */
-
- /*
- * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
- * But to make it smoothly switch from virtual mode to flat mode.
- * The mapping of lower virtual memory has been created in prelog and
- * epilog.
- */
- movl $1f, %edx
- subl $__PAGE_OFFSET, %edx
- jmp *%edx
-1:
-
- /*
- * 2. Now on the top of stack is the return
- * address in the caller of efi_call_phys(), then parameter 1,
- * parameter 2, ..., param n. To make things easy, we save the return
- * address of efi_call_phys in a global variable.
- */
- popl %edx
- movl %edx, saved_return_addr
- /* get the function pointer into ECX*/
- popl %ecx
- movl %ecx, efi_rt_function_ptr
- movl $2f, %edx
- subl $__PAGE_OFFSET, %edx
- pushl %edx
-
- /*
- * 3. Clear PG bit in %CR0.
- */
- movl %cr0, %edx
- andl $0x7fffffff, %edx
- movl %edx, %cr0
- jmp 1f
-1:
-
- /*
- * 4. Adjust stack pointer.
- */
- subl $__PAGE_OFFSET, %esp
-
- /*
- * 5. Call the physical function.
- */
- jmp *%ecx
-
-2:
- /*
- * 6. After EFI runtime service returns, control will return to
- * following instruction. We'd better readjust stack pointer first.
- */
- addl $__PAGE_OFFSET, %esp
-
- /*
- * 7. Restore PG bit
- */
- movl %cr0, %edx
- orl $0x80000000, %edx
- movl %edx, %cr0
- jmp 1f
-1:
- /*
- * 8. Now restore the virtual mode from flat mode by
- * adding EIP with PAGE_OFFSET.
- */
- movl $1f, %edx
- jmp *%edx
-1:
-
- /*
- * 9. Balance the stack. And because EAX contain the return value,
- * we'd better not clobber it.
- */
- leal efi_rt_function_ptr, %edx
- movl (%edx), %ecx
- pushl %ecx
-
- /*
- * 10. Push the saved return address onto the stack and return.
- */
- leal saved_return_addr, %edx
- movl (%edx), %ecx
- pushl %ecx
- ret
-.previous
-
-.data
-saved_return_addr:
- .long 0
-efi_rt_function_ptr:
- .long 0
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
deleted file mode 100644
index 9e24f7b207e..00000000000
--- a/arch/i386/kernel/entry.S
+++ /dev/null
@@ -1,663 +0,0 @@
-/*
- * linux/arch/i386/entry.S
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- */
-
-/*
- * entry.S contains the system-call and fault low-level handling routines.
- * This also contains the timer-interrupt handler, as well as all interrupts
- * and faults that can result in a task-switch.
- *
- * NOTE: This code handles signal-recognition, which happens every time
- * after a timer-interrupt and after each system call.
- *
- * I changed all the .align's to 4 (16 byte alignment), as that's faster
- * on a 486.
- *
- * Stack layout in 'ret_from_system_call':
- * ptrace needs to have all regs on the stack.
- * if the order here is changed, it needs to be
- * updated in fork.c:copy_process, signal.c:do_signal,
- * ptrace.c and ptrace.h
- *
- * 0(%esp) - %ebx
- * 4(%esp) - %ecx
- * 8(%esp) - %edx
- * C(%esp) - %esi
- * 10(%esp) - %edi
- * 14(%esp) - %ebp
- * 18(%esp) - %eax
- * 1C(%esp) - %ds
- * 20(%esp) - %es
- * 24(%esp) - orig_eax
- * 28(%esp) - %eip
- * 2C(%esp) - %cs
- * 30(%esp) - %eflags
- * 34(%esp) - %oldesp
- * 38(%esp) - %oldss
- *
- * "current" is in register %ebx during any slow entries.
- */
-
-#include <linux/config.h>
-#include <linux/linkage.h>
-#include <asm/thread_info.h>
-#include <asm/errno.h>
-#include <asm/segment.h>
-#include <asm/smp.h>
-#include <asm/page.h>
-#include <asm/desc.h>
-#include "irq_vectors.h"
-
-#define nr_syscalls ((syscall_table_size)/4)
-
-EBX = 0x00
-ECX = 0x04
-EDX = 0x08
-ESI = 0x0C
-EDI = 0x10
-EBP = 0x14
-EAX = 0x18
-DS = 0x1C
-ES = 0x20
-ORIG_EAX = 0x24
-EIP = 0x28
-CS = 0x2C
-EFLAGS = 0x30
-OLDESP = 0x34
-OLDSS = 0x38
-
-CF_MASK = 0x00000001
-TF_MASK = 0x00000100
-IF_MASK = 0x00000200
-DF_MASK = 0x00000400
-NT_MASK = 0x00004000
-VM_MASK = 0x00020000
-
-#ifdef CONFIG_PREEMPT
-#define preempt_stop cli
-#else
-#define preempt_stop
-#define resume_kernel restore_nocheck
-#endif
-
-#define SAVE_ALL \
- cld; \
- pushl %es; \
- pushl %ds; \
- pushl %eax; \
- pushl %ebp; \
- pushl %edi; \
- pushl %esi; \
- pushl %edx; \
- pushl %ecx; \
- pushl %ebx; \
- movl $(__USER_DS), %edx; \
- movl %edx, %ds; \
- movl %edx, %es;
-
-#define RESTORE_INT_REGS \
- popl %ebx; \
- popl %ecx; \
- popl %edx; \
- popl %esi; \
- popl %edi; \
- popl %ebp; \
- popl %eax
-
-#define RESTORE_REGS \
- RESTORE_INT_REGS; \
-1: popl %ds; \
-2: popl %es; \
-.section .fixup,"ax"; \
-3: movl $0,(%esp); \
- jmp 1b; \
-4: movl $0,(%esp); \
- jmp 2b; \
-.previous; \
-.section __ex_table,"a";\
- .align 4; \
- .long 1b,3b; \
- .long 2b,4b; \
-.previous
-
-
-ENTRY(ret_from_fork)
- pushl %eax
- call schedule_tail
- GET_THREAD_INFO(%ebp)
- popl %eax
- jmp syscall_exit
-
-/*
- * Return to user mode is not as complex as all this looks,
- * but we want the default path for a system call return to
- * go as quickly as possible which is why some of this is
- * less clear than it otherwise should be.
- */
-
- # userspace resumption stub bypassing syscall exit tracing
- ALIGN
-ret_from_exception:
- preempt_stop
-ret_from_intr:
- GET_THREAD_INFO(%ebp)
- movl EFLAGS(%esp), %eax # mix EFLAGS and CS
- movb CS(%esp), %al
- testl $(VM_MASK | 3), %eax
- jz resume_kernel
-ENTRY(resume_userspace)
- cli # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
- movl TI_flags(%ebp), %ecx
- andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
- # int/exception return?
- jne work_pending
- jmp restore_all
-
-#ifdef CONFIG_PREEMPT
-ENTRY(resume_kernel)
- cli
- cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
- jnz restore_nocheck
-need_resched:
- movl TI_flags(%ebp), %ecx # need_resched set ?
- testb $_TIF_NEED_RESCHED, %cl
- jz restore_all
- testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all
- call preempt_schedule_irq
- jmp need_resched
-#endif
-
-/* SYSENTER_RETURN points to after the "sysenter" instruction in
- the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
-
- # sysenter call handler stub
-ENTRY(sysenter_entry)
- movl TSS_sysenter_esp0(%esp),%esp
-sysenter_past_esp:
- sti
- pushl $(__USER_DS)
- pushl %ebp
- pushfl
- pushl $(__USER_CS)
- pushl $SYSENTER_RETURN
-
-/*
- * Load the potential sixth argument from user stack.
- * Careful about security.
- */
- cmpl $__PAGE_OFFSET-3,%ebp
- jae syscall_fault
-1: movl (%ebp),%ebp
-.section __ex_table,"a"
- .align 4
- .long 1b,syscall_fault
-.previous
-
- pushl %eax
- SAVE_ALL
- GET_THREAD_INFO(%ebp)
-
- /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
- testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
- jnz syscall_trace_entry
- cmpl $(nr_syscalls), %eax
- jae syscall_badsys
- call *sys_call_table(,%eax,4)
- movl %eax,EAX(%esp)
- cli
- movl TI_flags(%ebp), %ecx
- testw $_TIF_ALLWORK_MASK, %cx
- jne syscall_exit_work
-/* if something modifies registers it must also disable sysexit */
- movl EIP(%esp), %edx
- movl OLDESP(%esp), %ecx
- xorl %ebp,%ebp
- sti
- sysexit
-
-
- # system call handler stub
-ENTRY(system_call)
- pushl %eax # save orig_eax
- SAVE_ALL
- GET_THREAD_INFO(%ebp)
- # system call tracing in operation / emulation
- /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
- testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
- jnz syscall_trace_entry
- cmpl $(nr_syscalls), %eax
- jae syscall_badsys
-syscall_call:
- call *sys_call_table(,%eax,4)
- movl %eax,EAX(%esp) # store the return value
-syscall_exit:
- cli # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
- movl TI_flags(%ebp), %ecx
- testw $_TIF_ALLWORK_MASK, %cx # current->work
- jne syscall_exit_work
-
-restore_all:
- movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
- # Warning: OLDSS(%esp) contains the wrong/random values if we
- # are returning to the kernel.
- # See comments in process.c:copy_thread() for details.
- movb OLDSS(%esp), %ah
- movb CS(%esp), %al
- andl $(VM_MASK | (4 << 8) | 3), %eax
- cmpl $((4 << 8) | 3), %eax
- je ldt_ss # returning to user-space with LDT SS
-restore_nocheck:
- RESTORE_REGS
- addl $4, %esp
-1: iret
-.section .fixup,"ax"
-iret_exc:
- sti
- pushl $0 # no error code
- pushl $do_iret_error
- jmp error_code
-.previous
-.section __ex_table,"a"
- .align 4
- .long 1b,iret_exc
-.previous
-
-ldt_ss:
- larl OLDSS(%esp), %eax
- jnz restore_nocheck
- testl $0x00400000, %eax # returning to 32bit stack?
- jnz restore_nocheck # allright, normal return
- /* If returning to userspace with 16bit stack,
- * try to fix the higher word of ESP, as the CPU
- * won't restore it.
- * This is an "official" bug of all the x86-compatible
- * CPUs, which we can try to work around to make
- * dosemu and wine happy. */
- subl $8, %esp # reserve space for switch16 pointer
- cli
- movl %esp, %eax
- /* Set up the 16bit stack frame with switch32 pointer on top,
- * and a switch16 pointer on top of the current frame. */
- call setup_x86_bogus_stack
- RESTORE_REGS
- lss 20+4(%esp), %esp # switch to 16bit stack
-1: iret
-.section __ex_table,"a"
- .align 4
- .long 1b,iret_exc
-.previous
-
- # perform work that needs to be done immediately before resumption
- ALIGN
-work_pending:
- testb $_TIF_NEED_RESCHED, %cl
- jz work_notifysig
-work_resched:
- call schedule
- cli # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
- movl TI_flags(%ebp), %ecx
- andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
- # than syscall tracing?
- jz restore_all
- testb $_TIF_NEED_RESCHED, %cl
- jnz work_resched
-
-work_notifysig: # deal with pending signals and
- # notify-resume requests
- testl $VM_MASK, EFLAGS(%esp)
- movl %esp, %eax
- jne work_notifysig_v86 # returning to kernel-space or
- # vm86-space
- xorl %edx, %edx
- call do_notify_resume
- jmp resume_userspace
-
- ALIGN
-work_notifysig_v86:
- pushl %ecx # save ti_flags for do_notify_resume
- call save_v86_state # %eax contains pt_regs pointer
- popl %ecx
- movl %eax, %esp
- xorl %edx, %edx
- call do_notify_resume
- jmp resume_userspace
-
- # perform syscall exit tracing
- ALIGN
-syscall_trace_entry:
- movl $-ENOSYS,EAX(%esp)
- movl %esp, %eax
- xorl %edx,%edx
- call do_syscall_trace
- cmpl $0, %eax
- jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
- # so must skip actual syscall
- movl ORIG_EAX(%esp), %eax
- cmpl $(nr_syscalls), %eax
- jnae syscall_call
- jmp syscall_exit
-
- # perform syscall exit tracing
- ALIGN
-syscall_exit_work:
- testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
- jz work_pending
- sti # could let do_syscall_trace() call
- # schedule() instead
- movl %esp, %eax
- movl $1, %edx
- call do_syscall_trace
- jmp resume_userspace
-
- ALIGN
-syscall_fault:
- pushl %eax # save orig_eax
- SAVE_ALL
- GET_THREAD_INFO(%ebp)
- movl $-EFAULT,EAX(%esp)
- jmp resume_userspace
-
- ALIGN
-syscall_badsys:
- movl $-ENOSYS,EAX(%esp)
- jmp resume_userspace
-
-#define FIXUP_ESPFIX_STACK \
- movl %esp, %eax; \
- /* switch to 32bit stack using the pointer on top of 16bit stack */ \
- lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
- /* copy data from 16bit stack to 32bit stack */ \
- call fixup_x86_bogus_stack; \
- /* put ESP to the proper location */ \
- movl %eax, %esp;
-#define UNWIND_ESPFIX_STACK \
- pushl %eax; \
- movl %ss, %eax; \
- /* see if on 16bit stack */ \
- cmpw $__ESPFIX_SS, %ax; \
- jne 28f; \
- movl $__KERNEL_DS, %edx; \
- movl %edx, %ds; \
- movl %edx, %es; \
- /* switch to 32bit stack */ \
- FIXUP_ESPFIX_STACK \
-28: popl %eax;
-
-/*
- * Build the entry stubs and pointer table with
- * some assembler magic.
- */
-.data
-ENTRY(interrupt)
-.text
-
-vector=0
-ENTRY(irq_entries_start)
-.rept NR_IRQS
- ALIGN
-1: pushl $vector-256
- jmp common_interrupt
-.data
- .long 1b
-.text
-vector=vector+1
-.endr
-
- ALIGN
-common_interrupt:
- SAVE_ALL
- movl %esp,%eax
- call do_IRQ
- jmp ret_from_intr
-
-#define BUILD_INTERRUPT(name, nr) \
-ENTRY(name) \
- pushl $nr-256; \
- SAVE_ALL \
- movl %esp,%eax; \
- call smp_/**/name; \
- jmp ret_from_intr;
-
-/* The include is where all of the SMP etc. interrupts come from */
-#include "entry_arch.h"
-
-ENTRY(divide_error)
- pushl $0 # no error code
- pushl $do_divide_error
- ALIGN
-error_code:
- pushl %ds
- pushl %eax
- xorl %eax, %eax
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- decl %eax # eax = -1
- pushl %ecx
- pushl %ebx
- cld
- pushl %es
- UNWIND_ESPFIX_STACK
- popl %ecx
- movl ES(%esp), %edi # get the function address
- movl ORIG_EAX(%esp), %edx # get the error code
- movl %eax, ORIG_EAX(%esp)
- movl %ecx, ES(%esp)
- movl $(__USER_DS), %ecx
- movl %ecx, %ds
- movl %ecx, %es
- movl %esp,%eax # pt_regs pointer
- call *%edi
- jmp ret_from_exception
-
-ENTRY(coprocessor_error)
- pushl $0
- pushl $do_coprocessor_error
- jmp error_code
-
-ENTRY(simd_coprocessor_error)
- pushl $0
- pushl $do_simd_coprocessor_error
- jmp error_code
-
-ENTRY(device_not_available)
- pushl $-1 # mark this as an int
- SAVE_ALL
- movl %cr0, %eax
- testl $0x4, %eax # EM (math emulation bit)
- jne device_not_available_emulate
- preempt_stop
- call math_state_restore
- jmp ret_from_exception
-device_not_available_emulate:
- pushl $0 # temporary storage for ORIG_EIP
- call math_emulate
- addl $4, %esp
- jmp ret_from_exception
-
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-#define FIX_STACK(offset, ok, label) \
- cmpw $__KERNEL_CS,4(%esp); \
- jne ok; \
-label: \
- movl TSS_sysenter_esp0+offset(%esp),%esp; \
- pushfl; \
- pushl $__KERNEL_CS; \
- pushl $sysenter_past_esp
-
-KPROBE_ENTRY(debug)
- cmpl $sysenter_entry,(%esp)
- jne debug_stack_correct
- FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
-debug_stack_correct:
- pushl $-1 # mark this as an int
- SAVE_ALL
- xorl %edx,%edx # error code 0
- movl %esp,%eax # pt_regs pointer
- call do_debug
- jmp ret_from_exception
- .previous .text
-/*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
- */
-ENTRY(nmi)
- pushl %eax
- movl %ss, %eax
- cmpw $__ESPFIX_SS, %ax
- popl %eax
- je nmi_16bit_stack
- cmpl $sysenter_entry,(%esp)
- je nmi_stack_fixup
- pushl %eax
- movl %esp,%eax
- /* Do not access memory above the end of our stack page,
- * it might not exist.
- */
- andl $(THREAD_SIZE-1),%eax
- cmpl $(THREAD_SIZE-20),%eax
- popl %eax
- jae nmi_stack_correct
- cmpl $sysenter_entry,12(%esp)
- je nmi_debug_stack_check
-nmi_stack_correct:
- pushl %eax
- SAVE_ALL
- xorl %edx,%edx # zero error code
- movl %esp,%eax # pt_regs pointer
- call do_nmi
- jmp restore_all
-
-nmi_stack_fixup:
- FIX_STACK(12,nmi_stack_correct, 1)
- jmp nmi_stack_correct
-nmi_debug_stack_check:
- cmpw $__KERNEL_CS,16(%esp)
- jne nmi_stack_correct
- cmpl $debug - 1,(%esp)
- jle nmi_stack_correct
- cmpl $debug_esp_fix_insn,(%esp)
- jle nmi_debug_stack_fixup
-nmi_debug_stack_fixup:
- FIX_STACK(24,nmi_stack_correct, 1)
- jmp nmi_stack_correct
-
-nmi_16bit_stack:
- /* create the pointer to lss back */
- pushl %ss
- pushl %esp
- movzwl %sp, %esp
- addw $4, (%esp)
- /* copy the iret frame of 12 bytes */
- .rept 3
- pushl 16(%esp)
- .endr
- pushl %eax
- SAVE_ALL
- FIXUP_ESPFIX_STACK # %eax == %esp
- xorl %edx,%edx # zero error code
- call do_nmi
- RESTORE_REGS
- lss 12+4(%esp), %esp # back to 16bit stack
-1: iret
-.section __ex_table,"a"
- .align 4
- .long 1b,iret_exc
-.previous
-
-KPROBE_ENTRY(int3)
- pushl $-1 # mark this as an int
- SAVE_ALL
- xorl %edx,%edx # zero error code
- movl %esp,%eax # pt_regs pointer
- call do_int3
- jmp ret_from_exception
- .previous .text
-
-ENTRY(overflow)
- pushl $0
- pushl $do_overflow
- jmp error_code
-
-ENTRY(bounds)
- pushl $0
- pushl $do_bounds
- jmp error_code
-
-ENTRY(invalid_op)
- pushl $0
- pushl $do_invalid_op
- jmp error_code
-
-ENTRY(coprocessor_segment_overrun)
- pushl $0
- pushl $do_coprocessor_segment_overrun
- jmp error_code
-
-ENTRY(invalid_TSS)
- pushl $do_invalid_TSS
- jmp error_code
-
-ENTRY(segment_not_present)
- pushl $do_segment_not_present
- jmp error_code
-
-ENTRY(stack_segment)
- pushl $do_stack_segment
- jmp error_code
-
-KPROBE_ENTRY(general_protection)
- pushl $do_general_protection
- jmp error_code
- .previous .text
-
-ENTRY(alignment_check)
- pushl $do_alignment_check
- jmp error_code
-
-KPROBE_ENTRY(page_fault)
- pushl $do_page_fault
- jmp error_code
- .previous .text
-
-#ifdef CONFIG_X86_MCE
-ENTRY(machine_check)
- pushl $0
- pushl machine_check_vector
- jmp error_code
-#endif
-
-ENTRY(spurious_interrupt_bug)
- pushl $0
- pushl $do_spurious_interrupt_bug
- jmp error_code
-
-#include "syscall_table.S"
-
-syscall_table_size=(.-sys_call_table)
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
deleted file mode 100644
index e437fb36749..00000000000
--- a/arch/i386/kernel/head.S
+++ /dev/null
@@ -1,527 +0,0 @@
-/*
- * linux/arch/i386/kernel/head.S -- the 32-bit startup code.
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * Enhanced CPU detection and feature setting code by Mike Jagdis
- * and Martin Mares, November 1997.
- */
-
-.text
-#include <linux/config.h>
-#include <linux/threads.h>
-#include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/cache.h>
-#include <asm/thread_info.h>
-#include <asm/asm-offsets.h>
-#include <asm/setup.h>
-
-/*
- * References to members of the new_cpu_data structure.
- */
-
-#define X86 new_cpu_data+CPUINFO_x86
-#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor
-#define X86_MODEL new_cpu_data+CPUINFO_x86_model
-#define X86_MASK new_cpu_data+CPUINFO_x86_mask
-#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math
-#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level
-#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
-#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
-
-/*
- * This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially. We need one bit for
- * each possible page, but only in low memory, which means
- * 2^32/4096/8 = 128K worst case (4G/4G split.)
- *
- * Modulo rounding, each megabyte assigned here requires a kilobyte of
- * memory, which is currently unreclaimed.
- *
- * This should be a multiple of a page.
- */
-#define INIT_MAP_BEYOND_END (128*1024)
-
-
-/*
- * 32-bit kernel entrypoint; only used by the boot CPU. On entry,
- * %esi points to the real-mode code as a 32-bit pointer.
- * CS and DS must be 4 GB flat segments, but we don't depend on
- * any particular GDT layout, because we load our own as soon as we
- * can.
- */
-ENTRY(startup_32)
-
-/*
- * Set segments to known values.
- */
- cld
- lgdt boot_gdt_descr - __PAGE_OFFSET
- movl $(__BOOT_DS),%eax
- movl %eax,%ds
- movl %eax,%es
- movl %eax,%fs
- movl %eax,%gs
-
-/*
- * Clear BSS first so that there are no surprises...
- * No need to cld as DF is already clear from cld above...
- */
- xorl %eax,%eax
- movl $__bss_start - __PAGE_OFFSET,%edi
- movl $__bss_stop - __PAGE_OFFSET,%ecx
- subl %edi,%ecx
- shrl $2,%ecx
- rep ; stosl
-/*
- * Copy bootup parameters out of the way.
- * Note: %esi still has the pointer to the real-mode data.
- * With the kexec as boot loader, parameter segment might be loaded beyond
- * kernel image and might not even be addressable by early boot page tables.
- * (kexec on panic case). Hence copy out the parameters before initializing
- * page tables.
- */
- movl $(boot_params - __PAGE_OFFSET),%edi
- movl $(PARAM_SIZE/4),%ecx
- cld
- rep
- movsl
- movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
- andl %esi,%esi
- jnz 2f # New command line protocol
- cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
- jne 1f
- movzwl OLD_CL_OFFSET,%esi
- addl $(OLD_CL_BASE_ADDR),%esi
-2:
- movl $(saved_command_line - __PAGE_OFFSET),%edi
- movl $(COMMAND_LINE_SIZE/4),%ecx
- rep
- movsl
-1:
-
-/*
- * Initialize page tables. This creates a PDE and a set of page
- * tables, which are located immediately beyond _end. The variable
- * init_pg_tables_end is set up to point to the first "safe" location.
- * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
- *
- * Warning: don't use %esi or the stack in this code. However, %esp
- * can be used as a GPR if you really need it...
- */
-page_pde_offset = (__PAGE_OFFSET >> 20);
-
- movl $(pg0 - __PAGE_OFFSET), %edi
- movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
- movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */
-10:
- leal 0x007(%edi),%ecx /* Create PDE entry */
- movl %ecx,(%edx) /* Store identity PDE entry */
- movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
- addl $4,%edx
- movl $1024, %ecx
-11:
- stosl
- addl $0x1000,%eax
- loop 11b
- /* End condition: we must map up to and including INIT_MAP_BEYOND_END */
- /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
- leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
- cmpl %ebp,%eax
- jb 10b
- movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
-
-#ifdef CONFIG_SMP
- xorl %ebx,%ebx /* This is the boot CPU (BSP) */
- jmp 3f
-
-/*
- * Non-boot CPU entry point; entered from trampoline.S
- * We can't lgdt here, because lgdt itself uses a data segment, but
- * we know the trampoline has already loaded the boot_gdt_table GDT
- * for us.
- */
-ENTRY(startup_32_smp)
- cld
- movl $(__BOOT_DS),%eax
- movl %eax,%ds
- movl %eax,%es
- movl %eax,%fs
- movl %eax,%gs
-
-/*
- * New page tables may be in 4Mbyte page mode and may
- * be using the global pages.
- *
- * NOTE! If we are on a 486 we may have no cr4 at all!
- * So we do not try to touch it unless we really have
- * some bits in it to set. This won't work if the BSP
- * implements cr4 but this AP does not -- very unlikely
- * but be warned! The same applies to the pse feature
- * if not equally supported. --macro
- *
- * NOTE! We have to correct for the fact that we're
- * not yet offset PAGE_OFFSET..
- */
-#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
- movl cr4_bits,%edx
- andl %edx,%edx
- jz 6f
- movl %cr4,%eax # Turn on paging options (PSE,PAE,..)
- orl %edx,%eax
- movl %eax,%cr4
-
- btl $5, %eax # check if PAE is enabled
- jnc 6f
-
- /* Check if extended functions are implemented */
- movl $0x80000000, %eax
- cpuid
- cmpl $0x80000000, %eax
- jbe 6f
- mov $0x80000001, %eax
- cpuid
- /* Execute Disable bit supported? */
- btl $20, %edx
- jnc 6f
-
- /* Setup EFER (Extended Feature Enable Register) */
- movl $0xc0000080, %ecx
- rdmsr
-
- btsl $11, %eax
- /* Make changes effective */
- wrmsr
-
-6:
- /* This is a secondary processor (AP) */
- xorl %ebx,%ebx
- incl %ebx
-
-3:
-#endif /* CONFIG_SMP */
-
-/*
- * Enable paging
- */
- movl $swapper_pg_dir-__PAGE_OFFSET,%eax
- movl %eax,%cr3 /* set the page table pointer.. */
- movl %cr0,%eax
- orl $0x80000000,%eax
- movl %eax,%cr0 /* ..and set paging (PG) bit */
- ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
-1:
- /* Set up the stack pointer */
- lss stack_start,%esp
-
-/*
- * Initialize eflags. Some BIOS's leave bits like NT set. This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
- */
- pushl $0
- popfl
-
-#ifdef CONFIG_SMP
- andl %ebx,%ebx
- jz 1f /* Initial CPU cleans BSS */
- jmp checkCPUtype
-1:
-#endif /* CONFIG_SMP */
-
-/*
- * start system 32-bit setup. We need to re-do some of the things done
- * in 16-bit mode for the "real" operations.
- */
- call setup_idt
-
-checkCPUtype:
-
- movl $-1,X86_CPUID # -1 for no CPUID initially
-
-/* check if it is 486 or 386. */
-/*
- * XXX - this does a lot of unnecessary setup. Alignment checks don't
- * apply at our cpl of 0 and the stack ought to be aligned already, and
- * we don't need to preserve eflags.
- */
-
- movb $3,X86 # at least 386
- pushfl # push EFLAGS
- popl %eax # get EFLAGS
- movl %eax,%ecx # save original EFLAGS
- xorl $0x240000,%eax # flip AC and ID bits in EFLAGS
- pushl %eax # copy to EFLAGS
- popfl # set EFLAGS
- pushfl # get new EFLAGS
- popl %eax # put it in eax
- xorl %ecx,%eax # change in flags
- pushl %ecx # restore original EFLAGS
- popfl
- testl $0x40000,%eax # check if AC bit changed
- je is386
-
- movb $4,X86 # at least 486
- testl $0x200000,%eax # check if ID bit changed
- je is486
-
- /* get vendor info */
- xorl %eax,%eax # call CPUID with 0 -> return vendor ID
- cpuid
- movl %eax,X86_CPUID # save CPUID level
- movl %ebx,X86_VENDOR_ID # lo 4 chars
- movl %edx,X86_VENDOR_ID+4 # next 4 chars
- movl %ecx,X86_VENDOR_ID+8 # last 4 chars
-
- orl %eax,%eax # do we have processor info as well?
- je is486
-
- movl $1,%eax # Use the CPUID instruction to get CPU type
- cpuid
- movb %al,%cl # save reg for future use
- andb $0x0f,%ah # mask processor family
- movb %ah,X86
- andb $0xf0,%al # mask model
- shrb $4,%al
- movb %al,X86_MODEL
- andb $0x0f,%cl # mask mask revision
- movb %cl,X86_MASK
- movl %edx,X86_CAPABILITY
-
-is486: movl $0x50022,%ecx # set AM, WP, NE and MP
- jmp 2f
-
-is386: movl $2,%ecx # set MP
-2: movl %cr0,%eax
- andl $0x80000011,%eax # Save PG,PE,ET
- orl %ecx,%eax
- movl %eax,%cr0
-
- call check_x87
- lgdt cpu_gdt_descr
- lidt idt_descr
- ljmp $(__KERNEL_CS),$1f
-1: movl $(__KERNEL_DS),%eax # reload all the segment registers
- movl %eax,%ss # after changing gdt.
-
- movl $(__USER_DS),%eax # DS/ES contains default USER segment
- movl %eax,%ds
- movl %eax,%es
-
- xorl %eax,%eax # Clear FS/GS and LDT
- movl %eax,%fs
- movl %eax,%gs
- lldt %ax
- cld # gcc2 wants the direction flag cleared at all times
-#ifdef CONFIG_SMP
- movb ready, %cl
- movb $1, ready
- cmpb $0,%cl
- je 1f # the first CPU calls start_kernel
- # all other CPUs call initialize_secondary
- call initialize_secondary
- jmp L6
-1:
-#endif /* CONFIG_SMP */
- call start_kernel
-L6:
- jmp L6 # main should never return here, but
- # just in case, we know what happens.
-
-/*
- * We depend on ET to be correct. This checks for 287/387.
- */
-check_x87:
- movb $0,X86_HARD_MATH
- clts
- fninit
- fstsw %ax
- cmpb $0,%al
- je 1f
- movl %cr0,%eax /* no coprocessor: have to set bits */
- xorl $4,%eax /* set EM */
- movl %eax,%cr0
- ret
- ALIGN
-1: movb $1,X86_HARD_MATH
- .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
- ret
-
-/*
- * setup_idt
- *
- * sets up a idt with 256 entries pointing to
- * ignore_int, interrupt gates. It doesn't actually load
- * idt - that can be done only after paging has been enabled
- * and the kernel moved to PAGE_OFFSET. Interrupts
- * are enabled elsewhere, when we can be relatively
- * sure everything is ok.
- *
- * Warning: %esi is live across this function.
- */
-setup_idt:
- lea ignore_int,%edx
- movl $(__KERNEL_CS << 16),%eax
- movw %dx,%ax /* selector = 0x0010 = cs */
- movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
-
- lea idt_table,%edi
- mov $256,%ecx
-rp_sidt:
- movl %eax,(%edi)
- movl %edx,4(%edi)
- addl $8,%edi
- dec %ecx
- jne rp_sidt
- ret
-
-/* This is the default interrupt "handler" :-) */
- ALIGN
-ignore_int:
- cld
-#ifdef CONFIG_PRINTK
- pushl %eax
- pushl %ecx
- pushl %edx
- pushl %es
- pushl %ds
- movl $(__KERNEL_DS),%eax
- movl %eax,%ds
- movl %eax,%es
- pushl 16(%esp)
- pushl 24(%esp)
- pushl 32(%esp)
- pushl 40(%esp)
- pushl $int_msg
- call printk
- addl $(5*4),%esp
- popl %ds
- popl %es
- popl %edx
- popl %ecx
- popl %eax
-#endif
- iret
-
-/*
- * Real beginning of normal "text" segment
- */
-ENTRY(stext)
-ENTRY(_stext)
-
-/*
- * BSS section
- */
-.section ".bss.page_aligned","w"
-ENTRY(swapper_pg_dir)
- .fill 1024,4,0
-ENTRY(empty_zero_page)
- .fill 4096,1,0
-
-/*
- * This starts the data section.
- */
-.data
-
-ENTRY(stack_start)
- .long init_thread_union+THREAD_SIZE
- .long __BOOT_DS
-
-ready: .byte 0
-
-int_msg:
- .asciz "Unknown interrupt or fault at EIP %p %p %p\n"
-
-/*
- * The IDT and GDT 'descriptors' are a strange 48-bit object
- * only used by the lidt and lgdt instructions. They are not
- * like usual segment descriptors - they consist of a 16-bit
- * segment size, and 32-bit linear address value:
- */
-
-.globl boot_gdt_descr
-.globl idt_descr
-.globl cpu_gdt_descr
-
- ALIGN
-# early boot GDT descriptor (must use 1:1 address mapping)
- .word 0 # 32 bit align gdt_desc.address
-boot_gdt_descr:
- .word __BOOT_DS+7
- .long boot_gdt_table - __PAGE_OFFSET
-
- .word 0 # 32-bit align idt_desc.address
-idt_descr:
- .word IDT_ENTRIES*8-1 # idt contains 256 entries
- .long idt_table
-
-# boot GDT descriptor (later on used by CPU#0):
- .word 0 # 32 bit align gdt_desc.address
-cpu_gdt_descr:
- .word GDT_ENTRIES*8-1
- .long cpu_gdt_table
-
- .fill NR_CPUS-1,8,0 # space for the other GDT descriptors
-
-/*
- * The boot_gdt_table must mirror the equivalent in setup.S and is
- * used only for booting.
- */
- .align L1_CACHE_BYTES
-ENTRY(boot_gdt_table)
- .fill GDT_ENTRY_BOOT_CS,8,0
- .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
- .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
-
-/*
- * The Global Descriptor Table contains 28 quadwords, per-CPU.
- */
- .align PAGE_SIZE_asm
-ENTRY(cpu_gdt_table)
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0000000000000000 /* 0x0b reserved */
- .quad 0x0000000000000000 /* 0x13 reserved */
- .quad 0x0000000000000000 /* 0x1b reserved */
- .quad 0x0000000000000000 /* 0x20 unused */
- .quad 0x0000000000000000 /* 0x28 unused */
- .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
- .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
- .quad 0x0000000000000000 /* 0x43 TLS entry 3 */
- .quad 0x0000000000000000 /* 0x4b reserved */
- .quad 0x0000000000000000 /* 0x53 reserved */
- .quad 0x0000000000000000 /* 0x5b reserved */
-
- .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
- .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
- .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
- .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
-
- .quad 0x0000000000000000 /* 0x80 TSS descriptor */
- .quad 0x0000000000000000 /* 0x88 LDT descriptor */
-
- /* Segments used for calling PnP BIOS */
- .quad 0x00c09a0000000000 /* 0x90 32-bit code */
- .quad 0x00809a0000000000 /* 0x98 16-bit code */
- .quad 0x0080920000000000 /* 0xa0 16-bit data */
- .quad 0x0080920000000000 /* 0xa8 16-bit data */
- .quad 0x0080920000000000 /* 0xb0 16-bit data */
- /*
- * The APM segments have byte granularity and their bases
- * and limits are set at run time.
- */
- .quad 0x00409a0000000000 /* 0xb8 APM CS code */
- .quad 0x00009a0000000000 /* 0xc0 APM CS 16 code (16 bit) */
- .quad 0x0040920000000000 /* 0xc8 APM DS data */
-
- .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */
- .quad 0x0000000000000000 /* 0xd8 - unused */
- .quad 0x0000000000000000 /* 0xe0 - unused */
- .quad 0x0000000000000000 /* 0xe8 - unused */
- .quad 0x0000000000000000 /* 0xf0 - unused */
- .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
-
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
deleted file mode 100644
index 180f070d03c..00000000000
--- a/arch/i386/kernel/i386_ksyms.c
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <linux/config.h>
-#include <linux/module.h>
-#include <asm/checksum.h>
-#include <asm/desc.h>
-
-/* This is definitely a GPL-only symbol */
-EXPORT_SYMBOL_GPL(cpu_gdt_table);
-
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-/* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial_copy_generic);
-
-EXPORT_SYMBOL(__get_user_1);
-EXPORT_SYMBOL(__get_user_2);
-EXPORT_SYMBOL(__get_user_4);
-
-EXPORT_SYMBOL(__put_user_1);
-EXPORT_SYMBOL(__put_user_2);
-EXPORT_SYMBOL(__put_user_4);
-EXPORT_SYMBOL(__put_user_8);
-
-EXPORT_SYMBOL(strpbrk);
-EXPORT_SYMBOL(strstr);
-
-#ifdef CONFIG_SMP
-extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
-extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
-EXPORT_SYMBOL(__write_lock_failed);
-EXPORT_SYMBOL(__read_lock_failed);
-#endif
-
-EXPORT_SYMBOL(csum_partial);
diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c
deleted file mode 100644
index d75524758da..00000000000
--- a/arch/i386/kernel/i387.c
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * linux/arch/i386/kernel/i387.c
- *
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-#include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/math_emu.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/ptrace.h>
-#include <asm/uaccess.h>
-
-#ifdef CONFIG_MATH_EMULATION
-#define HAVE_HWFP (boot_cpu_data.hard_math)
-#else
-#define HAVE_HWFP 1
-#endif
-
-static unsigned long mxcsr_feature_mask = 0xffffffff;
-
-void mxcsr_feature_mask_init(void)
-{
- unsigned long mask = 0;
- clts();
- if (cpu_has_fxsr) {
- memset(&current->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
- asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
- mask = current->thread.i387.fxsave.mxcsr_mask;
- if (mask == 0) mask = 0x0000ffbf;
- }
- mxcsr_feature_mask &= mask;
- stts();
-}
-
-/*
- * The _current_ task is using the FPU for the first time
- * so initialize it and set the mxcsr to its default
- * value at reset if we support XMM instructions and then
- * remeber the current task has used the FPU.
- */
-void init_fpu(struct task_struct *tsk)
-{
- if (cpu_has_fxsr) {
- memset(&tsk->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
- tsk->thread.i387.fxsave.cwd = 0x37f;
- if (cpu_has_xmm)
- tsk->thread.i387.fxsave.mxcsr = 0x1f80;
- } else {
- memset(&tsk->thread.i387.fsave, 0, sizeof(struct i387_fsave_struct));
- tsk->thread.i387.fsave.cwd = 0xffff037fu;
- tsk->thread.i387.fsave.swd = 0xffff0000u;
- tsk->thread.i387.fsave.twd = 0xffffffffu;
- tsk->thread.i387.fsave.fos = 0xffff0000u;
- }
- /* only the device not available exception or ptrace can call init_fpu */
- set_stopped_child_used_math(tsk);
-}
-
-/*
- * FPU lazy state save handling.
- */
-
-void kernel_fpu_begin(void)
-{
- struct thread_info *thread = current_thread_info();
-
- preempt_disable();
- if (thread->status & TS_USEDFPU) {
- __save_init_fpu(thread->task);
- return;
- }
- clts();
-}
-EXPORT_SYMBOL_GPL(kernel_fpu_begin);
-
-/*
- * FPU tag word conversions.
- */
-
-static inline unsigned short twd_i387_to_fxsr( unsigned short twd )
-{
- unsigned int tmp; /* to avoid 16 bit prefixes in the code */
-
- /* Transform each pair of bits into 01 (valid) or 00 (empty) */
- tmp = ~twd;
- tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
- /* and move the valid bits to the lower byte. */
- tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
- tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
- tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
- return tmp;
-}
-
-static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave )
-{
- struct _fpxreg *st = NULL;
- unsigned long tos = (fxsave->swd >> 11) & 7;
- unsigned long twd = (unsigned long) fxsave->twd;
- unsigned long tag;
- unsigned long ret = 0xffff0000u;
- int i;
-
-#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16);
-
- for ( i = 0 ; i < 8 ; i++ ) {
- if ( twd & 0x1 ) {
- st = FPREG_ADDR( fxsave, (i - tos) & 7 );
-
- switch ( st->exponent & 0x7fff ) {
- case 0x7fff:
- tag = 2; /* Special */
- break;
- case 0x0000:
- if ( !st->significand[0] &&
- !st->significand[1] &&
- !st->significand[2] &&
- !st->significand[3] ) {
- tag = 1; /* Zero */
- } else {
- tag = 2; /* Special */
- }
- break;
- default:
- if ( st->significand[3] & 0x8000 ) {
- tag = 0; /* Valid */
- } else {
- tag = 2; /* Special */
- }
- break;
- }
- } else {
- tag = 3; /* Empty */
- }
- ret |= (tag << (2 * i));
- twd = twd >> 1;
- }
- return ret;
-}
-
-/*
- * FPU state interaction.
- */
-
-unsigned short get_fpu_cwd( struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- return tsk->thread.i387.fxsave.cwd;
- } else {
- return (unsigned short)tsk->thread.i387.fsave.cwd;
- }
-}
-
-unsigned short get_fpu_swd( struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- return tsk->thread.i387.fxsave.swd;
- } else {
- return (unsigned short)tsk->thread.i387.fsave.swd;
- }
-}
-
-#if 0
-unsigned short get_fpu_twd( struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- return tsk->thread.i387.fxsave.twd;
- } else {
- return (unsigned short)tsk->thread.i387.fsave.twd;
- }
-}
-#endif /* 0 */
-
-unsigned short get_fpu_mxcsr( struct task_struct *tsk )
-{
- if ( cpu_has_xmm ) {
- return tsk->thread.i387.fxsave.mxcsr;
- } else {
- return 0x1f80;
- }
-}
-
-#if 0
-
-void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd )
-{
- if ( cpu_has_fxsr ) {
- tsk->thread.i387.fxsave.cwd = cwd;
- } else {
- tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000u);
- }
-}
-
-void set_fpu_swd( struct task_struct *tsk, unsigned short swd )
-{
- if ( cpu_has_fxsr ) {
- tsk->thread.i387.fxsave.swd = swd;
- } else {
- tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000u);
- }
-}
-
-void set_fpu_twd( struct task_struct *tsk, unsigned short twd )
-{
- if ( cpu_has_fxsr ) {
- tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd);
- } else {
- tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000u);
- }
-}
-
-#endif /* 0 */
-
-/*
- * FXSR floating point environment conversions.
- */
-
-static int convert_fxsr_to_user( struct _fpstate __user *buf,
- struct i387_fxsave_struct *fxsave )
-{
- unsigned long env[7];
- struct _fpreg __user *to;
- struct _fpxreg *from;
- int i;
-
- env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul;
- env[1] = (unsigned long)fxsave->swd | 0xffff0000ul;
- env[2] = twd_fxsr_to_i387(fxsave);
- env[3] = fxsave->fip;
- env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
- env[5] = fxsave->foo;
- env[6] = fxsave->fos;
-
- if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
- return 1;
-
- to = &buf->_st[0];
- from = (struct _fpxreg *) &fxsave->st_space[0];
- for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
- unsigned long __user *t = (unsigned long __user *)to;
- unsigned long *f = (unsigned long *)from;
-
- if (__put_user(*f, t) ||
- __put_user(*(f + 1), t + 1) ||
- __put_user(from->exponent, &to->exponent))
- return 1;
- }
- return 0;
-}
-
-static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
- struct _fpstate __user *buf )
-{
- unsigned long env[7];
- struct _fpxreg *to;
- struct _fpreg __user *from;
- int i;
-
- if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
- return 1;
-
- fxsave->cwd = (unsigned short)(env[0] & 0xffff);
- fxsave->swd = (unsigned short)(env[1] & 0xffff);
- fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
- fxsave->fip = env[3];
- fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16);
- fxsave->fcs = (env[4] & 0xffff);
- fxsave->foo = env[5];
- fxsave->fos = env[6];
-
- to = (struct _fpxreg *) &fxsave->st_space[0];
- from = &buf->_st[0];
- for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
- unsigned long *t = (unsigned long *)to;
- unsigned long __user *f = (unsigned long __user *)from;
-
- if (__get_user(*t, f) ||
- __get_user(*(t + 1), f + 1) ||
- __get_user(to->exponent, &from->exponent))
- return 1;
- }
- return 0;
-}
-
-/*
- * Signal frame handlers.
- */
-
-static inline int save_i387_fsave( struct _fpstate __user *buf )
-{
- struct task_struct *tsk = current;
-
- unlazy_fpu( tsk );
- tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
- if ( __copy_to_user( buf, &tsk->thread.i387.fsave,
- sizeof(struct i387_fsave_struct) ) )
- return -1;
- return 1;
-}
-
-static int save_i387_fxsave( struct _fpstate __user *buf )
-{
- struct task_struct *tsk = current;
- int err = 0;
-
- unlazy_fpu( tsk );
-
- if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) )
- return -1;
-
- err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status );
- err |= __put_user( X86_FXSR_MAGIC, &buf->magic );
- if ( err )
- return -1;
-
- if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
- sizeof(struct i387_fxsave_struct) ) )
- return -1;
- return 1;
-}
-
-int save_i387( struct _fpstate __user *buf )
-{
- if ( !used_math() )
- return 0;
-
- /* This will cause a "finit" to be triggered by the next
- * attempted FPU operation by the 'current' process.
- */
- clear_used_math();
-
- if ( HAVE_HWFP ) {
- if ( cpu_has_fxsr ) {
- return save_i387_fxsave( buf );
- } else {
- return save_i387_fsave( buf );
- }
- } else {
- return save_i387_soft( &current->thread.i387.soft, buf );
- }
-}
-
-static inline int restore_i387_fsave( struct _fpstate __user *buf )
-{
- struct task_struct *tsk = current;
- clear_fpu( tsk );
- return __copy_from_user( &tsk->thread.i387.fsave, buf,
- sizeof(struct i387_fsave_struct) );
-}
-
-static int restore_i387_fxsave( struct _fpstate __user *buf )
-{
- int err;
- struct task_struct *tsk = current;
- clear_fpu( tsk );
- err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
- sizeof(struct i387_fxsave_struct) );
- /* mxcsr reserved bits must be masked to zero for security reasons */
- tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
- return err ? 1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf );
-}
-
-int restore_i387( struct _fpstate __user *buf )
-{
- int err;
-
- if ( HAVE_HWFP ) {
- if ( cpu_has_fxsr ) {
- err = restore_i387_fxsave( buf );
- } else {
- err = restore_i387_fsave( buf );
- }
- } else {
- err = restore_i387_soft( &current->thread.i387.soft, buf );
- }
- set_used_math();
- return err;
-}
-
-/*
- * ptrace request handlers.
- */
-
-static inline int get_fpregs_fsave( struct user_i387_struct __user *buf,
- struct task_struct *tsk )
-{
- return __copy_to_user( buf, &tsk->thread.i387.fsave,
- sizeof(struct user_i387_struct) );
-}
-
-static inline int get_fpregs_fxsave( struct user_i387_struct __user *buf,
- struct task_struct *tsk )
-{
- return convert_fxsr_to_user( (struct _fpstate __user *)buf,
- &tsk->thread.i387.fxsave );
-}
-
-int get_fpregs( struct user_i387_struct __user *buf, struct task_struct *tsk )
-{
- if ( HAVE_HWFP ) {
- if ( cpu_has_fxsr ) {
- return get_fpregs_fxsave( buf, tsk );
- } else {
- return get_fpregs_fsave( buf, tsk );
- }
- } else {
- return save_i387_soft( &tsk->thread.i387.soft,
- (struct _fpstate __user *)buf );
- }
-}
-
-static inline int set_fpregs_fsave( struct task_struct *tsk,
- struct user_i387_struct __user *buf )
-{
- return __copy_from_user( &tsk->thread.i387.fsave, buf,
- sizeof(struct user_i387_struct) );
-}
-
-static inline int set_fpregs_fxsave( struct task_struct *tsk,
- struct user_i387_struct __user *buf )
-{
- return convert_fxsr_from_user( &tsk->thread.i387.fxsave,
- (struct _fpstate __user *)buf );
-}
-
-int set_fpregs( struct task_struct *tsk, struct user_i387_struct __user *buf )
-{
- if ( HAVE_HWFP ) {
- if ( cpu_has_fxsr ) {
- return set_fpregs_fxsave( tsk, buf );
- } else {
- return set_fpregs_fsave( tsk, buf );
- }
- } else {
- return restore_i387_soft( &tsk->thread.i387.soft,
- (struct _fpstate __user *)buf );
- }
-}
-
-int get_fpxregs( struct user_fxsr_struct __user *buf, struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- if (__copy_to_user( buf, &tsk->thread.i387.fxsave,
- sizeof(struct user_fxsr_struct) ))
- return -EFAULT;
- return 0;
- } else {
- return -EIO;
- }
-}
-
-int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct __user *buf )
-{
- int ret = 0;
-
- if ( cpu_has_fxsr ) {
- if (__copy_from_user( &tsk->thread.i387.fxsave, buf,
- sizeof(struct user_fxsr_struct) ))
- ret = -EFAULT;
- /* mxcsr reserved bits must be masked to zero for security reasons */
- tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
- } else {
- ret = -EIO;
- }
- return ret;
-}
-
-/*
- * FPU state for core dumps.
- */
-
-static inline void copy_fpu_fsave( struct task_struct *tsk,
- struct user_i387_struct *fpu )
-{
- memcpy( fpu, &tsk->thread.i387.fsave,
- sizeof(struct user_i387_struct) );
-}
-
-static inline void copy_fpu_fxsave( struct task_struct *tsk,
- struct user_i387_struct *fpu )
-{
- unsigned short *to;
- unsigned short *from;
- int i;
-
- memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) );
-
- to = (unsigned short *)&fpu->st_space[0];
- from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0];
- for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) {
- memcpy( to, from, 5 * sizeof(unsigned short) );
- }
-}
-
-int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
-{
- int fpvalid;
- struct task_struct *tsk = current;
-
- fpvalid = !!used_math();
- if ( fpvalid ) {
- unlazy_fpu( tsk );
- if ( cpu_has_fxsr ) {
- copy_fpu_fxsave( tsk, fpu );
- } else {
- copy_fpu_fsave( tsk, fpu );
- }
- }
-
- return fpvalid;
-}
-EXPORT_SYMBOL(dump_fpu);
-
-int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu)
-{
- int fpvalid = !!tsk_used_math(tsk);
-
- if (fpvalid) {
- if (tsk == current)
- unlazy_fpu(tsk);
- if (cpu_has_fxsr)
- copy_fpu_fxsave(tsk, fpu);
- else
- copy_fpu_fsave(tsk, fpu);
- }
- return fpvalid;
-}
-
-int dump_task_extended_fpu(struct task_struct *tsk, struct user_fxsr_struct *fpu)
-{
- int fpvalid = tsk_used_math(tsk) && cpu_has_fxsr;
-
- if (fpvalid) {
- if (tsk == current)
- unlazy_fpu(tsk);
- memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(*fpu));
- }
- return fpvalid;
-}
diff --git a/arch/i386/kernel/i8237.c b/arch/i386/kernel/i8237.c
deleted file mode 100644
index c36d1c006c2..00000000000
--- a/arch/i386/kernel/i8237.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * i8237.c: 8237A DMA controller suspend functions.
- *
- * Written by Pierre Ossman, 2005.
- */
-
-#include <linux/init.h>
-#include <linux/sysdev.h>
-
-#include <asm/dma.h>
-
-/*
- * This module just handles suspend/resume issues with the
- * 8237A DMA controller (used for ISA and LPC).
- * Allocation is handled in kernel/dma.c and normal usage is
- * in asm/dma.h.
- */
-
-static int i8237A_resume(struct sys_device *dev)
-{
- unsigned long flags;
- int i;
-
- flags = claim_dma_lock();
-
- dma_outb(DMA1_RESET_REG, 0);
- dma_outb(DMA2_RESET_REG, 0);
-
- for (i = 0;i < 8;i++) {
- set_dma_addr(i, 0x000000);
- /* DMA count is a bit weird so this is not 0 */
- set_dma_count(i, 1);
- }
-
- /* Enable cascade DMA or channel 0-3 won't work */
- enable_dma(4);
-
- release_dma_lock(flags);
-
- return 0;
-}
-
-static int i8237A_suspend(struct sys_device *dev, pm_message_t state)
-{
- return 0;
-}
-
-static struct sysdev_class i8237_sysdev_class = {
- set_kset_name("i8237"),
- .suspend = i8237A_suspend,
- .resume = i8237A_resume,
-};
-
-static struct sys_device device_i8237A = {
- .id = 0,
- .cls = &i8237_sysdev_class,
-};
-
-static int __init i8237A_init_sysfs(void)
-{
- int error = sysdev_class_register(&i8237_sysdev_class);
- if (!error)
- error = sysdev_register(&device_i8237A);
- return error;
-}
-
-device_initcall(i8237A_init_sysfs);
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
deleted file mode 100644
index 323ef8ab324..00000000000
--- a/arch/i386/kernel/i8259.c
+++ /dev/null
@@ -1,438 +0,0 @@
-#include <linux/config.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/smp_lock.h>
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/bitops.h>
-
-#include <asm/8253pit.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/timer.h>
-#include <asm/pgtable.h>
-#include <asm/delay.h>
-#include <asm/desc.h>
-#include <asm/apic.h>
-#include <asm/arch_hooks.h>
-#include <asm/i8259.h>
-
-#include <io_ports.h>
-
-/*
- * This is the 'legacy' 8259A Programmable Interrupt Controller,
- * present in the majority of PC/AT boxes.
- * plus some generic x86 specific things if generic specifics makes
- * any sense at all.
- * this file should become arch/i386/kernel/irq.c when the old irq.c
- * moves to arch independent land
- */
-
-DEFINE_SPINLOCK(i8259A_lock);
-
-static void end_8259A_irq (unsigned int irq)
-{
- if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) &&
- irq_desc[irq].action)
- enable_8259A_irq(irq);
-}
-
-#define shutdown_8259A_irq disable_8259A_irq
-
-static void mask_and_ack_8259A(unsigned int);
-
-unsigned int startup_8259A_irq(unsigned int irq)
-{
- enable_8259A_irq(irq);
- return 0; /* never anything pending */
-}
-
-static struct hw_interrupt_type i8259A_irq_type = {
- .typename = "XT-PIC",
- .startup = startup_8259A_irq,
- .shutdown = shutdown_8259A_irq,
- .enable = enable_8259A_irq,
- .disable = disable_8259A_irq,
- .ack = mask_and_ack_8259A,
- .end = end_8259A_irq,
-};
-
-/*
- * 8259A PIC functions to handle ISA devices:
- */
-
-/*
- * This contains the irq mask for both 8259A irq controllers,
- */
-unsigned int cached_irq_mask = 0xffff;
-
-/*
- * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
- * boards the timer interrupt is not really connected to any IO-APIC pin,
- * it's fed to the master 8259A's IR0 line only.
- *
- * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
- * this 'mixed mode' IRQ handling costs nothing because it's only used
- * at IRQ setup time.
- */
-unsigned long io_apic_irqs;
-
-void disable_8259A_irq(unsigned int irq)
-{
- unsigned int mask = 1 << irq;
- unsigned long flags;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- cached_irq_mask |= mask;
- if (irq & 8)
- outb(cached_slave_mask, PIC_SLAVE_IMR);
- else
- outb(cached_master_mask, PIC_MASTER_IMR);
- spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-void enable_8259A_irq(unsigned int irq)
-{
- unsigned int mask = ~(1 << irq);
- unsigned long flags;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- cached_irq_mask &= mask;
- if (irq & 8)
- outb(cached_slave_mask, PIC_SLAVE_IMR);
- else
- outb(cached_master_mask, PIC_MASTER_IMR);
- spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-int i8259A_irq_pending(unsigned int irq)
-{
- unsigned int mask = 1<<irq;
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- if (irq < 8)
- ret = inb(PIC_MASTER_CMD) & mask;
- else
- ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
- spin_unlock_irqrestore(&i8259A_lock, flags);
-
- return ret;
-}
-
-void make_8259A_irq(unsigned int irq)
-{
- disable_irq_nosync(irq);
- io_apic_irqs &= ~(1<<irq);
- irq_desc[irq].handler = &i8259A_irq_type;
- enable_irq(irq);
-}
-
-/*
- * This function assumes to be called rarely. Switching between
- * 8259A registers is slow.
- * This has to be protected by the irq controller spinlock
- * before being called.
- */
-static inline int i8259A_irq_real(unsigned int irq)
-{
- int value;
- int irqmask = 1<<irq;
-
- if (irq < 8) {
- outb(0x0B,PIC_MASTER_CMD); /* ISR register */
- value = inb(PIC_MASTER_CMD) & irqmask;
- outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */
- return value;
- }
- outb(0x0B,PIC_SLAVE_CMD); /* ISR register */
- value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
- outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */
- return value;
-}
-
-/*
- * Careful! The 8259A is a fragile beast, it pretty
- * much _has_ to be done exactly like this (mask it
- * first, _then_ send the EOI, and the order of EOI
- * to the two 8259s is important!
- */
-static void mask_and_ack_8259A(unsigned int irq)
-{
- unsigned int irqmask = 1 << irq;
- unsigned long flags;
-
- spin_lock_irqsave(&i8259A_lock, flags);
- /*
- * Lightweight spurious IRQ detection. We do not want
- * to overdo spurious IRQ handling - it's usually a sign
- * of hardware problems, so we only do the checks we can
- * do without slowing down good hardware unnecesserily.
- *
- * Note that IRQ7 and IRQ15 (the two spurious IRQs
- * usually resulting from the 8259A-1|2 PICs) occur
- * even if the IRQ is masked in the 8259A. Thus we
- * can check spurious 8259A IRQs without doing the
- * quite slow i8259A_irq_real() call for every IRQ.
- * This does not cover 100% of spurious interrupts,
- * but should be enough to warn the user that there
- * is something bad going on ...
- */
- if (cached_irq_mask & irqmask)
- goto spurious_8259A_irq;
- cached_irq_mask |= irqmask;
-
-handle_real_irq:
- if (irq & 8) {
- inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */
- outb(cached_slave_mask, PIC_SLAVE_IMR);
- outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */
- outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */
- } else {
- inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */
- outb(cached_master_mask, PIC_MASTER_IMR);
- outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */
- }
- spin_unlock_irqrestore(&i8259A_lock, flags);
- return;
-
-spurious_8259A_irq:
- /*
- * this is the slow path - should happen rarely.
- */
- if (i8259A_irq_real(irq))
- /*
- * oops, the IRQ _is_ in service according to the
- * 8259A - not spurious, go handle it.
- */
- goto handle_real_irq;
-
- {
- static int spurious_irq_mask;
- /*
- * At this point we can be sure the IRQ is spurious,
- * lets ACK and report it. [once per IRQ]
- */
- if (!(spurious_irq_mask & irqmask)) {
- printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
- spurious_irq_mask |= irqmask;
- }
- atomic_inc(&irq_err_count);
- /*
- * Theoretically we do not have to handle this IRQ,
- * but in Linux this does not cause problems and is
- * simpler for us.
- */
- goto handle_real_irq;
- }
-}
-
-static char irq_trigger[2];
-/**
- * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
- */
-static void restore_ELCR(char *trigger)
-{
- outb(trigger[0], 0x4d0);
- outb(trigger[1], 0x4d1);
-}
-
-static void save_ELCR(char *trigger)
-{
- /* IRQ 0,1,2,8,13 are marked as reserved */
- trigger[0] = inb(0x4d0) & 0xF8;
- trigger[1] = inb(0x4d1) & 0xDE;
-}
-
-static int i8259A_resume(struct sys_device *dev)
-{
- init_8259A(0);
- restore_ELCR(irq_trigger);
- return 0;
-}
-
-static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
-{
- save_ELCR(irq_trigger);
- return 0;
-}
-
-static int i8259A_shutdown(struct sys_device *dev)
-{
- /* Put the i8259A into a quiescent state that
- * the kernel initialization code can get it
- * out of.
- */
- outb(0xff, 0x21); /* mask all of 8259A-1 */
- outb(0xff, 0xA1); /* mask all of 8259A-1 */
- return 0;
-}
-
-static struct sysdev_class i8259_sysdev_class = {
- set_kset_name("i8259"),
- .suspend = i8259A_suspend,
- .resume = i8259A_resume,
- .shutdown = i8259A_shutdown,
-};
-
-static struct sys_device device_i8259A = {
- .id = 0,
- .cls = &i8259_sysdev_class,
-};
-
-static int __init i8259A_init_sysfs(void)
-{
- int error = sysdev_class_register(&i8259_sysdev_class);
- if (!error)
- error = sysdev_register(&device_i8259A);
- return error;
-}
-
-device_initcall(i8259A_init_sysfs);
-
-void init_8259A(int auto_eoi)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i8259A_lock, flags);
-
- outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
- outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
-
- /*
- * outb_p - this has to work on a wide range of PC hardware.
- */
- outb_p(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
- outb_p(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
- outb_p(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */
- if (auto_eoi) /* master does Auto EOI */
- outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
- else /* master expects normal EOI */
- outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
-
- outb_p(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
- outb_p(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
- outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */
- outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */
- if (auto_eoi)
- /*
- * in AEOI mode we just have to mask the interrupt
- * when acking.
- */
- i8259A_irq_type.ack = disable_8259A_irq;
- else
- i8259A_irq_type.ack = mask_and_ack_8259A;
-
- udelay(100); /* wait for 8259A to initialize */
-
- outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
- outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
-
- spin_unlock_irqrestore(&i8259A_lock, flags);
-}
-
-/*
- * Note that on a 486, we don't want to do a SIGFPE on an irq13
- * as the irq is unreliable, and exception 16 works correctly
- * (ie as explained in the intel literature). On a 386, you
- * can't use exception 16 due to bad IBM design, so we have to
- * rely on the less exact irq13.
- *
- * Careful.. Not only is IRQ13 unreliable, but it is also
- * leads to races. IBM designers who came up with it should
- * be shot.
- */
-
-
-static irqreturn_t math_error_irq(int cpl, void *dev_id, struct pt_regs *regs)
-{
- extern void math_error(void __user *);
- outb(0,0xF0);
- if (ignore_fpu_irq || !boot_cpu_data.hard_math)
- return IRQ_NONE;
- math_error((void __user *)regs->eip);
- return IRQ_HANDLED;
-}
-
-/*
- * New motherboards sometimes make IRQ 13 be a PCI interrupt,
- * so allow interrupt sharing.
- */
-static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL };
-
-void __init init_ISA_irqs (void)
-{
- int i;
-
-#ifdef CONFIG_X86_LOCAL_APIC
- init_bsp_APIC();
-#endif
- init_8259A(0);
-
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc[i].status = IRQ_DISABLED;
- irq_desc[i].action = NULL;
- irq_desc[i].depth = 1;
-
- if (i < 16) {
- /*
- * 16 old-style INTA-cycle interrupts:
- */
- irq_desc[i].handler = &i8259A_irq_type;
- } else {
- /*
- * 'high' PCI IRQs filled in on demand
- */
- irq_desc[i].handler = &no_irq_type;
- }
- }
-}
-
-void __init init_IRQ(void)
-{
- int i;
-
- /* all the set up before the call gates are initialised */
- pre_intr_init_hook();
-
- /*
- * Cover the whole vector space, no vector can escape
- * us. (some of these will be overridden and become
- * 'special' SMP interrupts)
- */
- for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
- int vector = FIRST_EXTERNAL_VECTOR + i;
- if (i >= NR_IRQS)
- break;
- if (vector != SYSCALL_VECTOR)
- set_intr_gate(vector, interrupt[i]);
- }
-
- /* setup after call gates are initialised (usually add in
- * the architecture specific gates)
- */
- intr_init_hook();
-
- /*
- * Set the clock to HZ Hz, we already have a valid
- * vector now:
- */
- setup_pit_timer();
-
- /*
- * External FPU? Set up irq13 if so, for
- * original braindamaged IBM FERR coupling.
- */
- if (boot_cpu_data.hard_math && !cpu_has_fpu)
- setup_irq(FPU_IRQ, &fpu_irq);
-
- irq_ctx_init(smp_processor_id());
-}
diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c
deleted file mode 100644
index 9caa8e8db80..00000000000
--- a/arch/i386/kernel/init_task.c
+++ /dev/null
@@ -1,46 +0,0 @@
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial thread structure.
- *
- * We need to make sure that this is THREAD_SIZE aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
-
-/*
- * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's.
- */
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
-
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
deleted file mode 100644
index cc5d7ac5b2e..00000000000
--- a/arch/i386/kernel/io_apic.c
+++ /dev/null
@@ -1,2659 +0,0 @@
-/*
- * Intel IO-APIC support for multi-Pentium hosts.
- *
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
- *
- * Many thanks to Stig Venaas for trying out countless experimental
- * patches and reporting/debugging problems patiently!
- *
- * (c) 1999, Multiple IO-APIC support, developed by
- * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
- * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
- * further tested and cleaned up by Zach Brown <zab@redhat.com>
- * and Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
- * thanks to Eric Gilmore
- * and Rolf G. Tews
- * for testing these extensively
- * Paul Diefenbaugh : Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/config.h>
-#include <linux/smp_lock.h>
-#include <linux/mc146818rtc.h>
-#include <linux/compiler.h>
-#include <linux/acpi.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/desc.h>
-#include <asm/timer.h>
-#include <asm/i8259.h>
-
-#include <mach_apic.h>
-
-#include "io_ports.h"
-
-int (*ioapic_renumber_irq)(int ioapic, int irq);
-atomic_t irq_mis_count;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-
-static DEFINE_SPINLOCK(ioapic_lock);
-
-/*
- * Is the SiS APIC rmw bug present ?
- * -1 = don't know, 0 = no, 1 = yes
- */
-int sis_apic_bug = -1;
-
-/*
- * # of IRQ routing registers
- */
-int nr_ioapic_registers[MAX_IO_APICS];
-
-int disable_timer_pin_1 __initdata;
-
-/*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
-static struct irq_pin_list {
- int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
-
-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
-#ifdef CONFIG_PCI_MSI
-#define vector_to_irq(vector) \
- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
-#else
-#define vector_to_irq(vector) (vector)
-#endif
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
- * shared ISA-space IRQs, so we have to support them. We are super
- * fast in the common case, and fast for shared ISA-space IRQs.
- */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
-{
- static int first_free_entry = NR_IRQS;
- struct irq_pin_list *entry = irq_2_pin + irq;
-
- while (entry->next)
- entry = irq_2_pin + entry->next;
-
- if (entry->pin != -1) {
- entry->next = first_free_entry;
- entry = irq_2_pin + entry->next;
- if (++first_free_entry >= PIN_MAP_SIZE)
- panic("io_apic.c: whoops");
- }
- entry->apic = apic;
- entry->pin = pin;
-}
-
-/*
- * Reroute an IRQ to a different pin.
- */
-static void __init replace_pin_at_irq(unsigned int irq,
- int oldapic, int oldpin,
- int newapic, int newpin)
-{
- struct irq_pin_list *entry = irq_2_pin + irq;
-
- while (1) {
- if (entry->apic == oldapic && entry->pin == oldpin) {
- entry->apic = newapic;
- entry->pin = newpin;
- }
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
-}
-
-static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
-{
- struct irq_pin_list *entry = irq_2_pin + irq;
- unsigned int pin, reg;
-
- for (;;) {
- pin = entry->pin;
- if (pin == -1)
- break;
- reg = io_apic_read(entry->apic, 0x10 + pin*2);
- reg &= ~disable;
- reg |= enable;
- io_apic_modify(entry->apic, 0x10 + pin*2, reg);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
-}
-
-/* mask = 1 */
-static void __mask_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00010000, 0);
-}
-
-/* mask = 0 */
-static void __unmask_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0, 0x00010000);
-}
-
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
-}
-
-static void mask_IO_APIC_irq (unsigned int irq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __mask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void unmask_IO_APIC_irq (unsigned int irq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
-{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- /* Check delivery_mode to be sure we're not clearing an SMI pin */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- if (entry.delivery_mode == dest_SMI)
- return;
-
- /*
- * Disable it in the IO-APIC irq-routing table:
- */
- memset(&entry, 0, sizeof(entry));
- entry.mask = 1;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void clear_IO_APIC (void)
-{
- int apic, pin;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
- clear_IO_APIC_pin(apic, pin);
-}
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
-{
- unsigned long flags;
- int pin;
- struct irq_pin_list *entry = irq_2_pin + irq;
- unsigned int apicid_value;
- cpumask_t tmp;
-
- cpus_and(tmp, cpumask, cpu_online_map);
- if (cpus_empty(tmp))
- tmp = TARGET_CPUS;
-
- cpus_and(cpumask, tmp, CPU_MASK_ALL);
-
- apicid_value = cpu_mask_to_apicid(cpumask);
- /* Prepare to do the io_apic_write */
- apicid_value = apicid_value << 24;
- spin_lock_irqsave(&ioapic_lock, flags);
- for (;;) {
- pin = entry->pin;
- if (pin == -1)
- break;
- io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
- set_irq_info(irq, cpumask);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#if defined(CONFIG_IRQBALANCE)
-# include <asm/processor.h> /* kernel_thread() */
-# include <linux/kernel_stat.h> /* kstat */
-# include <linux/slab.h> /* kmalloc() */
-# include <linux/timer.h> /* time_after() */
-
-# ifdef CONFIG_BALANCED_IRQ_DEBUG
-# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
-# define Dprintk(x...) do { TDprintk(x); } while (0)
-# else
-# define TDprintk(x...)
-# define Dprintk(x...)
-# endif
-
-
-#define IRQBALANCE_CHECK_ARCH -999
-static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
-static int physical_balance = 0;
-
-static struct irq_cpu_info {
- unsigned long * last_irq;
- unsigned long * irq_delta;
- unsigned long irq;
-} irq_cpu_data[NR_CPUS];
-
-#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
-#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq])
-#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq])
-
-#define IDLE_ENOUGH(cpu,now) \
- (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
-
-#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
-
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
-
-#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
-#define BALANCED_IRQ_MORE_DELTA (HZ/10)
-#define BALANCED_IRQ_LESS_DELTA (HZ)
-
-static long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
-
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
- unsigned long now, int direction)
-{
- int search_idle = 1;
- int cpu = curr_cpu;
-
- goto inside;
-
- do {
- if (unlikely(cpu == curr_cpu))
- search_idle = 0;
-inside:
- if (direction == 1) {
- cpu++;
- if (cpu >= NR_CPUS)
- cpu = 0;
- } else {
- cpu--;
- if (cpu == -1)
- cpu = NR_CPUS-1;
- }
- } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
- (search_idle && !IDLE_ENOUGH(cpu,now)));
-
- return cpu;
-}
-
-static inline void balance_irq(int cpu, int irq)
-{
- unsigned long now = jiffies;
- cpumask_t allowed_mask;
- unsigned int new_cpu;
-
- if (irqbalance_disabled)
- return;
-
- cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
- new_cpu = move(cpu, allowed_mask, now, 1);
- if (cpu != new_cpu) {
- set_pending_irq(irq, cpumask_of_cpu(new_cpu));
- }
-}
-
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
-{
- int i, j;
- Dprintk("Rotating IRQs among CPUs.\n");
- for (i = 0; i < NR_CPUS; i++) {
- for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
- if (!irq_desc[j].action)
- continue;
- /* Is it a significant load ? */
- if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
- useful_load_threshold)
- continue;
- balance_irq(i, j);
- }
- }
- balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
- return;
-}
-
-static void do_irq_balance(void)
-{
- int i, j;
- unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
- unsigned long move_this_load = 0;
- int max_loaded = 0, min_loaded = 0;
- int load;
- unsigned long useful_load_threshold = balanced_irq_interval + 10;
- int selected_irq;
- int tmp_loaded, first_attempt = 1;
- unsigned long tmp_cpu_irq;
- unsigned long imbalance = 0;
- cpumask_t allowed_mask, target_cpu_mask, tmp;
-
- for (i = 0; i < NR_CPUS; i++) {
- int package_index;
- CPU_IRQ(i) = 0;
- if (!cpu_online(i))
- continue;
- package_index = CPU_TO_PACKAGEINDEX(i);
- for (j = 0; j < NR_IRQS; j++) {
- unsigned long value_now, delta;
- /* Is this an active IRQ? */
- if (!irq_desc[j].action)
- continue;
- if ( package_index == i )
- IRQ_DELTA(package_index,j) = 0;
- /* Determine the total count per processor per IRQ */
- value_now = (unsigned long) kstat_cpu(i).irqs[j];
-
- /* Determine the activity per processor per IRQ */
- delta = value_now - LAST_CPU_IRQ(i,j);
-
- /* Update last_cpu_irq[][] for the next time */
- LAST_CPU_IRQ(i,j) = value_now;
-
- /* Ignore IRQs whose rate is less than the clock */
- if (delta < useful_load_threshold)
- continue;
- /* update the load for the processor or package total */
- IRQ_DELTA(package_index,j) += delta;
-
- /* Keep track of the higher numbered sibling as well */
- if (i != package_index)
- CPU_IRQ(i) += delta;
- /*
- * We have sibling A and sibling B in the package
- *
- * cpu_irq[A] = load for cpu A + load for cpu B
- * cpu_irq[B] = load for cpu B
- */
- CPU_IRQ(package_index) += delta;
- }
- }
- /* Find the least loaded processor package */
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_online(i))
- continue;
- if (i != CPU_TO_PACKAGEINDEX(i))
- continue;
- if (min_cpu_irq > CPU_IRQ(i)) {
- min_cpu_irq = CPU_IRQ(i);
- min_loaded = i;
- }
- }
- max_cpu_irq = ULONG_MAX;
-
-tryanothercpu:
- /* Look for heaviest loaded processor.
- * We may come back to get the next heaviest loaded processor.
- * Skip processors with trivial loads.
- */
- tmp_cpu_irq = 0;
- tmp_loaded = -1;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_online(i))
- continue;
- if (i != CPU_TO_PACKAGEINDEX(i))
- continue;
- if (max_cpu_irq <= CPU_IRQ(i))
- continue;
- if (tmp_cpu_irq < CPU_IRQ(i)) {
- tmp_cpu_irq = CPU_IRQ(i);
- tmp_loaded = i;
- }
- }
-
- if (tmp_loaded == -1) {
- /* In the case of small number of heavy interrupt sources,
- * loading some of the cpus too much. We use Ingo's original
- * approach to rotate them around.
- */
- if (!first_attempt && imbalance >= useful_load_threshold) {
- rotate_irqs_among_cpus(useful_load_threshold);
- return;
- }
- goto not_worth_the_effort;
- }
-
- first_attempt = 0; /* heaviest search */
- max_cpu_irq = tmp_cpu_irq; /* load */
- max_loaded = tmp_loaded; /* processor */
- imbalance = (max_cpu_irq - min_cpu_irq) / 2;
-
- Dprintk("max_loaded cpu = %d\n", max_loaded);
- Dprintk("min_loaded cpu = %d\n", min_loaded);
- Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
- Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
- Dprintk("load imbalance = %lu\n", imbalance);
-
- /* if imbalance is less than approx 10% of max load, then
- * observe diminishing returns action. - quit
- */
- if (imbalance < (max_cpu_irq >> 3)) {
- Dprintk("Imbalance too trivial\n");
- goto not_worth_the_effort;
- }
-
-tryanotherirq:
- /* if we select an IRQ to move that can't go where we want, then
- * see if there is another one to try.
- */
- move_this_load = 0;
- selected_irq = -1;
- for (j = 0; j < NR_IRQS; j++) {
- /* Is this an active IRQ? */
- if (!irq_desc[j].action)
- continue;
- if (imbalance <= IRQ_DELTA(max_loaded,j))
- continue;
- /* Try to find the IRQ that is closest to the imbalance
- * without going over.
- */
- if (move_this_load < IRQ_DELTA(max_loaded,j)) {
- move_this_load = IRQ_DELTA(max_loaded,j);
- selected_irq = j;
- }
- }
- if (selected_irq == -1) {
- goto tryanothercpu;
- }
-
- imbalance = move_this_load;
-
- /* For physical_balance case, we accumlated both load
- * values in the one of the siblings cpu_irq[],
- * to use the same code for physical and logical processors
- * as much as possible.
- *
- * NOTE: the cpu_irq[] array holds the sum of the load for
- * sibling A and sibling B in the slot for the lowest numbered
- * sibling (A), _AND_ the load for sibling B in the slot for
- * the higher numbered sibling.
- *
- * We seek the least loaded sibling by making the comparison
- * (A+B)/2 vs B
- */
- load = CPU_IRQ(min_loaded) >> 1;
- for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
- if (load > CPU_IRQ(j)) {
- /* This won't change cpu_sibling_map[min_loaded] */
- load = CPU_IRQ(j);
- min_loaded = j;
- }
- }
-
- cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
- target_cpu_mask = cpumask_of_cpu(min_loaded);
- cpus_and(tmp, target_cpu_mask, allowed_mask);
-
- if (!cpus_empty(tmp)) {
-
- Dprintk("irq = %d moved to cpu = %d\n",
- selected_irq, min_loaded);
- /* mark for change destination */
- set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
-
- /* Since we made a change, come back sooner to
- * check for more variation.
- */
- balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
- return;
- }
- goto tryanotherirq;
-
-not_worth_the_effort:
- /*
- * if we did not find an IRQ to move, then adjust the time interval
- * upward
- */
- balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
- balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
- Dprintk("IRQ worth rotating not found\n");
- return;
-}
-
-static int balanced_irq(void *unused)
-{
- int i;
- unsigned long prev_balance_time = jiffies;
- long time_remaining = balanced_irq_interval;
-
- daemonize("kirqd");
-
- /* push everything to CPU 0 to give us a starting point. */
- for (i = 0 ; i < NR_IRQS ; i++) {
- pending_irq_cpumask[i] = cpumask_of_cpu(0);
- set_pending_irq(i, cpumask_of_cpu(0));
- }
-
- for ( ; ; ) {
- time_remaining = schedule_timeout_interruptible(time_remaining);
- try_to_freeze();
- if (time_after(jiffies,
- prev_balance_time+balanced_irq_interval)) {
- preempt_disable();
- do_irq_balance();
- prev_balance_time = jiffies;
- time_remaining = balanced_irq_interval;
- preempt_enable();
- }
- }
- return 0;
-}
-
-static int __init balanced_irq_init(void)
-{
- int i;
- struct cpuinfo_x86 *c;
- cpumask_t tmp;
-
- cpus_shift_right(tmp, cpu_online_map, 2);
- c = &boot_cpu_data;
- /* When not overwritten by the command line ask subarchitecture. */
- if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
- irqbalance_disabled = NO_BALANCE_IRQ;
- if (irqbalance_disabled)
- return 0;
-
- /* disable irqbalance completely if there is only one processor online */
- if (num_online_cpus() < 2) {
- irqbalance_disabled = 1;
- return 0;
- }
- /*
- * Enable physical balance only if more than 1 physical processor
- * is present
- */
- if (smp_num_siblings > 1 && !cpus_empty(tmp))
- physical_balance = 1;
-
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_online(i))
- continue;
- irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
- irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
- if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
- printk(KERN_ERR "balanced_irq_init: out of memory");
- goto failed;
- }
- memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
- memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
- }
-
- printk(KERN_INFO "Starting balanced_irq\n");
- if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0)
- return 0;
- else
- printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
-failed:
- for (i = 0; i < NR_CPUS; i++) {
- kfree(irq_cpu_data[i].irq_delta);
- kfree(irq_cpu_data[i].last_irq);
- }
- return 0;
-}
-
-int __init irqbalance_disable(char *str)
-{
- irqbalance_disabled = 1;
- return 0;
-}
-
-__setup("noirqbalance", irqbalance_disable);
-
-late_initcall(balanced_irq_init);
-#endif /* CONFIG_IRQBALANCE */
-#endif /* CONFIG_SMP */
-
-#ifndef CONFIG_SMP
-void fastcall send_IPI_self(int vector)
-{
- unsigned int cfg;
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
- cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write_around(APIC_ICR, cfg);
-}
-#endif /* !CONFIG_SMP */
-
-
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-static int pirq_entries [MAX_PIRQS];
-static int pirqs_enabled;
-int skip_ioapic_setup;
-
-static int __init ioapic_setup(char *str)
-{
- skip_ioapic_setup = 1;
- return 1;
-}
-
-__setup("noapic", ioapic_setup);
-
-static int __init ioapic_pirq_setup(char *str)
-{
- int i, max;
- int ints[MAX_PIRQS+1];
-
- get_options(str, ARRAY_SIZE(ints), ints);
-
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-
- pirqs_enabled = 1;
- apic_printk(APIC_VERBOSE, KERN_INFO
- "PIRQ redirection, working around broken MP-BIOS.\n");
- max = MAX_PIRQS;
- if (ints[0] < MAX_PIRQS)
- max = ints[0];
-
- for (i = 0; i < max; i++) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
- /*
- * PIRQs are mapped upside down, usually.
- */
- pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
- }
- return 1;
-}
-
-__setup("pirq=", ioapic_pirq_setup);
-
-/*
- * Find the IRQ entry number of a certain pin.
- */
-static int find_irq_entry(int apic, int pin, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mpc_irqtype == type &&
- (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
- mp_irqs[i].mpc_dstirq == pin)
- return i;
-
- return -1;
-}
-
-/*
- * Find the pin to which IRQ[irq] (ISA) is connected
- */
-static int __init find_isa_irq_pin(int irq, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
- mp_bus_id_to_type[lbus] == MP_BUS_NEC98
- ) &&
- (mp_irqs[i].mpc_irqtype == type) &&
- (mp_irqs[i].mpc_srcbusirq == irq))
-
- return mp_irqs[i].mpc_dstirq;
- }
- return -1;
-}
-
-static int __init find_isa_irq_apic(int irq, int type)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
- mp_bus_id_to_type[lbus] == MP_BUS_NEC98
- ) &&
- (mp_irqs[i].mpc_irqtype == type) &&
- (mp_irqs[i].mpc_srcbusirq == irq))
- break;
- }
- if (i < mp_irq_entries) {
- int apic;
- for(apic = 0; apic < nr_ioapics; apic++) {
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
- return apic;
- }
- }
-
- return -1;
-}
-
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
- int apic, i, best_guess = -1;
-
- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
- "slot:%d, pin:%d.\n", bus, slot, pin);
- if (mp_bus_id_to_pci_bus[bus] == -1) {
- printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
- return -1;
- }
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
- break;
-
- if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
- !mp_irqs[i].mpc_irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
-
- if (!(apic || IO_APIC_IRQ(irq)))
- continue;
-
- if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
- return irq;
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0)
- best_guess = irq;
- }
- }
- return best_guess;
-}
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
-/*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be TARGET_CPUS
- */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
- int pin, ioapic, irq, irq_entry;
-
- if (skip_ioapic_setup == 1)
- return;
-
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
- if (irq_entry == -1)
- continue;
- irq = pin_2_irq(irq_entry, ioapic, pin);
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
- }
-
- }
-}
-#endif
-
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
- if (irq < 16) {
- unsigned int port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
- }
- apic_printk(APIC_VERBOSE, KERN_INFO
- "Broken MPtable reports ISA irq %d\n", irq);
- return 0;
-}
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value. If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
-#define default_EISA_polarity(idx) (0)
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx) (0)
-#define default_ISA_polarity(idx) (0)
-
-/* PCI interrupts are always polarity one level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx) (1)
-#define default_PCI_polarity(idx) (1)
-
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx) (1)
-#define default_MCA_polarity(idx) (0)
-
-/* NEC98 interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_NEC98_trigger(idx) (0)
-#define default_NEC98_polarity(idx) (0)
-
-static int __init MPBIOS_polarity(int idx)
-{
- int bus = mp_irqs[idx].mpc_srcbus;
- int polarity;
-
- /*
- * Determine IRQ line polarity (high active or low active):
- */
- switch (mp_irqs[idx].mpc_irqflag & 3)
- {
- case 0: /* conforms, ie. bus-type dependent polarity */
- {
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- polarity = default_ISA_polarity(idx);
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- polarity = default_EISA_polarity(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- polarity = default_PCI_polarity(idx);
- break;
- }
- case MP_BUS_MCA: /* MCA pin */
- {
- polarity = default_MCA_polarity(idx);
- break;
- }
- case MP_BUS_NEC98: /* NEC 98 pin */
- {
- polarity = default_NEC98_polarity(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
- break;
- }
- case 1: /* high active */
- {
- polarity = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- case 3: /* low active */
- {
- polarity = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
- return polarity;
-}
-
-static int MPBIOS_trigger(int idx)
-{
- int bus = mp_irqs[idx].mpc_srcbus;
- int trigger;
-
- /*
- * Determine IRQ trigger mode (edge or level sensitive):
- */
- switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
- {
- case 0: /* conforms, ie. bus-type dependent */
- {
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- trigger = default_ISA_trigger(idx);
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- trigger = default_EISA_trigger(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- trigger = default_PCI_trigger(idx);
- break;
- }
- case MP_BUS_MCA: /* MCA pin */
- {
- trigger = default_MCA_trigger(idx);
- break;
- }
- case MP_BUS_NEC98: /* NEC 98 pin */
- {
- trigger = default_NEC98_trigger(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- }
- break;
- }
- case 1: /* edge */
- {
- trigger = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- case 3: /* level */
- {
- trigger = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 0;
- break;
- }
- }
- return trigger;
-}
-
-static inline int irq_polarity(int idx)
-{
- return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
- return MPBIOS_trigger(idx);
-}
-
-static int pin_2_irq(int idx, int apic, int pin)
-{
- int irq, i;
- int bus = mp_irqs[idx].mpc_srcbus;
-
- /*
- * Debugging check, we are in big trouble if this message pops up!
- */
- if (mp_irqs[idx].mpc_dstirq != pin)
- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- case MP_BUS_EISA:
- case MP_BUS_MCA:
- case MP_BUS_NEC98:
- {
- irq = mp_irqs[idx].mpc_srcbusirq;
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- /*
- * PCI IRQs are mapped in order
- */
- i = irq = 0;
- while (i < apic)
- irq += nr_ioapic_registers[i++];
- irq += pin;
-
- /*
- * For MPS mode, so far only needed by ES7000 platform
- */
- if (ioapic_renumber_irq)
- irq = ioapic_renumber_irq(apic, irq);
-
- break;
- }
- default:
- {
- printk(KERN_ERR "unknown bus type %d.\n",bus);
- irq = 0;
- break;
- }
- }
-
- /*
- * PCI IRQ command line redirection. Yes, limits are hardcoded.
- */
- if ((pin >= 16) && (pin <= 23)) {
- if (pirq_entries[pin-16] != -1) {
- if (!pirq_entries[pin-16]) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- "disabling PIRQ%d\n", pin-16);
- } else {
- irq = pirq_entries[pin-16];
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- "using PIRQ%d -> IRQ %d\n",
- pin-16, irq);
- }
- }
- }
- return irq;
-}
-
-static inline int IO_APIC_irq_trigger(int irq)
-{
- int apic, idx, pin;
-
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- idx = find_irq_entry(apic,pin,mp_INT);
- if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
- return irq_trigger(idx);
- }
- }
- /*
- * nonexistent IRQs are edge default
- */
- return 0;
-}
-
-/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
-
-int assign_irq_vector(int irq)
-{
- static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
-
- BUG_ON(irq >= NR_IRQ_VECTORS);
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
- return IO_APIC_VECTOR(irq);
-next:
- current_vector += 8;
- if (current_vector == SYSCALL_VECTOR)
- goto next;
-
- if (current_vector >= FIRST_SYSTEM_VECTOR) {
- offset++;
- if (!(offset%8))
- return -ENOSPC;
- current_vector = FIRST_DEVICE_VECTOR + offset;
- }
-
- vector_irq[current_vector] = irq;
- if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = current_vector;
-
- return current_vector;
-}
-
-static struct hw_interrupt_type ioapic_level_type;
-static struct hw_interrupt_type ioapic_edge_type;
-
-#define IOAPIC_AUTO -1
-#define IOAPIC_EDGE 0
-#define IOAPIC_LEVEL 1
-
-static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
-{
- if (use_pci_vector() && !platform_legacy_irq(irq)) {
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[vector].handler = &ioapic_level_type;
- else
- irq_desc[vector].handler = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[vector]);
- } else {
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[irq].handler = &ioapic_level_type;
- else
- irq_desc[irq].handler = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[irq]);
- }
-}
-
-static void __init setup_IO_APIC_irqs(void)
-{
- struct IO_APIC_route_entry entry;
- int apic, pin, idx, irq, first_notcon = 1, vector;
- unsigned long flags;
-
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
- /*
- * add it to the IO-APIC irq-routing table:
- */
- memset(&entry,0,sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.mask = 0; /* enable IRQ */
- entry.dest.logical.logical_dest =
- cpu_mask_to_apicid(TARGET_CPUS);
-
- idx = find_irq_entry(apic,pin,mp_INT);
- if (idx == -1) {
- if (first_notcon) {
- apic_printk(APIC_VERBOSE, KERN_DEBUG
- " IO-APIC (apicid-pin) %d-%d",
- mp_ioapics[apic].mpc_apicid,
- pin);
- first_notcon = 0;
- } else
- apic_printk(APIC_VERBOSE, ", %d-%d",
- mp_ioapics[apic].mpc_apicid, pin);
- continue;
- }
-
- entry.trigger = irq_trigger(idx);
- entry.polarity = irq_polarity(idx);
-
- if (irq_trigger(idx)) {
- entry.trigger = 1;
- entry.mask = 1;
- }
-
- irq = pin_2_irq(idx, apic, pin);
- /*
- * skip adding the timer int on secondary nodes, which causes
- * a small but painful rift in the time-space continuum
- */
- if (multi_timer_check(apic, irq))
- continue;
- else
- add_pin_to_irq(irq, apic, pin);
-
- if (!apic && !IO_APIC_IRQ(irq))
- continue;
-
- if (IO_APIC_IRQ(irq)) {
- vector = assign_irq_vector(irq);
- entry.vector = vector;
- ioapic_register_intr(irq, vector, IOAPIC_AUTO);
-
- if (!apic && (irq < 16))
- disable_8259A_irq(irq);
- }
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
- set_native_irq_info(irq, TARGET_CPUS);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- }
- }
-
- if (!first_notcon)
- apic_printk(APIC_VERBOSE, " not connected.\n");
-}
-
-/*
- * Set up the 8259A-master output pin:
- */
-static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
-{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- memset(&entry,0,sizeof(entry));
-
- disable_8259A_irq(0);
-
- /* mask LVT0 */
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-
- /*
- * We use logical delivery to get the timer IRQ
- * to the first CPU.
- */
- entry.dest_mode = INT_DEST_MODE;
- entry.mask = 0; /* unmask IRQ now */
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.polarity = 0;
- entry.trigger = 0;
- entry.vector = vector;
-
- /*
- * The timer IRQ doesn't have to know that behind the
- * scene we have a 8259A-master in AEOI mode ...
- */
- irq_desc[0].handler = &ioapic_edge_type;
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- enable_8259A_irq(0);
-}
-
-static inline void UNEXPECTED_IO_APIC(void)
-{
-}
-
-void __init print_IO_APIC(void)
-{
- int apic, i;
- union IO_APIC_reg_00 reg_00;
- union IO_APIC_reg_01 reg_01;
- union IO_APIC_reg_02 reg_02;
- union IO_APIC_reg_03 reg_03;
- unsigned long flags;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
- for (i = 0; i < nr_ioapics; i++)
- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
- mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
-
- /*
- * We are a bit conservative about what we expect. We have to
- * know about every hardware change ASAP.
- */
- printk(KERN_INFO "testing the IO APIC.......................\n");
-
- for (apic = 0; apic < nr_ioapics; apic++) {
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- reg_01.raw = io_apic_read(apic, 1);
- if (reg_01.bits.version >= 0x10)
- reg_02.raw = io_apic_read(apic, 2);
- if (reg_01.bits.version >= 0x20)
- reg_03.raw = io_apic_read(apic, 3);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
- printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
- printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
- printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
- if (reg_00.bits.ID >= get_physical_broadcast())
- UNEXPECTED_IO_APIC();
- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
-
- printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
- printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
- (reg_01.bits.entries != 0x2E) &&
- (reg_01.bits.entries != 0x3F)
- )
- UNEXPECTED_IO_APIC();
-
- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
- printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
- )
- UNEXPECTED_IO_APIC();
- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
-
- /*
- * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
- * but the value of reg_02 is read as the previous read register
- * value, so ignore it if reg_02 == reg_01.
- */
- if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
- printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
- printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
- }
-
- /*
- * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
- * or reg_03, but the value of reg_0[23] is read as the previous read
- * register value, so ignore it if reg_03 == reg_0[12].
- */
- if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
- reg_03.raw != reg_01.raw) {
- printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
- printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
- if (reg_03.bits.__reserved_1)
- UNEXPECTED_IO_APIC();
- }
-
- printk(KERN_DEBUG ".... IRQ redirection table:\n");
-
- printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
- " Stat Dest Deli Vect: \n");
-
- for (i = 0; i <= reg_01.bits.entries; i++) {
- struct IO_APIC_route_entry entry;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- printk(KERN_DEBUG " %02x %03X %02X ",
- i,
- entry.dest.logical.logical_dest,
- entry.dest.physical.physical_dest
- );
-
- printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
- entry.mask,
- entry.trigger,
- entry.irr,
- entry.polarity,
- entry.delivery_status,
- entry.dest_mode,
- entry.delivery_mode,
- entry.vector
- );
- }
- }
- if (use_pci_vector())
- printk(KERN_INFO "Using vector-based indexing\n");
- printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for (i = 0; i < NR_IRQS; i++) {
- struct irq_pin_list *entry = irq_2_pin + i;
- if (entry->pin < 0)
- continue;
- if (use_pci_vector() && !platform_legacy_irq(i))
- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
- else
- printk(KERN_DEBUG "IRQ%d ", i);
- for (;;) {
- printk("-> %d:%d", entry->apic, entry->pin);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
- printk("\n");
- }
-
- printk(KERN_INFO ".................................... done.\n");
-
- return;
-}
-
-#if 0
-
-static void print_APIC_bitfield (int base)
-{
- unsigned int v;
- int i, j;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
- for (i = 0; i < 8; i++) {
- v = apic_read(base + i*0x10);
- for (j = 0; j < 32; j++) {
- if (v & (1<<j))
- printk("1");
- else
- printk("0");
- }
- printk("\n");
- }
-}
-
-void /*__init*/ print_local_APIC(void * dummy)
-{
- unsigned int v, ver, maxlvt;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
- smp_processor_id(), hard_smp_processor_id());
- v = apic_read(APIC_ID);
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
- v = apic_read(APIC_LVR);
- printk(KERN_INFO "... APIC VERSION: %08x\n", v);
- ver = GET_APIC_VERSION(v);
- maxlvt = get_maxlvt();
-
- v = apic_read(APIC_TASKPRI);
- printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- v = apic_read(APIC_ARBPRI);
- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
- v & APIC_ARBPRI_MASK);
- v = apic_read(APIC_PROCPRI);
- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
- }
-
- v = apic_read(APIC_EOI);
- printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
- v = apic_read(APIC_RRR);
- printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
- v = apic_read(APIC_LDR);
- printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
- v = apic_read(APIC_DFR);
- printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
- v = apic_read(APIC_SPIV);
- printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
- printk(KERN_DEBUG "... APIC ISR field:\n");
- print_APIC_bitfield(APIC_ISR);
- printk(KERN_DEBUG "... APIC TMR field:\n");
- print_APIC_bitfield(APIC_TMR);
- printk(KERN_DEBUG "... APIC IRR field:\n");
- print_APIC_bitfield(APIC_IRR);
-
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- v = apic_read(APIC_ESR);
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
- }
-
- v = apic_read(APIC_ICR);
- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
- v = apic_read(APIC_ICR2);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
-
- v = apic_read(APIC_LVTT);
- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
- if (maxlvt > 3) { /* PC is LVT#4. */
- v = apic_read(APIC_LVTPC);
- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
- }
- v = apic_read(APIC_LVT0);
- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
- v = apic_read(APIC_LVT1);
- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
- if (maxlvt > 2) { /* ERR is LVT#3. */
- v = apic_read(APIC_LVTERR);
- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
- }
-
- v = apic_read(APIC_TMICT);
- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
- v = apic_read(APIC_TMCCT);
- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
- v = apic_read(APIC_TDCR);
- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
- printk("\n");
-}
-
-void print_all_local_APICs (void)
-{
- on_each_cpu(print_local_APIC, NULL, 1, 1);
-}
-
-void /*__init*/ print_PIC(void)
-{
- unsigned int v;
- unsigned long flags;
-
- if (apic_verbosity == APIC_QUIET)
- return;
-
- printk(KERN_DEBUG "\nprinting PIC contents\n");
-
- spin_lock_irqsave(&i8259A_lock, flags);
-
- v = inb(0xa1) << 8 | inb(0x21);
- printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
-
- v = inb(0xa0) << 8 | inb(0x20);
- printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
-
- outb(0x0b,0xa0);
- outb(0x0b,0x20);
- v = inb(0xa0) << 8 | inb(0x20);
- outb(0x0a,0xa0);
- outb(0x0a,0x20);
-
- spin_unlock_irqrestore(&i8259A_lock, flags);
-
- printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
-
- v = inb(0x4d1) << 8 | inb(0x4d0);
- printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-#endif /* 0 */
-
-static void __init enable_IO_APIC(void)
-{
- union IO_APIC_reg_01 reg_01;
- int i8259_apic, i8259_pin;
- int i, apic;
- unsigned long flags;
-
- for (i = 0; i < PIN_MAP_SIZE; i++) {
- irq_2_pin[i].pin = -1;
- irq_2_pin[i].next = 0;
- }
- if (!pirqs_enabled)
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-
- /*
- * The number of IO-APIC IRQ registers (== #pins):
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(apic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- nr_ioapic_registers[apic] = reg_01.bits.entries+1;
- }
- for(apic = 0; apic < nr_ioapics; apic++) {
- int pin;
- /* See if any of the pins is in ExtINT mode */
- for(pin = 0; pin < nr_ioapic_registers[i]; pin++) {
- struct IO_APIC_route_entry entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
-
- /* If the interrupt line is enabled and in ExtInt mode
- * I have found the pin where the i8259 is connected.
- */
- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
- ioapic_i8259.apic = apic;
- ioapic_i8259.pin = pin;
- goto found_i8259;
- }
- }
- }
- found_i8259:
- /* Look to see what if the MP table has reported the ExtINT */
- /* If we could not find the appropriate pin by looking at the ioapic
- * the i8259 probably is not connected the ioapic but give the
- * mptable a chance anyway.
- */
- i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
- i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
- /* Trust the MP table if nothing is setup in the hardware */
- if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
- printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
- ioapic_i8259.pin = i8259_pin;
- ioapic_i8259.apic = i8259_apic;
- }
- /* Complain if the MP table and the hardware disagree */
- if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
- (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
- {
- printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
- }
-
- /*
- * Do not trust the IO-APIC being empty at bootup
- */
- clear_IO_APIC();
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
- /*
- * Clear the IO-APIC before rebooting:
- */
- clear_IO_APIC();
-
- /*
- * If the i8259 is routed through an IOAPIC
- * Put that IOAPIC in virtual wire mode
- * so legacy interrupts can be delivered.
- */
- if (ioapic_i8259.pin != -1) {
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- memset(&entry, 0, sizeof(entry));
- entry.mask = 0; /* Enabled */
- entry.trigger = 0; /* Edge */
- entry.irr = 0;
- entry.polarity = 0; /* High */
- entry.delivery_status = 0;
- entry.dest_mode = 0; /* Physical */
- entry.delivery_mode = dest_ExtINT; /* ExtInt */
- entry.vector = 0;
- entry.dest.physical.physical_dest = 0;
-
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
- *(((int *)&entry)+1));
- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
- *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
- }
- disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-}
-
-/*
- * function to set the IO-APIC physical IDs based on the
- * values stored in the MPC table.
- *
- * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
- */
-
-#ifndef CONFIG_X86_NUMAQ
-static void __init setup_ioapic_ids_from_mpc(void)
-{
- union IO_APIC_reg_00 reg_00;
- physid_mask_t phys_id_present_map;
- int apic;
- int i;
- unsigned char old_id;
- unsigned long flags;
-
- /*
- * Don't check I/O APIC IDs for xAPIC systems. They have
- * no meaning without the serial APIC bus.
- */
- if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15))
- return;
- /*
- * This is broken; anything with a real cpu count has to
- * circumvent this idiocy regardless.
- */
- phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
-
- /*
- * Set the IOAPIC ID to the value stored in the MPC table.
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
-
- /* Read the register 0 value */
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- old_id = mp_ioapics[apic].mpc_apicid;
-
- if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
- apic, mp_ioapics[apic].mpc_apicid);
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- reg_00.bits.ID);
- mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
- }
-
- /*
- * Sanity check, is the ID really free? Every APIC in a
- * system must have a unique ID or we get lots of nice
- * 'stuck on smp_invalidate_needed IPI wait' messages.
- */
- if (check_apicid_used(phys_id_present_map,
- mp_ioapics[apic].mpc_apicid)) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
- apic, mp_ioapics[apic].mpc_apicid);
- for (i = 0; i < get_physical_broadcast(); i++)
- if (!physid_isset(i, phys_id_present_map))
- break;
- if (i >= get_physical_broadcast())
- panic("Max APIC ID exceeded!\n");
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- i);
- physid_set(i, phys_id_present_map);
- mp_ioapics[apic].mpc_apicid = i;
- } else {
- physid_mask_t tmp;
- tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
- apic_printk(APIC_VERBOSE, "Setting %d in the "
- "phys_id_present_map\n",
- mp_ioapics[apic].mpc_apicid);
- physids_or(phys_id_present_map, phys_id_present_map, tmp);
- }
-
-
- /*
- * We need to adjust the IRQ routing table
- * if the ID changed.
- */
- if (old_id != mp_ioapics[apic].mpc_apicid)
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mpc_dstapic == old_id)
- mp_irqs[i].mpc_dstapic
- = mp_ioapics[apic].mpc_apicid;
-
- /*
- * Read the right value from the MPC table and
- * write it into the ID register.
- */
- apic_printk(APIC_VERBOSE, KERN_INFO
- "...changing IO-APIC physical APIC ID to %d ...",
- mp_ioapics[apic].mpc_apicid);
-
- reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0, reg_00.raw);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- /*
- * Sanity check
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
- printk("could not set ID!\n");
- else
- apic_printk(APIC_VERBOSE, " ok.\n");
- }
-}
-#else
-static void __init setup_ioapic_ids_from_mpc(void) { }
-#endif
-
-/*
- * There is a nasty bug in some older SMP boards, their mptable lies
- * about the timer IRQ. We do the following to work around the situation:
- *
- * - timer IRQ defaults to IO-APIC IRQ
- * - if this function detects that timer IRQs are defunct, then we fall
- * back to ISA timer IRQs
- */
-static int __init timer_irq_works(void)
-{
- unsigned long t1 = jiffies;
-
- local_irq_enable();
- /* Let ten ticks pass... */
- mdelay((10 * 1000) / HZ);
-
- /*
- * Expect a few ticks at least, to be sure some possible
- * glue logic does not lock up after one or two first
- * ticks in a non-ExtINT mode. Also the local APIC
- * might have cached one ExtINT interrupt. Finally, at
- * least one tick may be lost due to delays.
- */
- if (jiffies - t1 > 4)
- return 1;
-
- return 0;
-}
-
-/*
- * In the SMP+IOAPIC case it might happen that there are an unspecified
- * number of pending IRQ events unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
- * better to do it this way as thus we do not have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
-/*
- * Edge triggered needs to resend any interrupt
- * that was delayed but this is now handled in the device
- * independent code.
- */
-
-/*
- * Starting up a edge-triggered IO-APIC interrupt is
- * nasty - we need to make sure that we get the edge.
- * If it is already asserted for some reason, we need
- * return 1 to indicate that is was pending.
- *
- * This is not complete - we should be able to fake
- * an edge even if it isn't on the 8259A...
- */
-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
-{
- int was_pending = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < 16) {
- disable_8259A_irq(irq);
- if (i8259A_irq_pending(irq))
- was_pending = 1;
- }
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return was_pending;
-}
-
-/*
- * Once we have recorded IRQ_PENDING already, we can mask the
- * interrupt for real. This prevents IRQ storms from unhandled
- * devices.
- */
-static void ack_edge_ioapic_irq(unsigned int irq)
-{
- move_irq(irq);
- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
- == (IRQ_PENDING | IRQ_DISABLED))
- mask_IO_APIC_irq(irq);
- ack_APIC_irq();
-}
-
-/*
- * Level triggered interrupts can just be masked,
- * and shutting down and starting up the interrupt
- * is the same as enabling and disabling them -- except
- * with a startup need to return a "was pending" value.
- *
- * Level triggered interrupts are special because we
- * do not touch any IO-APIC register while handling
- * them. We ack the APIC in the end-IRQ handler, not
- * in the start-IRQ-handler. Protection against reentrance
- * from the same interrupt is still provided, both by the
- * generic IRQ layer and by the fact that an unacked local
- * APIC does not accept IRQs.
- */
-static unsigned int startup_level_ioapic_irq (unsigned int irq)
-{
- unmask_IO_APIC_irq(irq);
-
- return 0; /* don't check for pending */
-}
-
-static void end_level_ioapic_irq (unsigned int irq)
-{
- unsigned long v;
- int i;
-
- move_irq(irq);
-/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets). Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless. As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source. The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually. We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt. We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul. --macro
- */
- i = IO_APIC_VECTOR(irq);
-
- v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
- ack_APIC_irq();
-
- if (!(v & (1 << (i & 0x1f)))) {
- atomic_inc(&irq_mis_count);
- spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(irq);
- __unmask_and_level_IO_APIC_irq(irq);
- spin_unlock(&ioapic_lock);
- }
-}
-
-#ifdef CONFIG_PCI_MSI
-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- return startup_edge_ioapic_irq(irq);
-}
-
-static void ack_edge_ioapic_vector(unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_irq(vector);
- ack_edge_ioapic_irq(irq);
-}
-
-static unsigned int startup_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- return startup_level_ioapic_irq (irq);
-}
-
-static void end_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_irq(vector);
- end_level_ioapic_irq(irq);
-}
-
-static void mask_IO_APIC_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- mask_IO_APIC_irq(irq);
-}
-
-static void unmask_IO_APIC_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- unmask_IO_APIC_irq(irq);
-}
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_vector (unsigned int vector,
- cpumask_t cpu_mask)
-{
- int irq = vector_to_irq(vector);
-
- set_native_irq_info(vector, cpu_mask);
- set_ioapic_affinity_irq(irq, cpu_mask);
-}
-#endif
-#endif
-
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
- .typename = "IO-APIC-edge",
- .startup = startup_edge_ioapic,
- .shutdown = shutdown_edge_ioapic,
- .enable = enable_edge_ioapic,
- .disable = disable_edge_ioapic,
- .ack = ack_edge_ioapic,
- .end = end_edge_ioapic,
-#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
-#endif
-};
-
-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
- .typename = "IO-APIC-level",
- .startup = startup_level_ioapic,
- .shutdown = shutdown_level_ioapic,
- .enable = enable_level_ioapic,
- .disable = disable_level_ioapic,
- .ack = mask_and_ack_level_ioapic,
- .end = end_level_ioapic,
-#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
-#endif
-};
-
-static inline void init_IO_APIC_traps(void)
-{
- int irq;
-
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
- for (irq = 0; irq < NR_IRQS ; irq++) {
- int tmp = irq;
- if (use_pci_vector()) {
- if (!platform_legacy_irq(tmp))
- if ((tmp = vector_to_irq(tmp)) == -1)
- continue;
- }
- if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
- /*
- * Hmm.. We don't have an entry for this,
- * so default to an old-fashioned 8259
- * interrupt if we can..
- */
- if (irq < 16)
- make_8259A_irq(irq);
- else
- /* Strange. Oh, well.. */
- irq_desc[irq].handler = &no_irq_type;
- }
- }
-}
-
-static void enable_lapic_irq (unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static void disable_lapic_irq (unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void ack_lapic_irq (unsigned int irq)
-{
- ack_APIC_irq();
-}
-
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
-
-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
- .typename = "local-APIC-edge",
- .startup = NULL, /* startup_irq() not used for IRQ0 */
- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
- .enable = enable_lapic_irq,
- .disable = disable_lapic_irq,
- .ack = ack_lapic_irq,
- .end = end_lapic_irq
-};
-
-static void setup_nmi (void)
-{
- /*
- * Dirty trick to enable the NMI watchdog ...
- * We put the 8259A master into AEOI mode and
- * unmask on all local APICs LVT0 as NMI.
- *
- * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
- * is from Maciej W. Rozycki - so we do not have to EOI from
- * the NMI handler or the timer interrupt.
- */
- apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
- on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
-
- apic_printk(APIC_VERBOSE, " done.\n");
-}
-
-/*
- * This looks a bit hackish but it's about the only one way of sending
- * a few INTA cycles to 8259As and any associated glue logic. ICR does
- * not support the ExtINT mode, unfortunately. We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA. --macro
- */
-static inline void unlock_ExtINT_logic(void)
-{
- int apic, pin, i;
- struct IO_APIC_route_entry entry0, entry1;
- unsigned char save_control, save_freq_select;
- unsigned long flags;
-
- pin = find_isa_irq_pin(8, mp_INT);
- apic = find_isa_irq_apic(8, mp_INT);
- if (pin == -1)
- return;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- clear_IO_APIC_pin(apic, pin);
-
- memset(&entry1, 0, sizeof(entry1));
-
- entry1.dest_mode = 0; /* physical delivery */
- entry1.mask = 0; /* unmask IRQ now */
- entry1.dest.physical.physical_dest = hard_smp_processor_id();
- entry1.delivery_mode = dest_ExtINT;
- entry1.polarity = entry0.polarity;
- entry1.trigger = 0;
- entry1.vector = 0;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- save_control = CMOS_READ(RTC_CONTROL);
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
- RTC_FREQ_SELECT);
- CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
- i = 100;
- while (i-- > 0) {
- mdelay(10);
- if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
- i -= 10;
- }
-
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- clear_IO_APIC_pin(apic, pin);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
- * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- */
-static inline void check_timer(void)
-{
- int apic1, pin1, apic2, pin2;
- int vector;
-
- /*
- * get/set the timer IRQ vector:
- */
- disable_8259A_irq(0);
- vector = assign_irq_vector(0);
- set_intr_gate(vector, interrupt[0]);
-
- /*
- * Subtle, code in do_timer_interrupt() expects an AEOI
- * mode for the 8259A whenever interrupts are routed
- * through I/O APICs. Also IRQ0 has to be enabled in
- * the 8259A which implies the virtual wire has to be
- * disabled in the local APIC.
- */
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
- init_8259A(1);
- timer_ack = 1;
- enable_8259A_irq(0);
-
- pin1 = find_isa_irq_pin(0, mp_INT);
- apic1 = find_isa_irq_apic(0, mp_INT);
- pin2 = ioapic_i8259.pin;
- apic2 = ioapic_i8259.apic;
-
- printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
- vector, apic1, pin1, apic2, pin2);
-
- if (pin1 != -1) {
- /*
- * Ok, does IRQ0 through the IOAPIC work?
- */
- unmask_IO_APIC_irq(0);
- if (timer_irq_works()) {
- if (nmi_watchdog == NMI_IO_APIC) {
- disable_8259A_irq(0);
- setup_nmi();
- enable_8259A_irq(0);
- }
- if (disable_timer_pin_1 > 0)
- clear_IO_APIC_pin(0, pin1);
- return;
- }
- clear_IO_APIC_pin(apic1, pin1);
- printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
- "IO-APIC\n");
- }
-
- printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
- if (pin2 != -1) {
- printk("\n..... (found pin %d) ...", pin2);
- /*
- * legacy devices should be connected to IO APIC #0
- */
- setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
- if (timer_irq_works()) {
- printk("works.\n");
- if (pin1 != -1)
- replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
- else
- add_pin_to_irq(0, apic2, pin2);
- if (nmi_watchdog == NMI_IO_APIC) {
- setup_nmi();
- }
- return;
- }
- /*
- * Cleanup, just in case ...
- */
- clear_IO_APIC_pin(apic2, pin2);
- }
- printk(" failed.\n");
-
- if (nmi_watchdog == NMI_IO_APIC) {
- printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
- nmi_watchdog = 0;
- }
-
- printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
-
- disable_8259A_irq(0);
- irq_desc[0].handler = &lapic_irq_type;
- apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
- enable_8259A_irq(0);
-
- if (timer_irq_works()) {
- printk(" works.\n");
- return;
- }
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
- printk(" failed.\n");
-
- printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
-
- timer_ack = 0;
- init_8259A(0);
- make_8259A_irq(0);
- apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
-
- unlock_ExtINT_logic();
-
- if (timer_irq_works()) {
- printk(" works.\n");
- return;
- }
- printk(" failed :(.\n");
- panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
- "report. Then try booting with the 'noapic' option");
-}
-
-/*
- *
- * IRQ's that are handled by the PIC in the MPS IOAPIC case.
- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
- * Linux doesn't really care, as it's not actually used
- * for any interrupt handling anyway.
- */
-#define PIC_IRQS (1 << PIC_CASCADE_IR)
-
-void __init setup_IO_APIC(void)
-{
- enable_IO_APIC();
-
- if (acpi_ioapic)
- io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
- else
- io_apic_irqs = ~PIC_IRQS;
-
- printk("ENABLING IO-APIC IRQs\n");
-
- /*
- * Set up IO-APIC IRQ routing.
- */
- if (!acpi_ioapic)
- setup_ioapic_ids_from_mpc();
- sync_Arb_IDs();
- setup_IO_APIC_irqs();
- init_IO_APIC_traps();
- check_timer();
- if (!acpi_ioapic)
- print_IO_APIC();
-}
-
-/*
- * Called after all the initialization is done. If we didnt find any
- * APIC bugs then we can allow the modify fast path
- */
-
-static int __init io_apic_bug_finalize(void)
-{
- if(sis_apic_bug == -1)
- sis_apic_bug = 0;
- return 0;
-}
-
-late_initcall(io_apic_bug_finalize);
-
-struct sysfs_ioapic_data {
- struct sys_device dev;
- struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
-
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
-{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
- unsigned long flags;
- int i;
-
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return 0;
-}
-
-static int ioapic_resume(struct sys_device *dev)
-{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
- unsigned long flags;
- union IO_APIC_reg_00 reg_00;
- int i;
-
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(dev->id, 0);
- if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
- reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
- io_apic_write(dev->id, 0, reg_00.raw);
- }
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return 0;
-}
-
-static struct sysdev_class ioapic_sysdev_class = {
- set_kset_name("ioapic"),
- .suspend = ioapic_suspend,
- .resume = ioapic_resume,
-};
-
-static int __init ioapic_init_sysfs(void)
-{
- struct sys_device * dev;
- int i, size, error = 0;
-
- error = sysdev_class_register(&ioapic_sysdev_class);
- if (error)
- return error;
-
- for (i = 0; i < nr_ioapics; i++ ) {
- size = sizeof(struct sys_device) + nr_ioapic_registers[i]
- * sizeof(struct IO_APIC_route_entry);
- mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
- if (!mp_ioapic_data[i]) {
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
- memset(mp_ioapic_data[i], 0, size);
- dev = &mp_ioapic_data[i]->dev;
- dev->id = i;
- dev->cls = &ioapic_sysdev_class;
- error = sysdev_register(dev);
- if (error) {
- kfree(mp_ioapic_data[i]);
- mp_ioapic_data[i] = NULL;
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
- }
-
- return 0;
-}
-
-device_initcall(ioapic_init_sysfs);
-
-/* --------------------------------------------------------------------------
- ACPI-based IOAPIC Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-int __init io_apic_get_unique_id (int ioapic, int apic_id)
-{
- union IO_APIC_reg_00 reg_00;
- static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
- physid_mask_t tmp;
- unsigned long flags;
- int i = 0;
-
- /*
- * The P4 platform supports up to 256 APIC IDs on two separate APIC
- * buses (one for LAPICs, one for IOAPICs), where predecessors only
- * supports up to 16 on one shared APIC bus.
- *
- * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
- * advantage of new APIC bus architecture.
- */
-
- if (physids_empty(apic_id_map))
- apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- if (apic_id >= get_physical_broadcast()) {
- printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
- "%d\n", ioapic, apic_id, reg_00.bits.ID);
- apic_id = reg_00.bits.ID;
- }
-
- /*
- * Every APIC in a system must have a unique ID or we get lots of nice
- * 'stuck on smp_invalidate_needed IPI wait' messages.
- */
- if (check_apicid_used(apic_id_map, apic_id)) {
-
- for (i = 0; i < get_physical_broadcast(); i++) {
- if (!check_apicid_used(apic_id_map, i))
- break;
- }
-
- if (i == get_physical_broadcast())
- panic("Max apic_id exceeded!\n");
-
- printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
- "trying %d\n", ioapic, apic_id, i);
-
- apic_id = i;
- }
-
- tmp = apicid_to_cpu_present(apic_id);
- physids_or(apic_id_map, apic_id_map, tmp);
-
- if (reg_00.bits.ID != apic_id) {
- reg_00.bits.ID = apic_id;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0, reg_00.raw);
- reg_00.raw = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- /* Sanity check */
- if (reg_00.bits.ID != apic_id)
- panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
- }
-
- apic_printk(APIC_VERBOSE, KERN_INFO
- "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
-
- return apic_id;
-}
-
-
-int __init io_apic_get_version (int ioapic)
-{
- union IO_APIC_reg_01 reg_01;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return reg_01.bits.version;
-}
-
-
-int __init io_apic_get_redir_entries (int ioapic)
-{
- union IO_APIC_reg_01 reg_01;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return reg_01.bits.entries;
-}
-
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
-{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- if (!IO_APIC_IRQ(irq)) {
- printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
- return -EINVAL;
- }
-
- /*
- * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
- * Note that we mask (disable) IRQs now -- these get enabled when the
- * corresponding device driver registers for this IRQ.
- */
-
- memset(&entry,0,sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
- entry.trigger = edge_level;
- entry.polarity = active_high_low;
- entry.mask = 1;
-
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= 16)
- add_pin_to_irq(irq, ioapic, pin);
-
- entry.vector = assign_irq_vector(irq);
-
- apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
- "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
- mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
- edge_level, active_high_low);
-
- ioapic_register_intr(irq, entry.vector, edge_level);
-
- if (!ioapic && (irq < 16))
- disable_8259A_irq(irq);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return 0;
-}
-
-#endif /* CONFIG_ACPI */
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
deleted file mode 100644
index b59a34dbe26..00000000000
--- a/arch/i386/kernel/ioport.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * linux/arch/i386/kernel/ioport.c
- *
- * This contains the io-permission bitmap code - written by obz, with changes
- * by Linus.
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/thread_info.h>
-
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
-{
- unsigned long mask;
- unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
- unsigned int low_index = base & (BITS_PER_LONG-1);
- int length = low_index + extent;
-
- if (low_index != 0) {
- mask = (~0UL << low_index);
- if (length < BITS_PER_LONG)
- mask &= ~(~0UL << length);
- if (new_value)
- *bitmap_base++ |= mask;
- else
- *bitmap_base++ &= ~mask;
- length -= BITS_PER_LONG;
- }
-
- mask = (new_value ? ~0UL : 0UL);
- while (length >= BITS_PER_LONG) {
- *bitmap_base++ = mask;
- length -= BITS_PER_LONG;
- }
-
- if (length > 0) {
- mask = ~(~0UL << length);
- if (new_value)
- *bitmap_base++ |= mask;
- else
- *bitmap_base++ &= ~mask;
- }
-}
-
-
-/*
- * this changes the io permissions bitmap in the current task.
- */
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
-{
- unsigned long i, max_long, bytes, bytes_updated;
- struct thread_struct * t = &current->thread;
- struct tss_struct * tss;
- unsigned long *bitmap;
-
- if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
- return -EINVAL;
- if (turn_on && !capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- /*
- * If it's the first ioperm() call in this thread's lifetime, set the
- * IO bitmap up. ioperm() is much less timing critical than clone(),
- * this is why we delay this operation until now:
- */
- if (!t->io_bitmap_ptr) {
- bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
- if (!bitmap)
- return -ENOMEM;
-
- memset(bitmap, 0xff, IO_BITMAP_BYTES);
- t->io_bitmap_ptr = bitmap;
- }
-
- /*
- * do it in the per-thread copy and in the TSS ...
- *
- * Disable preemption via get_cpu() - we must not switch away
- * because the ->io_bitmap_max value must match the bitmap
- * contents:
- */
- tss = &per_cpu(init_tss, get_cpu());
-
- set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
-
- /*
- * Search for a (possibly new) maximum. This is simple and stupid,
- * to keep it obviously correct:
- */
- max_long = 0;
- for (i = 0; i < IO_BITMAP_LONGS; i++)
- if (t->io_bitmap_ptr[i] != ~0UL)
- max_long = i;
-
- bytes = (max_long + 1) * sizeof(long);
- bytes_updated = max(bytes, t->io_bitmap_max);
-
- t->io_bitmap_max = bytes;
-
- /*
- * Sets the lazy trigger so that the next I/O operation will
- * reload the correct bitmap.
- * Reset the owner so that a process switch will not set
- * tss->io_bitmap_base to IO_BITMAP_OFFSET.
- */
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
- tss->io_bitmap_owner = NULL;
-
- put_cpu();
-
- return 0;
-}
-
-/*
- * sys_iopl has to be used when you want to access the IO ports
- * beyond the 0x3ff range: to get the full 65536 ports bitmapped
- * you'd need 8kB of bitmaps/process, which is a bit excessive.
- *
- * Here we just change the eflags value on the stack: we allow
- * only the super-user to do it. This depends on the stack-layout
- * on system-call entry - see also fork() and the signal handling
- * code.
- */
-
-asmlinkage long sys_iopl(unsigned long unused)
-{
- volatile struct pt_regs * regs = (struct pt_regs *) &unused;
- unsigned int level = regs->ebx;
- unsigned int old = (regs->eflags >> 12) & 3;
- struct thread_struct *t = &current->thread;
-
- if (level > 3)
- return -EINVAL;
- /* Trying to gain more privileges? */
- if (level > old) {
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
- }
- t->iopl = level << 12;
- regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
- set_iopl_mask(t->iopl);
- return 0;
-}
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
deleted file mode 100644
index 1a201a93286..00000000000
--- a/arch/i386/kernel/irq.c
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * linux/arch/i386/kernel/irq.c
- *
- * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
- *
- * This file contains the lowest level x86-specific interrupt
- * entry, irq-stacks and irq statistics code. All the remaining
- * irq logic is done by the generic kernel/irq/ code and
- * by the x86-specific irq controller code. (e.g. i8259.c and
- * io_apic.c.)
- */
-
-#include <asm/uaccess.h>
-#include <linux/module.h>
-#include <linux/seq_file.h>
-#include <linux/interrupt.h>
-#include <linux/kernel_stat.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/delay.h>
-
-DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
-EXPORT_PER_CPU_SYMBOL(irq_stat);
-
-#ifndef CONFIG_X86_LOCAL_APIC
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
- printk("unexpected IRQ trap at vector %02x\n", irq);
-}
-#endif
-
-#ifdef CONFIG_4KSTACKS
-/*
- * per-CPU IRQ handling contexts (thread information and stack)
- */
-union irq_ctx {
- struct thread_info tinfo;
- u32 stack[THREAD_SIZE/sizeof(u32)];
-};
-
-static union irq_ctx *hardirq_ctx[NR_CPUS];
-static union irq_ctx *softirq_ctx[NR_CPUS];
-#endif
-
-/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- */
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
-{
- /* high bits used in ret_from_ code */
- int irq = regs->orig_eax & 0xff;
-#ifdef CONFIG_4KSTACKS
- union irq_ctx *curctx, *irqctx;
- u32 *isp;
-#endif
-
- irq_enter();
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
- /* Debugging check for stack overflow: is there less than 1KB free? */
- {
- long esp;
-
- __asm__ __volatile__("andl %%esp,%0" :
- "=r" (esp) : "0" (THREAD_SIZE - 1));
- if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
- printk("do_IRQ: stack overflow: %ld\n",
- esp - sizeof(struct thread_info));
- dump_stack();
- }
- }
-#endif
-
-#ifdef CONFIG_4KSTACKS
-
- curctx = (union irq_ctx *) current_thread_info();
- irqctx = hardirq_ctx[smp_processor_id()];
-
- /*
- * this is where we switch to the IRQ stack. However, if we are
- * already using the IRQ stack (because we interrupted a hardirq
- * handler) we can't do that and just have to keep using the
- * current stack (which is the irq stack already after all)
- */
- if (curctx != irqctx) {
- int arg1, arg2, ebx;
-
- /* build the stack frame on the IRQ stack */
- isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
- irqctx->tinfo.task = curctx->tinfo.task;
- irqctx->tinfo.previous_esp = current_stack_pointer;
-
- asm volatile(
- " xchgl %%ebx,%%esp \n"
- " call __do_IRQ \n"
- " movl %%ebx,%%esp \n"
- : "=a" (arg1), "=d" (arg2), "=b" (ebx)
- : "0" (irq), "1" (regs), "2" (isp)
- : "memory", "cc", "ecx"
- );
- } else
-#endif
- __do_IRQ(irq, regs);
-
- irq_exit();
-
- return 1;
-}
-
-#ifdef CONFIG_4KSTACKS
-
-/*
- * These should really be __section__(".bss.page_aligned") as well, but
- * gcc's 3.0 and earlier don't handle that correctly.
- */
-static char softirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__aligned__(THREAD_SIZE)));
-
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__aligned__(THREAD_SIZE)));
-
-/*
- * allocate per-cpu stacks for hardirq and for softirq processing
- */
-void irq_ctx_init(int cpu)
-{
- union irq_ctx *irqctx;
-
- if (hardirq_ctx[cpu])
- return;
-
- irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
- irqctx->tinfo.task = NULL;
- irqctx->tinfo.exec_domain = NULL;
- irqctx->tinfo.cpu = cpu;
- irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
- irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
-
- hardirq_ctx[cpu] = irqctx;
-
- irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
- irqctx->tinfo.task = NULL;
- irqctx->tinfo.exec_domain = NULL;
- irqctx->tinfo.cpu = cpu;
- irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET;
- irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
-
- softirq_ctx[cpu] = irqctx;
-
- printk("CPU %u irqstacks, hard=%p soft=%p\n",
- cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
-}
-
-void irq_ctx_exit(int cpu)
-{
- hardirq_ctx[cpu] = NULL;
-}
-
-extern asmlinkage void __do_softirq(void);
-
-asmlinkage void do_softirq(void)
-{
- unsigned long flags;
- struct thread_info *curctx;
- union irq_ctx *irqctx;
- u32 *isp;
-
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
-
- if (local_softirq_pending()) {
- curctx = current_thread_info();
- irqctx = softirq_ctx[smp_processor_id()];
- irqctx->tinfo.task = curctx->task;
- irqctx->tinfo.previous_esp = current_stack_pointer;
-
- /* build the stack frame on the softirq stack */
- isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
-
- asm volatile(
- " xchgl %%ebx,%%esp \n"
- " call __do_softirq \n"
- " movl %%ebx,%%esp \n"
- : "=b"(isp)
- : "0"(isp)
- : "memory", "cc", "edx", "ecx", "eax"
- );
- }
-
- local_irq_restore(flags);
-}
-
-EXPORT_SYMBOL(do_softirq);
-#endif
-
-/*
- * Interrupt statistics:
- */
-
-atomic_t irq_err_count;
-
-/*
- * /proc/interrupts printing:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
- int i = *(loff_t *) v, j;
- struct irqaction * action;
- unsigned long flags;
-
- if (i == 0) {
- seq_printf(p, " ");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%d ",j);
- seq_putc(p, '\n');
- }
-
- if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
- if (!action)
- goto skip;
- seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
- seq_printf(p, " %14s", irq_desc[i].handler->typename);
- seq_printf(p, " %s", action->name);
-
- for (action=action->next; action; action = action->next)
- seq_printf(p, ", %s", action->name);
-
- seq_putc(p, '\n');
-skip:
- spin_unlock_irqrestore(&irq_desc[i].lock, flags);
- } else if (i == NR_IRQS) {
- seq_printf(p, "NMI: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", nmi_count(j));
- seq_putc(p, '\n');
-#ifdef CONFIG_X86_LOCAL_APIC
- seq_printf(p, "LOC: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- per_cpu(irq_stat,j).apic_timer_irqs);
- seq_putc(p, '\n');
-#endif
- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
- }
- return 0;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-#include <mach_apic.h>
-
-void fixup_irqs(cpumask_t map)
-{
- unsigned int irq;
- static int warned;
-
- for (irq = 0; irq < NR_IRQS; irq++) {
- cpumask_t mask;
- if (irq == 2)
- continue;
-
- cpus_and(mask, irq_affinity[irq], map);
- if (any_online_cpu(mask) == NR_CPUS) {
- printk("Breaking affinity for irq %i\n", irq);
- mask = map;
- }
- if (irq_desc[irq].handler->set_affinity)
- irq_desc[irq].handler->set_affinity(irq, mask);
- else if (irq_desc[irq].action && !(warned++))
- printk("Cannot set affinity for irq %i\n", irq);
- }
-
-#if 0
- barrier();
- /* Ingo Molnar says: "after the IO-APIC masks have been redirected
- [note the nop - the interrupt-enable boundary on x86 is two
- instructions from sti] - to flush out pending hardirqs and
- IPIs. After this point nothing is supposed to reach this CPU." */
- __asm__ __volatile__("sti; nop; cli");
- barrier();
-#else
- /* That doesn't seem sufficient. Give it 1ms. */
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
-#endif
-}
-#endif
-
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
deleted file mode 100644
index 6345b430b10..00000000000
--- a/arch/i386/kernel/kprobes.c
+++ /dev/null
@@ -1,548 +0,0 @@
-/*
- * Kernel Probes (KProbes)
- * arch/i386/kernel/kprobes.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2002, 2004
- *
- * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
- * Probes initial implementation ( includes contributions from
- * Rusty Russell).
- * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
- * interface to access function arguments.
- * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston
- * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
- * <prasanna@in.ibm.com> added function-return probes.
- */
-
-#include <linux/config.h>
-#include <linux/kprobes.h>
-#include <linux/ptrace.h>
-#include <linux/spinlock.h>
-#include <linux/preempt.h>
-#include <asm/cacheflush.h>
-#include <asm/kdebug.h>
-#include <asm/desc.h>
-
-static struct kprobe *current_kprobe;
-static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags;
-static struct kprobe *kprobe_prev;
-static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev;
-static struct pt_regs jprobe_saved_regs;
-static long *jprobe_saved_esp;
-/* copy of the kernel stack at the probe fire time */
-static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
-void jprobe_return_end(void);
-
-/*
- * returns non-zero if opcode modifies the interrupt flag.
- */
-static inline int is_IF_modifier(kprobe_opcode_t opcode)
-{
- switch (opcode) {
- case 0xfa: /* cli */
- case 0xfb: /* sti */
- case 0xcf: /* iret/iretd */
- case 0x9d: /* popf/popfd */
- return 1;
- }
- return 0;
-}
-
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
-{
- return 0;
-}
-
-void __kprobes arch_copy_kprobe(struct kprobe *p)
-{
- memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
- p->opcode = *p->addr;
-}
-
-void __kprobes arch_arm_kprobe(struct kprobe *p)
-{
- *p->addr = BREAKPOINT_INSTRUCTION;
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
-}
-
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
-{
- *p->addr = p->opcode;
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
-}
-
-void __kprobes arch_remove_kprobe(struct kprobe *p)
-{
-}
-
-static inline void save_previous_kprobe(void)
-{
- kprobe_prev = current_kprobe;
- kprobe_status_prev = kprobe_status;
- kprobe_old_eflags_prev = kprobe_old_eflags;
- kprobe_saved_eflags_prev = kprobe_saved_eflags;
-}
-
-static inline void restore_previous_kprobe(void)
-{
- current_kprobe = kprobe_prev;
- kprobe_status = kprobe_status_prev;
- kprobe_old_eflags = kprobe_old_eflags_prev;
- kprobe_saved_eflags = kprobe_saved_eflags_prev;
-}
-
-static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs)
-{
- current_kprobe = p;
- kprobe_saved_eflags = kprobe_old_eflags
- = (regs->eflags & (TF_MASK | IF_MASK));
- if (is_IF_modifier(p->opcode))
- kprobe_saved_eflags &= ~IF_MASK;
-}
-
-static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
- regs->eflags |= TF_MASK;
- regs->eflags &= ~IF_MASK;
- /*single step inline if the instruction is an int3*/
- if (p->opcode == BREAKPOINT_INSTRUCTION)
- regs->eip = (unsigned long)p->addr;
- else
- regs->eip = (unsigned long)&p->ainsn.insn;
-}
-
-void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
- struct pt_regs *regs)
-{
- unsigned long *sara = (unsigned long *)&regs->esp;
- struct kretprobe_instance *ri;
-
- if ((ri = get_free_rp_inst(rp)) != NULL) {
- ri->rp = rp;
- ri->task = current;
- ri->ret_addr = (kprobe_opcode_t *) *sara;
-
- /* Replace the return addr with trampoline addr */
- *sara = (unsigned long) &kretprobe_trampoline;
-
- add_rp_inst(ri);
- } else {
- rp->nmissed++;
- }
-}
-
-/*
- * Interrupts are disabled on entry as trap3 is an interrupt gate and they
- * remain disabled thorough out this function.
- */
-static int __kprobes kprobe_handler(struct pt_regs *regs)
-{
- struct kprobe *p;
- int ret = 0;
- kprobe_opcode_t *addr = NULL;
- unsigned long *lp;
-
- /* We're in an interrupt, but this is clear and BUG()-safe. */
- preempt_disable();
- /* Check if the application is using LDT entry for its code segment and
- * calculate the address by reading the base address from the LDT entry.
- */
- if ((regs->xcs & 4) && (current->mm)) {
- lp = (unsigned long *) ((unsigned long)((regs->xcs >> 3) * 8)
- + (char *) current->mm->context.ldt);
- addr = (kprobe_opcode_t *) (get_desc_base(lp) + regs->eip -
- sizeof(kprobe_opcode_t));
- } else {
- addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
- }
- /* Check we're not actually recursing */
- if (kprobe_running()) {
- /* We *are* holding lock here, so this is safe.
- Disarm the probe we just hit, and ignore it. */
- p = get_kprobe(addr);
- if (p) {
- if (kprobe_status == KPROBE_HIT_SS &&
- *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
- regs->eflags &= ~TF_MASK;
- regs->eflags |= kprobe_saved_eflags;
- unlock_kprobes();
- goto no_kprobe;
- }
- /* We have reentered the kprobe_handler(), since
- * another probe was hit while within the handler.
- * We here save the original kprobes variables and
- * just single step on the instruction of the new probe
- * without calling any user handlers.
- */
- save_previous_kprobe();
- set_current_kprobe(p, regs);
- p->nmissed++;
- prepare_singlestep(p, regs);
- kprobe_status = KPROBE_REENTER;
- return 1;
- } else {
- p = current_kprobe;
- if (p->break_handler && p->break_handler(p, regs)) {
- goto ss_probe;
- }
- }
- /* If it's not ours, can't be delete race, (we hold lock). */
- goto no_kprobe;
- }
-
- lock_kprobes();
- p = get_kprobe(addr);
- if (!p) {
- unlock_kprobes();
- if (regs->eflags & VM_MASK) {
- /* We are in virtual-8086 mode. Return 0 */
- goto no_kprobe;
- }
-
- if (*addr != BREAKPOINT_INSTRUCTION) {
- /*
- * The breakpoint instruction was removed right
- * after we hit it. Another cpu has removed
- * either a probepoint or a debugger breakpoint
- * at this address. In either case, no further
- * handling of this interrupt is appropriate.
- * Back up over the (now missing) int3 and run
- * the original instruction.
- */
- regs->eip -= sizeof(kprobe_opcode_t);
- ret = 1;
- }
- /* Not one of ours: let kernel handle it */
- goto no_kprobe;
- }
-
- kprobe_status = KPROBE_HIT_ACTIVE;
- set_current_kprobe(p, regs);
-
- if (p->pre_handler && p->pre_handler(p, regs))
- /* handler has already set things up, so skip ss setup */
- return 1;
-
-ss_probe:
- prepare_singlestep(p, regs);
- kprobe_status = KPROBE_HIT_SS;
- return 1;
-
-no_kprobe:
- preempt_enable_no_resched();
- return ret;
-}
-
-/*
- * For function-return probes, init_kprobes() establishes a probepoint
- * here. When a retprobed function returns, this probe is hit and
- * trampoline_probe_handler() runs, calling the kretprobe's handler.
- */
- void kretprobe_trampoline_holder(void)
- {
- asm volatile ( ".global kretprobe_trampoline\n"
- "kretprobe_trampoline: \n"
- "nop\n");
- }
-
-/*
- * Called when we hit the probe point at kretprobe_trampoline
- */
-int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head;
- struct hlist_node *node, *tmp;
- unsigned long orig_ret_address = 0;
- unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
-
- head = kretprobe_inst_table_head(current);
-
- /*
- * It is possible to have multiple instances associated with a given
- * task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more then one return
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always inserted at the head of the list
- * - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
- * real return address, and all the rest will point to
- * kretprobe_trampoline
- */
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- if (ri->rp && ri->rp->handler)
- ri->rp->handler(ri, regs);
-
- orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
- regs->eip = orig_ret_address;
-
- unlock_kprobes();
- preempt_enable_no_resched();
-
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we have handled unlocking
- * and re-enabling preemption.
- */
- return 1;
-}
-
-/*
- * Called after single-stepping. p->addr is the address of the
- * instruction whose first byte has been replaced by the "int 3"
- * instruction. To avoid the SMP problems that can occur when we
- * temporarily put back the original opcode to single-step, we
- * single-stepped a copy of the instruction. The address of this
- * copy is p->ainsn.insn.
- *
- * This function prepares to return from the post-single-step
- * interrupt. We have to fix up the stack as follows:
- *
- * 0) Except in the case of absolute or indirect jump or call instructions,
- * the new eip is relative to the copied instruction. We need to make
- * it relative to the original instruction.
- *
- * 1) If the single-stepped instruction was pushfl, then the TF and IF
- * flags are set in the just-pushed eflags, and may need to be cleared.
- *
- * 2) If the single-stepped instruction was a call, the return address
- * that is atop the stack is the address following the copied instruction.
- * We need to make it the address following the original instruction.
- */
-static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
-{
- unsigned long *tos = (unsigned long *)&regs->esp;
- unsigned long next_eip = 0;
- unsigned long copy_eip = (unsigned long)&p->ainsn.insn;
- unsigned long orig_eip = (unsigned long)p->addr;
-
- switch (p->ainsn.insn[0]) {
- case 0x9c: /* pushfl */
- *tos &= ~(TF_MASK | IF_MASK);
- *tos |= kprobe_old_eflags;
- break;
- case 0xc3: /* ret/lret */
- case 0xcb:
- case 0xc2:
- case 0xca:
- regs->eflags &= ~TF_MASK;
- /* eip is already adjusted, no more changes required*/
- return;
- case 0xe8: /* call relative - Fix return addr */
- *tos = orig_eip + (*tos - copy_eip);
- break;
- case 0xff:
- if ((p->ainsn.insn[1] & 0x30) == 0x10) {
- /* call absolute, indirect */
- /* Fix return addr; eip is correct. */
- next_eip = regs->eip;
- *tos = orig_eip + (*tos - copy_eip);
- } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */
- ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */
- /* eip is correct. */
- next_eip = regs->eip;
- }
- break;
- case 0xea: /* jmp absolute -- eip is correct */
- next_eip = regs->eip;
- break;
- default:
- break;
- }
-
- regs->eflags &= ~TF_MASK;
- if (next_eip) {
- regs->eip = next_eip;
- } else {
- regs->eip = orig_eip + (regs->eip - copy_eip);
- }
-}
-
-/*
- * Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled thoroughout this function. And we hold kprobe lock.
- */
-static inline int post_kprobe_handler(struct pt_regs *regs)
-{
- if (!kprobe_running())
- return 0;
-
- if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
- kprobe_status = KPROBE_HIT_SSDONE;
- current_kprobe->post_handler(current_kprobe, regs, 0);
- }
-
- resume_execution(current_kprobe, regs);
- regs->eflags |= kprobe_saved_eflags;
-
- /*Restore back the original saved kprobes variables and continue. */
- if (kprobe_status == KPROBE_REENTER) {
- restore_previous_kprobe();
- goto out;
- }
- unlock_kprobes();
-out:
- preempt_enable_no_resched();
-
- /*
- * if somebody else is singlestepping across a probe point, eflags
- * will have TF set, in which case, continue the remaining processing
- * of do_debug, as if this is not a probe hit.
- */
- if (regs->eflags & TF_MASK)
- return 0;
-
- return 1;
-}
-
-/* Interrupts disabled, kprobe_lock held. */
-static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
-{
- if (current_kprobe->fault_handler
- && current_kprobe->fault_handler(current_kprobe, regs, trapnr))
- return 1;
-
- if (kprobe_status & KPROBE_HIT_SS) {
- resume_execution(current_kprobe, regs);
- regs->eflags |= kprobe_old_eflags;
-
- unlock_kprobes();
- preempt_enable_no_resched();
- }
- return 0;
-}
-
-/*
- * Wrapper routine to for handling exceptions.
- */
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
-{
- struct die_args *args = (struct die_args *)data;
- switch (val) {
- case DIE_INT3:
- if (kprobe_handler(args->regs))
- return NOTIFY_STOP;
- break;
- case DIE_DEBUG:
- if (post_kprobe_handler(args->regs))
- return NOTIFY_STOP;
- break;
- case DIE_GPF:
- if (kprobe_running() &&
- kprobe_fault_handler(args->regs, args->trapnr))
- return NOTIFY_STOP;
- break;
- case DIE_PAGE_FAULT:
- if (kprobe_running() &&
- kprobe_fault_handler(args->regs, args->trapnr))
- return NOTIFY_STOP;
- break;
- default:
- break;
- }
- return NOTIFY_DONE;
-}
-
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- unsigned long addr;
-
- jprobe_saved_regs = *regs;
- jprobe_saved_esp = &regs->esp;
- addr = (unsigned long)jprobe_saved_esp;
-
- /*
- * TBD: As Linus pointed out, gcc assumes that the callee
- * owns the argument space and could overwrite it, e.g.
- * tailcall optimization. So, to be absolutely safe
- * we also save and restore enough stack bytes to cover
- * the argument area.
- */
- memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr));
- regs->eflags &= ~IF_MASK;
- regs->eip = (unsigned long)(jp->entry);
- return 1;
-}
-
-void __kprobes jprobe_return(void)
-{
- preempt_enable_no_resched();
- asm volatile (" xchgl %%ebx,%%esp \n"
- " int3 \n"
- " .globl jprobe_return_end \n"
- " jprobe_return_end: \n"
- " nop \n"::"b"
- (jprobe_saved_esp):"memory");
-}
-
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-{
- u8 *addr = (u8 *) (regs->eip - 1);
- unsigned long stack_addr = (unsigned long)jprobe_saved_esp;
- struct jprobe *jp = container_of(p, struct jprobe, kp);
-
- if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
- if (&regs->esp != jprobe_saved_esp) {
- struct pt_regs *saved_regs =
- container_of(jprobe_saved_esp, struct pt_regs, esp);
- printk("current esp %p does not match saved esp %p\n",
- &regs->esp, jprobe_saved_esp);
- printk("Saved registers for jprobe %p\n", jp);
- show_registers(saved_regs);
- printk("Current registers\n");
- show_registers(regs);
- BUG();
- }
- *regs = jprobe_saved_regs;
- memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack,
- MIN_STACK_SIZE(stack_addr));
- return 1;
- }
- return 0;
-}
-
-static struct kprobe trampoline_p = {
- .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
- .pre_handler = trampoline_probe_handler
-};
-
-int __init arch_init_kprobes(void)
-{
- return register_kprobe(&trampoline_p);
-}
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
deleted file mode 100644
index 983f95707e1..00000000000
--- a/arch/i386/kernel/ldt.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * linux/kernel/ldt.c
- *
- * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/ldt.h>
-#include <asm/desc.h>
-#include <asm/mmu_context.h>
-
-#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
-static void flush_ldt(void *null)
-{
- if (current->active_mm)
- load_LDT(&current->active_mm->context);
-}
-#endif
-
-static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
-{
- void *oldldt;
- void *newldt;
- int oldsize;
-
- if (mincount <= pc->size)
- return 0;
- oldsize = pc->size;
- mincount = (mincount+511)&(~511);
- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
- newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
- else
- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
-
- if (!newldt)
- return -ENOMEM;
-
- if (oldsize)
- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
- oldldt = pc->ldt;
- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
- pc->ldt = newldt;
- wmb();
- pc->size = mincount;
- wmb();
-
- if (reload) {
-#ifdef CONFIG_SMP
- cpumask_t mask;
- preempt_disable();
- load_LDT(pc);
- mask = cpumask_of_cpu(smp_processor_id());
- if (!cpus_equal(current->mm->cpu_vm_mask, mask))
- smp_call_function(flush_ldt, NULL, 1, 1);
- preempt_enable();
-#else
- load_LDT(pc);
-#endif
- }
- if (oldsize) {
- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(oldldt);
- else
- kfree(oldldt);
- }
- return 0;
-}
-
-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
-{
- int err = alloc_ldt(new, old->size, 0);
- if (err < 0)
- return err;
- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
- return 0;
-}
-
-/*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
- */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
- struct mm_struct * old_mm;
- int retval = 0;
-
- init_MUTEX(&mm->context.sem);
- mm->context.size = 0;
- old_mm = current->mm;
- if (old_mm && old_mm->context.size > 0) {
- down(&old_mm->context.sem);
- retval = copy_ldt(&mm->context, &old_mm->context);
- up(&old_mm->context.sem);
- }
- return retval;
-}
-
-/*
- * No need to lock the MM as we are the last user
- */
-void destroy_context(struct mm_struct *mm)
-{
- if (mm->context.size) {
- if (mm == current->active_mm)
- clear_LDT();
- if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(mm->context.ldt);
- else
- kfree(mm->context.ldt);
- mm->context.size = 0;
- }
-}
-
-static int read_ldt(void __user * ptr, unsigned long bytecount)
-{
- int err;
- unsigned long size;
- struct mm_struct * mm = current->mm;
-
- if (!mm->context.size)
- return 0;
- if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
- bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
-
- down(&mm->context.sem);
- size = mm->context.size*LDT_ENTRY_SIZE;
- if (size > bytecount)
- size = bytecount;
-
- err = 0;
- if (copy_to_user(ptr, mm->context.ldt, size))
- err = -EFAULT;
- up(&mm->context.sem);
- if (err < 0)
- goto error_return;
- if (size != bytecount) {
- /* zero-fill the rest */
- if (clear_user(ptr+size, bytecount-size) != 0) {
- err = -EFAULT;
- goto error_return;
- }
- }
- return bytecount;
-error_return:
- return err;
-}
-
-static int read_default_ldt(void __user * ptr, unsigned long bytecount)
-{
- int err;
- unsigned long size;
- void *address;
-
- err = 0;
- address = &default_ldt[0];
- size = 5*sizeof(struct desc_struct);
- if (size > bytecount)
- size = bytecount;
-
- err = size;
- if (copy_to_user(ptr, address, size))
- err = -EFAULT;
-
- return err;
-}
-
-static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
-{
- struct mm_struct * mm = current->mm;
- __u32 entry_1, entry_2;
- int error;
- struct user_desc ldt_info;
-
- error = -EINVAL;
- if (bytecount != sizeof(ldt_info))
- goto out;
- error = -EFAULT;
- if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
- goto out;
-
- error = -EINVAL;
- if (ldt_info.entry_number >= LDT_ENTRIES)
- goto out;
- if (ldt_info.contents == 3) {
- if (oldmode)
- goto out;
- if (ldt_info.seg_not_present == 0)
- goto out;
- }
-
- down(&mm->context.sem);
- if (ldt_info.entry_number >= mm->context.size) {
- error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
- if (error < 0)
- goto out_unlock;
- }
-
- /* Allow LDTs to be cleared by the user. */
- if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
- if (oldmode || LDT_empty(&ldt_info)) {
- entry_1 = 0;
- entry_2 = 0;
- goto install;
- }
- }
-
- entry_1 = LDT_entry_a(&ldt_info);
- entry_2 = LDT_entry_b(&ldt_info);
- if (oldmode)
- entry_2 &= ~(1 << 20);
-
- /* Install the new entry ... */
-install:
- write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2);
- error = 0;
-
-out_unlock:
- up(&mm->context.sem);
-out:
- return error;
-}
-
-asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
-{
- int ret = -ENOSYS;
-
- switch (func) {
- case 0:
- ret = read_ldt(ptr, bytecount);
- break;
- case 1:
- ret = write_ldt(ptr, bytecount, 1);
- break;
- case 2:
- ret = read_default_ldt(ptr, bytecount);
- break;
- case 0x11:
- ret = write_ldt(ptr, bytecount, 0);
- break;
- }
- return ret;
-}
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
deleted file mode 100644
index a912fed4848..00000000000
--- a/arch/i386/kernel/machine_kexec.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * machine_kexec.c - handle transition of Linux booting another kernel
- * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/kexec.h>
-#include <linux/delay.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/cpufeature.h>
-#include <asm/desc.h>
-#include <asm/system.h>
-
-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-
-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L2_ATTR (_PAGE_PRESENT)
-
-#define LEVEL0_SIZE (1UL << 12UL)
-
-#ifndef CONFIG_X86_PAE
-#define LEVEL1_SIZE (1UL << 22UL)
-static u32 pgtable_level1[1024] PAGE_ALIGNED;
-
-static void identity_map_page(unsigned long address)
-{
- unsigned long level1_index, level2_index;
- u32 *pgtable_level2;
-
- /* Find the current page table */
- pgtable_level2 = __va(read_cr3());
-
- /* Find the indexes of the physical address to identity map */
- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
- level2_index = address / LEVEL1_SIZE;
-
- /* Identity map the page table entry */
- pgtable_level1[level1_index] = address | L0_ATTR;
- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
-
- /* Flush the tlb so the new mapping takes effect.
- * Global tlb entries are not flushed but that is not an issue.
- */
- load_cr3(pgtable_level2);
-}
-
-#else
-#define LEVEL1_SIZE (1UL << 21UL)
-#define LEVEL2_SIZE (1UL << 30UL)
-static u64 pgtable_level1[512] PAGE_ALIGNED;
-static u64 pgtable_level2[512] PAGE_ALIGNED;
-
-static void identity_map_page(unsigned long address)
-{
- unsigned long level1_index, level2_index, level3_index;
- u64 *pgtable_level3;
-
- /* Find the current page table */
- pgtable_level3 = __va(read_cr3());
-
- /* Find the indexes of the physical address to identity map */
- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
- level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
- level3_index = address / LEVEL2_SIZE;
-
- /* Identity map the page table entry */
- pgtable_level1[level1_index] = address | L0_ATTR;
- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
- set_64bit(&pgtable_level3[level3_index],
- __pa(pgtable_level2) | L2_ATTR);
-
- /* Flush the tlb so the new mapping takes effect.
- * Global tlb entries are not flushed but that is not an issue.
- */
- load_cr3(pgtable_level3);
-}
-#endif
-
-static void set_idt(void *newidt, __u16 limit)
-{
- struct Xgt_desc_struct curidt;
-
- /* ia32 supports unaliged loads & stores */
- curidt.size = limit;
- curidt.address = (unsigned long)newidt;
-
- load_idt(&curidt);
-};
-
-
-static void set_gdt(void *newgdt, __u16 limit)
-{
- struct Xgt_desc_struct curgdt;
-
- /* ia32 supports unaligned loads & stores */
- curgdt.size = limit;
- curgdt.address = (unsigned long)newgdt;
-
- load_gdt(&curgdt);
-};
-
-static void load_segments(void)
-{
-#define __STR(X) #X
-#define STR(X) __STR(X)
-
- __asm__ __volatile__ (
- "\tljmp $"STR(__KERNEL_CS)",$1f\n"
- "\t1:\n"
- "\tmovl $"STR(__KERNEL_DS)",%eax\n"
- "\tmovl %eax,%ds\n"
- "\tmovl %eax,%es\n"
- "\tmovl %eax,%fs\n"
- "\tmovl %eax,%gs\n"
- "\tmovl %eax,%ss\n"
- );
-#undef STR
-#undef __STR
-}
-
-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
- unsigned long indirection_page,
- unsigned long reboot_code_buffer,
- unsigned long start_address,
- unsigned int has_pae) ATTRIB_NORET;
-
-const extern unsigned char relocate_new_kernel[];
-extern void relocate_new_kernel_end(void);
-const extern unsigned int relocate_new_kernel_size;
-
-/*
- * A architecture hook called to validate the
- * proposed image and prepare the control pages
- * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
- * have been allocated, but the segments have yet
- * been copied into the kernel.
- *
- * Do what every setup is needed on image and the
- * reboot code buffer to allow us to avoid allocations
- * later.
- *
- * Currently nothing.
- */
-int machine_kexec_prepare(struct kimage *image)
-{
- return 0;
-}
-
-/*
- * Undo anything leftover by machine_kexec_prepare
- * when an image is freed.
- */
-void machine_kexec_cleanup(struct kimage *image)
-{
-}
-
-/*
- * Do not allocate memory (or fail in any way) in machine_kexec().
- * We are past the point of no return, committed to rebooting now.
- */
-NORET_TYPE void machine_kexec(struct kimage *image)
-{
- unsigned long page_list;
- unsigned long reboot_code_buffer;
-
- relocate_new_kernel_t rnk;
-
- /* Interrupts aren't acceptable while we reboot */
- local_irq_disable();
-
- /* Compute some offsets */
- reboot_code_buffer = page_to_pfn(image->control_code_page)
- << PAGE_SHIFT;
- page_list = image->head;
-
- /* Set up an identity mapping for the reboot_code_buffer */
- identity_map_page(reboot_code_buffer);
-
- /* copy it out */
- memcpy((void *)reboot_code_buffer, relocate_new_kernel,
- relocate_new_kernel_size);
-
- /* The segment registers are funny things, they are
- * automatically loaded from a table, in memory wherever you
- * set them to a specific selector, but this table is never
- * accessed again you set the segment to a different selector.
- *
- * The more common model is are caches where the behide
- * the scenes work is done, but is also dropped at arbitrary
- * times.
- *
- * I take advantage of this here by force loading the
- * segments, before I zap the gdt with an invalid value.
- */
- load_segments();
- /* The gdt & idt are now invalid.
- * If you want to load them you must set up your own idt & gdt.
- */
- set_gdt(phys_to_virt(0),0);
- set_idt(phys_to_virt(0),0);
-
- /* now call it */
- rnk = (relocate_new_kernel_t) reboot_code_buffer;
- (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
-}
diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c
deleted file mode 100644
index 558bb207720..00000000000
--- a/arch/i386/kernel/mca.c
+++ /dev/null
@@ -1,474 +0,0 @@
-/*
- * linux/arch/i386/kernel/mca.c
- * Written by Martin Kolinek, February 1996
- *
- * Changes:
- *
- * Chris Beauregard July 28th, 1996
- * - Fixed up integrated SCSI detection
- *
- * Chris Beauregard August 3rd, 1996
- * - Made mca_info local
- * - Made integrated registers accessible through standard function calls
- * - Added name field
- * - More sanity checking
- *
- * Chris Beauregard August 9th, 1996
- * - Rewrote /proc/mca
- *
- * Chris Beauregard January 7th, 1997
- * - Added basic NMI-processing
- * - Added more information to mca_info structure
- *
- * David Weinehall October 12th, 1998
- * - Made a lot of cleaning up in the source
- * - Added use of save_flags / restore_flags
- * - Added the 'driver_loaded' flag in MCA_adapter
- * - Added an alternative implemention of ZP Gu's mca_find_unused_adapter
- *
- * David Weinehall March 24th, 1999
- * - Fixed the output of 'Driver Installed' in /proc/mca/pos
- * - Made the Integrated Video & SCSI show up even if they have id 0000
- *
- * Alexander Viro November 9th, 1999
- * - Switched to regular procfs methods
- *
- * Alfred Arnold & David Weinehall August 23rd, 2000
- * - Added support for Planar POS-registers
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/mca.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <linux/proc_fs.h>
-#include <linux/mman.h>
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/ioport.h>
-#include <asm/uaccess.h>
-#include <linux/init.h>
-#include <asm/arch_hooks.h>
-
-static unsigned char which_scsi = 0;
-
-int MCA_bus = 0;
-EXPORT_SYMBOL(MCA_bus);
-
-/*
- * Motherboard register spinlock. Untested on SMP at the moment, but
- * are there any MCA SMP boxes?
- *
- * Yes - Alan
- */
-static DEFINE_SPINLOCK(mca_lock);
-
-/* Build the status info for the adapter */
-
-static void mca_configure_adapter_status(struct mca_device *mca_dev) {
- mca_dev->status = MCA_ADAPTER_NONE;
-
- mca_dev->pos_id = mca_dev->pos[0]
- + (mca_dev->pos[1] << 8);
-
- if(!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) {
-
- /* id = 0x0000 usually indicates hardware failure,
- * however, ZP Gu (zpg@castle.net> reports that his 9556
- * has 0x0000 as id and everything still works. There
- * also seem to be an adapter with id = 0x0000; the
- * NCR Parallel Bus Memory Card. Until this is confirmed,
- * however, this code will stay.
- */
-
- mca_dev->status = MCA_ADAPTER_ERROR;
-
- return;
- } else if(mca_dev->pos_id != 0xffff) {
-
- /* 0xffff usually indicates that there's no adapter,
- * however, some integrated adapters may have 0xffff as
- * their id and still be valid. Examples are on-board
- * VGA of the 55sx, the integrated SCSI of the 56 & 57,
- * and possibly also the 95 ULTIMEDIA.
- */
-
- mca_dev->status = MCA_ADAPTER_NORMAL;
- }
-
- if((mca_dev->pos_id == 0xffff ||
- mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) {
- int j;
-
- for(j = 2; j < 8; j++) {
- if(mca_dev->pos[j] != 0xff) {
- mca_dev->status = MCA_ADAPTER_NORMAL;
- break;
- }
- }
- }
-
- if(!(mca_dev->pos[2] & MCA_ENABLED)) {
-
- /* enabled bit is in POS 2 */
-
- mca_dev->status = MCA_ADAPTER_DISABLED;
- }
-} /* mca_configure_adapter_status */
-
-/*--------------------------------------------------------------------*/
-
-static struct resource mca_standard_resources[] = {
- { .start = 0x60, .end = 0x60, .name = "system control port B (MCA)" },
- { .start = 0x90, .end = 0x90, .name = "arbitration (MCA)" },
- { .start = 0x91, .end = 0x91, .name = "card Select Feedback (MCA)" },
- { .start = 0x92, .end = 0x92, .name = "system Control port A (MCA)" },
- { .start = 0x94, .end = 0x94, .name = "system board setup (MCA)" },
- { .start = 0x96, .end = 0x97, .name = "POS (MCA)" },
- { .start = 0x100, .end = 0x107, .name = "POS (MCA)" }
-};
-
-#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources)
-
-/**
- * mca_read_and_store_pos - read the POS registers into a memory buffer
- * @pos: a char pointer to 8 bytes, contains the POS register value on
- * successful return
- *
- * Returns 1 if a card actually exists (i.e. the pos isn't
- * all 0xff) or 0 otherwise
- */
-static int mca_read_and_store_pos(unsigned char *pos) {
- int j;
- int found = 0;
-
- for(j=0; j<8; j++) {
- if((pos[j] = inb_p(MCA_POS_REG(j))) != 0xff) {
- /* 0xff all across means no device. 0x00 means
- * something's broken, but a device is
- * probably there. However, if you get 0x00
- * from a motherboard register it won't matter
- * what we find. For the record, on the
- * 57SLC, the integrated SCSI adapter has
- * 0xffff for the adapter ID, but nonzero for
- * other registers. */
-
- found = 1;
- }
- }
- return found;
-}
-
-static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg)
-{
- unsigned char byte;
- unsigned long flags;
-
- if(reg < 0 || reg >= 8)
- return 0;
-
- spin_lock_irqsave(&mca_lock, flags);
- if(mca_dev->pos_register) {
- /* Disable adapter setup, enable motherboard setup */
-
- outb_p(0, MCA_ADAPTER_SETUP_REG);
- outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
-
- byte = inb_p(MCA_POS_REG(reg));
- outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
- } else {
-
- /* Make sure motherboard setup is off */
-
- outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
-
- /* Read the appropriate register */
-
- outb_p(0x8|(mca_dev->slot & 0xf), MCA_ADAPTER_SETUP_REG);
- byte = inb_p(MCA_POS_REG(reg));
- outb_p(0, MCA_ADAPTER_SETUP_REG);
- }
- spin_unlock_irqrestore(&mca_lock, flags);
-
- mca_dev->pos[reg] = byte;
-
- return byte;
-}
-
-static void mca_pc_write_pos(struct mca_device *mca_dev, int reg,
- unsigned char byte)
-{
- unsigned long flags;
-
- if(reg < 0 || reg >= 8)
- return;
-
- spin_lock_irqsave(&mca_lock, flags);
-
- /* Make sure motherboard setup is off */
-
- outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
-
- /* Read in the appropriate register */
-
- outb_p(0x8|(mca_dev->slot&0xf), MCA_ADAPTER_SETUP_REG);
- outb_p(byte, MCA_POS_REG(reg));
- outb_p(0, MCA_ADAPTER_SETUP_REG);
-
- spin_unlock_irqrestore(&mca_lock, flags);
-
- /* Update the global register list, while we have the byte */
-
- mca_dev->pos[reg] = byte;
-
-}
-
-/* for the primary MCA bus, we have identity transforms */
-static int mca_dummy_transform_irq(struct mca_device * mca_dev, int irq)
-{
- return irq;
-}
-
-static int mca_dummy_transform_ioport(struct mca_device * mca_dev, int port)
-{
- return port;
-}
-
-static void *mca_dummy_transform_memory(struct mca_device * mca_dev, void *mem)
-{
- return mem;
-}
-
-
-static int __init mca_init(void)
-{
- unsigned int i, j;
- struct mca_device *mca_dev;
- unsigned char pos[8];
- short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00};
- struct mca_bus *bus;
-
- /* WARNING: Be careful when making changes here. Putting an adapter
- * and the motherboard simultaneously into setup mode may result in
- * damage to chips (according to The Indispensible PC Hardware Book
- * by Hans-Peter Messmer). Also, we disable system interrupts (so
- * that we are not disturbed in the middle of this).
- */
-
- /* Make sure the MCA bus is present */
-
- if (mca_system_init()) {
- printk(KERN_ERR "MCA bus system initialisation failed\n");
- return -ENODEV;
- }
-
- if (!MCA_bus)
- return -ENODEV;
-
- printk(KERN_INFO "Micro Channel bus detected.\n");
-
- /* All MCA systems have at least a primary bus */
- bus = mca_attach_bus(MCA_PRIMARY_BUS);
- if (!bus)
- goto out_nomem;
- bus->default_dma_mask = 0xffffffffLL;
- bus->f.mca_write_pos = mca_pc_write_pos;
- bus->f.mca_read_pos = mca_pc_read_pos;
- bus->f.mca_transform_irq = mca_dummy_transform_irq;
- bus->f.mca_transform_ioport = mca_dummy_transform_ioport;
- bus->f.mca_transform_memory = mca_dummy_transform_memory;
-
- /* get the motherboard device */
- mca_dev = kmalloc(sizeof(struct mca_device), GFP_KERNEL);
- if(unlikely(!mca_dev))
- goto out_nomem;
- memset(mca_dev, 0, sizeof(struct mca_device));
-
- /*
- * We do not expect many MCA interrupts during initialization,
- * but let us be safe:
- */
- spin_lock_irq(&mca_lock);
-
- /* Make sure adapter setup is off */
-
- outb_p(0, MCA_ADAPTER_SETUP_REG);
-
- /* Read motherboard POS registers */
-
- mca_dev->pos_register = 0x7f;
- outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
- mca_dev->name[0] = 0;
- mca_read_and_store_pos(mca_dev->pos);
- mca_configure_adapter_status(mca_dev);
- /* fake POS and slot for a motherboard */
- mca_dev->pos_id = MCA_MOTHERBOARD_POS;
- mca_dev->slot = MCA_MOTHERBOARD;
- mca_register_device(MCA_PRIMARY_BUS, mca_dev);
-
- mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
- if(unlikely(!mca_dev))
- goto out_unlock_nomem;
- memset(mca_dev, 0, sizeof(struct mca_device));
-
-
- /* Put motherboard into video setup mode, read integrated video
- * POS registers, and turn motherboard setup off.
- */
-
- mca_dev->pos_register = 0xdf;
- outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
- mca_dev->name[0] = 0;
- mca_read_and_store_pos(mca_dev->pos);
- mca_configure_adapter_status(mca_dev);
- /* fake POS and slot for the integrated video */
- mca_dev->pos_id = MCA_INTEGVIDEO_POS;
- mca_dev->slot = MCA_INTEGVIDEO;
- mca_register_device(MCA_PRIMARY_BUS, mca_dev);
-
- /* Put motherboard into scsi setup mode, read integrated scsi
- * POS registers, and turn motherboard setup off.
- *
- * It seems there are two possible SCSI registers. Martin says that
- * for the 56,57, 0xf7 is the one, but fails on the 76.
- * Alfredo (apena@vnet.ibm.com) says
- * 0xfd works on his machine. We'll try both of them. I figure it's
- * a good bet that only one could be valid at a time. This could
- * screw up though if one is used for something else on the other
- * machine.
- */
-
- for(i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) {
- outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG);
- if(mca_read_and_store_pos(pos))
- break;
- }
- if(which_scsi) {
- /* found a scsi card */
- mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
- if(unlikely(!mca_dev))
- goto out_unlock_nomem;
- memset(mca_dev, 0, sizeof(struct mca_device));
-
- for(j = 0; j < 8; j++)
- mca_dev->pos[j] = pos[j];
-
- mca_configure_adapter_status(mca_dev);
- /* fake POS and slot for integrated SCSI controller */
- mca_dev->pos_id = MCA_INTEGSCSI_POS;
- mca_dev->slot = MCA_INTEGSCSI;
- mca_dev->pos_register = which_scsi;
- mca_register_device(MCA_PRIMARY_BUS, mca_dev);
- }
-
- /* Turn off motherboard setup */
-
- outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
-
- /* Now loop over MCA slots: put each adapter into setup mode, and
- * read its POS registers. Then put adapter setup off.
- */
-
- for(i=0; i<MCA_MAX_SLOT_NR; i++) {
- outb_p(0x8|(i&0xf), MCA_ADAPTER_SETUP_REG);
- if(!mca_read_and_store_pos(pos))
- continue;
-
- mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
- if(unlikely(!mca_dev))
- goto out_unlock_nomem;
- memset(mca_dev, 0, sizeof(struct mca_device));
-
- for(j=0; j<8; j++)
- mca_dev->pos[j]=pos[j];
-
- mca_dev->driver_loaded = 0;
- mca_dev->slot = i;
- mca_dev->pos_register = 0;
- mca_configure_adapter_status(mca_dev);
- mca_register_device(MCA_PRIMARY_BUS, mca_dev);
- }
- outb_p(0, MCA_ADAPTER_SETUP_REG);
-
- /* Enable interrupts and return memory start */
- spin_unlock_irq(&mca_lock);
-
- for (i = 0; i < MCA_STANDARD_RESOURCES; i++)
- request_resource(&ioport_resource, mca_standard_resources + i);
-
- mca_do_proc_init();
-
- return 0;
-
- out_unlock_nomem:
- spin_unlock_irq(&mca_lock);
- out_nomem:
- printk(KERN_EMERG "Failed memory allocation in MCA setup!\n");
- return -ENOMEM;
-}
-
-subsys_initcall(mca_init);
-
-/*--------------------------------------------------------------------*/
-
-static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
-{
- int slot = mca_dev->slot;
-
- if(slot == MCA_INTEGSCSI) {
- printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n",
- mca_dev->name);
- } else if(slot == MCA_INTEGVIDEO) {
- printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n",
- mca_dev->name);
- } else if(slot == MCA_MOTHERBOARD) {
- printk(KERN_CRIT "NMI: caused by motherboard (%s)\n",
- mca_dev->name);
- }
-
- /* More info available in POS 6 and 7? */
-
- if(check_flag) {
- unsigned char pos6, pos7;
-
- pos6 = mca_device_read_pos(mca_dev, 6);
- pos7 = mca_device_read_pos(mca_dev, 7);
-
- printk(KERN_CRIT "NMI: POS 6 = 0x%x, POS 7 = 0x%x\n", pos6, pos7);
- }
-
-} /* mca_handle_nmi_slot */
-
-/*--------------------------------------------------------------------*/
-
-static int mca_handle_nmi_callback(struct device *dev, void *data)
-{
- struct mca_device *mca_dev = to_mca_device(dev);
- unsigned char pos5;
-
- pos5 = mca_device_read_pos(mca_dev, 5);
-
- if(!(pos5 & 0x80)) {
- /* Bit 7 of POS 5 is reset when this adapter has a hardware
- * error. Bit 7 it reset if there's error information
- * available in POS 6 and 7.
- */
- mca_handle_nmi_device(mca_dev, !(pos5 & 0x40));
- return 1;
- }
- return 0;
-}
-
-void mca_handle_nmi(void)
-{
- /* First try - scan the various adapters and see if a specific
- * adapter was responsible for the error.
- */
- bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback);
-
- mca_nmi_hook();
-} /* mca_handle_nmi */
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
deleted file mode 100644
index 165f13158c6..00000000000
--- a/arch/i386/kernel/microcode.c
+++ /dev/null
@@ -1,515 +0,0 @@
-/*
- * Intel CPU Microcode Update Driver for Linux
- *
- * Copyright (C) 2000-2004 Tigran Aivazian
- *
- * This driver allows to upgrade microcode on Intel processors
- * belonging to IA-32 family - PentiumPro, Pentium II,
- * Pentium III, Xeon, Pentium 4, etc.
- *
- * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual,
- * Order Number 245472 or free download from:
- *
- * http://developer.intel.com/design/pentium4/manuals/245472.htm
- *
- * For more information, go to http://www.urbanmyth.org/microcode
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
- * Initial release.
- * 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
- * Added read() support + cleanups.
- * 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
- * Added 'device trimming' support. open(O_WRONLY) zeroes
- * and frees the saved copy of applied microcode.
- * 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
- * Made to use devfs (/dev/cpu/microcode) + cleanups.
- * 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
- * Added misc device support (now uses both devfs and misc).
- * Added MICROCODE_IOCFREE ioctl to clear memory.
- * 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
- * Messages for error cases (non Intel & no suitable microcode).
- * 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
- * Removed ->release(). Removed exclusive open and status bitmap.
- * Added microcode_rwsem to serialize read()/write()/ioctl().
- * Removed global kernel lock usage.
- * 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
- * Write 0 to 0x8B msr and then cpuid before reading revision,
- * so that it works even if there were no update done by the
- * BIOS. Otherwise, reading from 0x8B gives junk (which happened
- * to be 0 on my machine which is why it worked even when I
- * disabled update by the BIOS)
- * Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
- * 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
- * Tigran Aivazian <tigran@veritas.com>
- * Intel Pentium 4 processor support and bugfixes.
- * 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
- * Bugfix for HT (Hyper-Threading) enabled processors
- * whereby processor resources are shared by all logical processors
- * in a single CPU package.
- * 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
- * Tigran Aivazian <tigran@veritas.com>,
- * Serialize updates as required on HT processors due to speculative
- * nature of implementation.
- * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
- * Fix the panic when writing zero-length microcode chunk.
- * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
- * Jun Nakajima <jun.nakajima@intel.com>
- * Support for the microcode updates in the new format.
- * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
- * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
- * because we no longer hold a copy of applied microcode
- * in kernel memory.
- * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
- * Fix sigmatch() macro to handle old CPUs with pf == 0.
- * Thanks to Stuart Swales for pointing out this bug.
- */
-
-//#define DEBUG /* pr_debug */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/miscdevice.h>
-#include <linux/spinlock.h>
-#include <linux/mm.h>
-
-#include <asm/msr.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-
-MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
-MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
-MODULE_LICENSE("GPL");
-
-#define MICROCODE_VERSION "1.14"
-
-#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */
-#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
-#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */
-#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */
-#define DWSIZE (sizeof (u32))
-#define get_totalsize(mc) \
- (((microcode_t *)mc)->hdr.totalsize ? \
- ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE)
-#define get_datasize(mc) \
- (((microcode_t *)mc)->hdr.datasize ? \
- ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
-
-#define sigmatch(s1, s2, p1, p2) \
- (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
-
-#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
-
-/* serialize access to the physical write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
-
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-static DECLARE_MUTEX(microcode_sem);
-
-static void __user *user_buffer; /* user area microcode data buffer */
-static unsigned int user_buffer_size; /* it's size */
-
-typedef enum mc_error_code {
- MC_SUCCESS = 0,
- MC_NOTFOUND = 1,
- MC_MARKED = 2,
- MC_ALLOCATED = 3,
-} mc_error_code_t;
-
-static struct ucode_cpu_info {
- unsigned int sig;
- unsigned int pf;
- unsigned int rev;
- unsigned int cksum;
- mc_error_code_t err;
- microcode_t *mc;
-} ucode_cpu_info[NR_CPUS];
-
-static int microcode_open (struct inode *unused1, struct file *unused2)
-{
- return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
-}
-
-static void collect_cpu_info (void *unused)
-{
- int cpu_num = smp_processor_id();
- struct cpuinfo_x86 *c = cpu_data + cpu_num;
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
- unsigned int val[2];
-
- uci->sig = uci->pf = uci->rev = uci->cksum = 0;
- uci->err = MC_NOTFOUND;
- uci->mc = NULL;
-
- if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
- cpu_has(c, X86_FEATURE_IA64)) {
- printk(KERN_ERR "microcode: CPU%d not a capable Intel processor\n", cpu_num);
- return;
- } else {
- uci->sig = cpuid_eax(0x00000001);
-
- if ((c->x86_model >= 5) || (c->x86 > 6)) {
- /* get processor flags from MSR 0x17 */
- rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
- uci->pf = 1 << ((val[1] >> 18) & 7);
- }
- }
-
- wrmsr(MSR_IA32_UCODE_REV, 0, 0);
- /* see notes above for revision 1.07. Apparent chip bug */
- serialize_cpu();
- /* get the current revision from MSR 0x8B */
- rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
- pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
- uci->sig, uci->pf, uci->rev);
-}
-
-static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_header, int sig, int pf, int cksum)
-{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-
- pr_debug("Microcode Found.\n");
- pr_debug(" Header Revision 0x%x\n", mc_header->hdrver);
- pr_debug(" Loader Revision 0x%x\n", mc_header->ldrver);
- pr_debug(" Revision 0x%x \n", mc_header->rev);
- pr_debug(" Date %x/%x/%x\n",
- ((mc_header->date >> 24 ) & 0xff),
- ((mc_header->date >> 16 ) & 0xff),
- (mc_header->date & 0xFFFF));
- pr_debug(" Signature 0x%x\n", sig);
- pr_debug(" Type 0x%x Family 0x%x Model 0x%x Stepping 0x%x\n",
- ((sig >> 12) & 0x3),
- ((sig >> 8) & 0xf),
- ((sig >> 4) & 0xf),
- ((sig & 0xf)));
- pr_debug(" Processor Flags 0x%x\n", pf);
- pr_debug(" Checksum 0x%x\n", cksum);
-
- if (mc_header->rev < uci->rev) {
- printk(KERN_ERR "microcode: CPU%d not 'upgrading' to earlier revision"
- " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
- goto out;
- } else if (mc_header->rev == uci->rev) {
- /* notify the caller of success on this cpu */
- uci->err = MC_SUCCESS;
- printk(KERN_ERR "microcode: CPU%d already at revision"
- " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
- goto out;
- }
-
- pr_debug("microcode: CPU%d found a matching microcode update with "
- " revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
- uci->cksum = cksum;
- uci->pf = pf; /* keep the original mc pf for cksum calculation */
- uci->err = MC_MARKED; /* found the match */
-out:
- return;
-}
-
-static int find_matching_ucodes (void)
-{
- int cursor = 0;
- int error = 0;
-
- while (cursor + MC_HEADER_SIZE < user_buffer_size) {
- microcode_header_t mc_header;
- void *newmc = NULL;
- int i, sum, cpu_num, allocated_flag, total_size, data_size, ext_table_size;
-
- if (copy_from_user(&mc_header, user_buffer + cursor, MC_HEADER_SIZE)) {
- printk(KERN_ERR "microcode: error! Can not read user data\n");
- error = -EFAULT;
- goto out;
- }
-
- total_size = get_totalsize(&mc_header);
- if ((cursor + total_size > user_buffer_size) || (total_size < DEFAULT_UCODE_TOTALSIZE)) {
- printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
- error = -EINVAL;
- goto out;
- }
-
- data_size = get_datasize(&mc_header);
- if ((data_size + MC_HEADER_SIZE > total_size) || (data_size < DEFAULT_UCODE_DATASIZE)) {
- printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
- error = -EINVAL;
- goto out;
- }
-
- if (mc_header.ldrver != 1 || mc_header.hdrver != 1) {
- printk(KERN_ERR "microcode: error! Unknown microcode update format\n");
- error = -EINVAL;
- goto out;
- }
-
- for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) {
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
- if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/
- continue;
-
- if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->pf))
- mark_microcode_update(cpu_num, &mc_header, mc_header.sig, mc_header.pf, mc_header.cksum);
- }
-
- ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
- if (ext_table_size) {
- struct extended_sigtable ext_header;
- struct extended_signature ext_sig;
- int ext_sigcount;
-
- if ((ext_table_size < EXT_HEADER_SIZE)
- || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
- printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
- error = -EINVAL;
- goto out;
- }
- if (copy_from_user(&ext_header, user_buffer + cursor
- + MC_HEADER_SIZE + data_size, EXT_HEADER_SIZE)) {
- printk(KERN_ERR "microcode: error! Can not read user data\n");
- error = -EFAULT;
- goto out;
- }
- if (ext_table_size != exttable_size(&ext_header)) {
- printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
- error = -EFAULT;
- goto out;
- }
-
- ext_sigcount = ext_header.count;
-
- for (i = 0; i < ext_sigcount; i++) {
- if (copy_from_user(&ext_sig, user_buffer + cursor + MC_HEADER_SIZE + data_size + EXT_HEADER_SIZE
- + EXT_SIGNATURE_SIZE * i, EXT_SIGNATURE_SIZE)) {
- printk(KERN_ERR "microcode: error! Can not read user data\n");
- error = -EFAULT;
- goto out;
- }
- for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) {
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
- if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/
- continue;
- if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->pf)) {
- mark_microcode_update(cpu_num, &mc_header, ext_sig.sig, ext_sig.pf, ext_sig.cksum);
- }
- }
- }
- }
- /* now check if any cpu has matched */
- for (cpu_num = 0, allocated_flag = 0, sum = 0; cpu_num < num_online_cpus(); cpu_num++) {
- if (ucode_cpu_info[cpu_num].err == MC_MARKED) {
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
- if (!allocated_flag) {
- allocated_flag = 1;
- newmc = vmalloc(total_size);
- if (!newmc) {
- printk(KERN_ERR "microcode: error! Can not allocate memory\n");
- error = -ENOMEM;
- goto out;
- }
- if (copy_from_user(newmc + MC_HEADER_SIZE,
- user_buffer + cursor + MC_HEADER_SIZE,
- total_size - MC_HEADER_SIZE)) {
- printk(KERN_ERR "microcode: error! Can not read user data\n");
- vfree(newmc);
- error = -EFAULT;
- goto out;
- }
- memcpy(newmc, &mc_header, MC_HEADER_SIZE);
- /* check extended table checksum */
- if (ext_table_size) {
- int ext_table_sum = 0;
- int * ext_tablep = (((void *) newmc) + MC_HEADER_SIZE + data_size);
- i = ext_table_size / DWSIZE;
- while (i--) ext_table_sum += ext_tablep[i];
- if (ext_table_sum) {
- printk(KERN_WARNING "microcode: aborting, bad extended signature table checksum\n");
- vfree(newmc);
- error = -EINVAL;
- goto out;
- }
- }
-
- /* calculate the checksum */
- i = (MC_HEADER_SIZE + data_size) / DWSIZE;
- while (i--) sum += ((int *)newmc)[i];
- sum -= (mc_header.sig + mc_header.pf + mc_header.cksum);
- }
- ucode_cpu_info[cpu_num].mc = newmc;
- ucode_cpu_info[cpu_num].err = MC_ALLOCATED; /* mc updated */
- if (sum + uci->sig + uci->pf + uci->cksum != 0) {
- printk(KERN_ERR "microcode: CPU%d aborting, bad checksum\n", cpu_num);
- error = -EINVAL;
- goto out;
- }
- }
- }
- cursor += total_size; /* goto the next update patch */
- } /* end of while */
-out:
- return error;
-}
-
-static void do_update_one (void * unused)
-{
- unsigned long flags;
- unsigned int val[2];
- int cpu_num = smp_processor_id();
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-
- if (uci->mc == NULL) {
- printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num);
- return;
- }
-
- /* serialize access to the physical write to MSR 0x79 */
- spin_lock_irqsave(&microcode_update_lock, flags);
-
- /* write microcode via MSR 0x79 */
- wrmsr(MSR_IA32_UCODE_WRITE,
- (unsigned long) uci->mc->bits,
- (unsigned long) uci->mc->bits >> 16 >> 16);
- wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-
- /* see notes above for revision 1.07. Apparent chip bug */
- serialize_cpu();
-
- /* get the current revision from MSR 0x8B */
- rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
- /* notify the caller of success on this cpu */
- uci->err = MC_SUCCESS;
- spin_unlock_irqrestore(&microcode_update_lock, flags);
- printk(KERN_INFO "microcode: CPU%d updated from revision "
- "0x%x to 0x%x, date = %08x \n",
- cpu_num, uci->rev, val[1], uci->mc->hdr.date);
- return;
-}
-
-static int do_microcode_update (void)
-{
- int i, error;
-
- if (on_each_cpu(collect_cpu_info, NULL, 1, 1) != 0) {
- printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
- error = -EIO;
- goto out;
- }
-
- if ((error = find_matching_ucodes())) {
- printk(KERN_ERR "microcode: Error in the microcode data\n");
- goto out_free;
- }
-
- if (on_each_cpu(do_update_one, NULL, 1, 1) != 0) {
- printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
- error = -EIO;
- }
-
-out_free:
- for (i = 0; i < num_online_cpus(); i++) {
- if (ucode_cpu_info[i].mc) {
- int j;
- void *tmp = ucode_cpu_info[i].mc;
- vfree(tmp);
- for (j = i; j < num_online_cpus(); j++) {
- if (ucode_cpu_info[j].mc == tmp)
- ucode_cpu_info[j].mc = NULL;
- }
- }
- }
-out:
- return error;
-}
-
-static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
-{
- ssize_t ret;
-
- if (len < DEFAULT_UCODE_TOTALSIZE) {
- printk(KERN_ERR "microcode: not enough data\n");
- return -EINVAL;
- }
-
- if ((len >> PAGE_SHIFT) > num_physpages) {
- printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
- return -EINVAL;
- }
-
- down(&microcode_sem);
-
- user_buffer = (void __user *) buf;
- user_buffer_size = (int) len;
-
- ret = do_microcode_update();
- if (!ret)
- ret = (ssize_t)len;
-
- up(&microcode_sem);
-
- return ret;
-}
-
-static int microcode_ioctl (struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- switch (cmd) {
- /*
- * XXX: will be removed after microcode_ctl
- * is updated to ignore failure of this ioctl()
- */
- case MICROCODE_IOCFREE:
- return 0;
- default:
- return -EINVAL;
- }
- return -EINVAL;
-}
-
-static struct file_operations microcode_fops = {
- .owner = THIS_MODULE,
- .write = microcode_write,
- .ioctl = microcode_ioctl,
- .open = microcode_open,
-};
-
-static struct miscdevice microcode_dev = {
- .minor = MICROCODE_MINOR,
- .name = "microcode",
- .devfs_name = "cpu/microcode",
- .fops = &microcode_fops,
-};
-
-static int __init microcode_init (void)
-{
- int error;
-
- error = misc_register(&microcode_dev);
- if (error) {
- printk(KERN_ERR
- "microcode: can't misc_register on minor=%d\n",
- MICROCODE_MINOR);
- return error;
- }
-
- printk(KERN_INFO
- "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
- return 0;
-}
-
-static void __exit microcode_exit (void)
-{
- misc_deregister(&microcode_dev);
- printk(KERN_INFO "IA-32 Microcode Update Driver v" MICROCODE_VERSION " unregistered\n");
-}
-
-module_init(microcode_init)
-module_exit(microcode_exit)
-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c
deleted file mode 100644
index 5149c8a621f..00000000000
--- a/arch/i386/kernel/module.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/* Kernel module help for i386.
- Copyright (C) 2001 Rusty Russell.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-#include <linux/moduleloader.h>
-#include <linux/elf.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt...)
-#endif
-
-void *module_alloc(unsigned long size)
-{
- if (size == 0)
- return NULL;
- return vmalloc_exec(size);
-}
-
-
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
-{
- vfree(module_region);
- /* FIXME: If module_region == mod->init_region, trim exception
- table entries. */
-}
-
-/* We don't need anything special. */
-int module_frob_arch_sections(Elf_Ehdr *hdr,
- Elf_Shdr *sechdrs,
- char *secstrings,
- struct module *mod)
-{
- return 0;
-}
-
-int apply_relocate(Elf32_Shdr *sechdrs,
- const char *strtab,
- unsigned int symindex,
- unsigned int relsec,
- struct module *me)
-{
- unsigned int i;
- Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
- Elf32_Sym *sym;
- uint32_t *location;
-
- DEBUGP("Applying relocate section %u to %u\n", relsec,
- sechdrs[relsec].sh_info);
- for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
- /* This is where to make the change */
- location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
- + rel[i].r_offset;
- /* This is the symbol it is referring to. Note that all
- undefined symbols have been resolved. */
- sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
- + ELF32_R_SYM(rel[i].r_info);
-
- switch (ELF32_R_TYPE(rel[i].r_info)) {
- case R_386_32:
- /* We add the value into the location given */
- *location += sym->st_value;
- break;
- case R_386_PC32:
- /* Add the value, subtract its postition */
- *location += sym->st_value - (uint32_t)location;
- break;
- default:
- printk(KERN_ERR "module %s: Unknown relocation: %u\n",
- me->name, ELF32_R_TYPE(rel[i].r_info));
- return -ENOEXEC;
- }
- }
- return 0;
-}
-
-int apply_relocate_add(Elf32_Shdr *sechdrs,
- const char *strtab,
- unsigned int symindex,
- unsigned int relsec,
- struct module *me)
-{
- printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n",
- me->name);
- return -ENOEXEC;
-}
-
-extern void apply_alternatives(void *start, void *end);
-
-int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
-{
- const Elf_Shdr *s;
- char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-
- /* look for .altinstructions to patch */
- for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
- void *seg;
- if (strcmp(".altinstructions", secstrings + s->sh_name))
- continue;
- seg = (void *)s->sh_addr;
- apply_alternatives(seg, seg + s->sh_size);
- }
- return 0;
-}
-
-void module_arch_cleanup(struct module *mod)
-{
-}
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
deleted file mode 100644
index 8f767d9aa45..00000000000
--- a/arch/i386/kernel/mpparse.c
+++ /dev/null
@@ -1,1149 +0,0 @@
-/*
- * Intel Multiprocessor Specification 1.1 and 1.4
- * compliant MP-table parsing routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Erich Boleyn : MP v1.4 and additional changes.
- * Alan Cox : Added EBDA scanning
- * Ingo Molnar : various cleanups and rewrites
- * Maciej W. Rozycki: Bits for default MP configurations
- * Paul Diefenbaugh: Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/acpi.h>
-#include <linux/delay.h>
-#include <linux/config.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/bitops.h>
-
-#include <asm/smp.h>
-#include <asm/acpi.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/io_apic.h>
-
-#include <mach_apic.h>
-#include <mach_mpparse.h>
-#include <bios_ebda.h>
-
-/* Have we found an MP table */
-int smp_found_config;
-unsigned int __initdata maxcpus = NR_CPUS;
-
-/*
- * Various Linux-internal data structures created from the
- * MP-table.
- */
-int apic_version [MAX_APICS];
-int mp_bus_id_to_type [MAX_MP_BUSSES];
-int mp_bus_id_to_node [MAX_MP_BUSSES];
-int mp_bus_id_to_local [MAX_MP_BUSSES];
-int quad_local_to_mp_bus_id [NR_CPUS/4][4];
-int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
-static int mp_current_pci_id;
-
-/* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* MP IRQ source entries */
-int mp_irq_entries;
-
-int nr_ioapics;
-
-int pic_mode;
-unsigned long mp_lapic_addr;
-
-unsigned int def_to_bigsmp = 0;
-
-/* Processor that is doing the boot up */
-unsigned int boot_cpu_physical_apicid = -1U;
-/* Internal processor count */
-static unsigned int __devinitdata num_processors;
-
-/* Bitmask of physically existing CPUs */
-physid_mask_t phys_cpu_present_map;
-
-u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-
-/*
- * Intel MP BIOS table parsing routines:
- */
-
-
-/*
- * Checksum an MP configuration block.
- */
-
-static int __init mpf_checksum(unsigned char *mp, int len)
-{
- int sum = 0;
-
- while (len--)
- sum += *mp++;
-
- return sum & 0xFF;
-}
-
-/*
- * Have to match translation table entries to main table entries by counter
- * hence the mpc_record variable .... can't see a less disgusting way of
- * doing this ....
- */
-
-static int mpc_record;
-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
-
-#ifdef CONFIG_X86_NUMAQ
-static int MP_valid_apicid(int apicid, int version)
-{
- return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
-}
-#else
-static int MP_valid_apicid(int apicid, int version)
-{
- if (version >= 0x14)
- return apicid < 0xff;
- else
- return apicid < 0xf;
-}
-#endif
-
-static void __devinit MP_processor_info (struct mpc_config_processor *m)
-{
- int ver, apicid;
- physid_mask_t phys_cpu;
-
- if (!(m->mpc_cpuflag & CPU_ENABLED))
- return;
-
- apicid = mpc_apic_id(m, translation_table[mpc_record]);
-
- if (m->mpc_featureflag&(1<<0))
- Dprintk(" Floating point unit present.\n");
- if (m->mpc_featureflag&(1<<7))
- Dprintk(" Machine Exception supported.\n");
- if (m->mpc_featureflag&(1<<8))
- Dprintk(" 64 bit compare & exchange supported.\n");
- if (m->mpc_featureflag&(1<<9))
- Dprintk(" Internal APIC present.\n");
- if (m->mpc_featureflag&(1<<11))
- Dprintk(" SEP present.\n");
- if (m->mpc_featureflag&(1<<12))
- Dprintk(" MTRR present.\n");
- if (m->mpc_featureflag&(1<<13))
- Dprintk(" PGE present.\n");
- if (m->mpc_featureflag&(1<<14))
- Dprintk(" MCA present.\n");
- if (m->mpc_featureflag&(1<<15))
- Dprintk(" CMOV present.\n");
- if (m->mpc_featureflag&(1<<16))
- Dprintk(" PAT present.\n");
- if (m->mpc_featureflag&(1<<17))
- Dprintk(" PSE present.\n");
- if (m->mpc_featureflag&(1<<18))
- Dprintk(" PSN present.\n");
- if (m->mpc_featureflag&(1<<19))
- Dprintk(" Cache Line Flush Instruction present.\n");
- /* 20 Reserved */
- if (m->mpc_featureflag&(1<<21))
- Dprintk(" Debug Trace and EMON Store present.\n");
- if (m->mpc_featureflag&(1<<22))
- Dprintk(" ACPI Thermal Throttle Registers present.\n");
- if (m->mpc_featureflag&(1<<23))
- Dprintk(" MMX present.\n");
- if (m->mpc_featureflag&(1<<24))
- Dprintk(" FXSR present.\n");
- if (m->mpc_featureflag&(1<<25))
- Dprintk(" XMM present.\n");
- if (m->mpc_featureflag&(1<<26))
- Dprintk(" Willamette New Instructions present.\n");
- if (m->mpc_featureflag&(1<<27))
- Dprintk(" Self Snoop present.\n");
- if (m->mpc_featureflag&(1<<28))
- Dprintk(" HT present.\n");
- if (m->mpc_featureflag&(1<<29))
- Dprintk(" Thermal Monitor present.\n");
- /* 30, 31 Reserved */
-
-
- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
- Dprintk(" Bootup CPU\n");
- boot_cpu_physical_apicid = m->mpc_apicid;
- }
-
- ver = m->mpc_apicver;
-
- if (!MP_valid_apicid(apicid, ver)) {
- printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n",
- m->mpc_apicid, MAX_APICS);
- return;
- }
-
- /*
- * Validate version
- */
- if (ver == 0x0) {
- printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- m->mpc_apicid);
- ver = 0x10;
- }
- apic_version[m->mpc_apicid] = ver;
-
- phys_cpu = apicid_to_cpu_present(apicid);
- physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
-
- if (num_processors >= NR_CPUS) {
- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
- " Processor ignored.\n", NR_CPUS);
- return;
- }
-
- if (num_processors >= maxcpus) {
- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
- " Processor ignored.\n", maxcpus);
- return;
- }
-
- cpu_set(num_processors, cpu_possible_map);
- num_processors++;
-
- if ((num_processors > 8) &&
- APIC_XAPIC(ver) &&
- (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
- def_to_bigsmp = 1;
- else
- def_to_bigsmp = 0;
-
- bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
-}
-
-static void __init MP_bus_info (struct mpc_config_bus *m)
-{
- char str[7];
-
- memcpy(str, m->mpc_bustype, 6);
- str[6] = 0;
-
- mpc_oem_bus_info(m, str, translation_table[mpc_record]);
-
- if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
- } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
- } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
- mpc_oem_pci_bus(m, translation_table[mpc_record]);
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
- mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
- mp_current_pci_id++;
- } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
- } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
- } else {
- printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
- }
-}
-
-static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
-{
- if (!(m->mpc_flags & MPC_APIC_USABLE))
- return;
-
- printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n",
- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
- if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
- MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
- }
- if (!m->mpc_apicaddr) {
- printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
- " found in MP table, skipping!\n");
- return;
- }
- mp_ioapics[nr_ioapics] = *m;
- nr_ioapics++;
-}
-
-static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
-{
- mp_irqs [mp_irq_entries] = *m;
- Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
- m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!!\n");
-}
-
-static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
-{
- Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
- /*
- * Well it seems all SMP boards in existence
- * use ExtINT/LVT1 == LINT0 and
- * NMI/LVT2 == LINT1 - the following check
- * will show us if this assumptions is false.
- * Until then we do not have to add baggage.
- */
- if ((m->mpc_irqtype == mp_ExtINT) &&
- (m->mpc_destapiclint != 0))
- BUG();
- if ((m->mpc_irqtype == mp_NMI) &&
- (m->mpc_destapiclint != 1))
- BUG();
-}
-
-#ifdef CONFIG_X86_NUMAQ
-static void __init MP_translation_info (struct mpc_config_translation *m)
-{
- printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
-
- if (mpc_record >= MAX_MPC_ENTRY)
- printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
- else
- translation_table[mpc_record] = m; /* stash this for later */
- if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
- node_set_online(m->trans_quad);
-}
-
-/*
- * Read/parse the MPC oem tables
- */
-
-static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
- unsigned short oemsize)
-{
- int count = sizeof (*oemtable); /* the header size */
- unsigned char *oemptr = ((unsigned char *)oemtable)+count;
-
- mpc_record = 0;
- printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
- if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
- {
- printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
- oemtable->oem_signature[0],
- oemtable->oem_signature[1],
- oemtable->oem_signature[2],
- oemtable->oem_signature[3]);
- return;
- }
- if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
- {
- printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
- return;
- }
- while (count < oemtable->oem_length) {
- switch (*oemptr) {
- case MP_TRANSLATION:
- {
- struct mpc_config_translation *m=
- (struct mpc_config_translation *)oemptr;
- MP_translation_info(m);
- oemptr += sizeof(*m);
- count += sizeof(*m);
- ++mpc_record;
- break;
- }
- default:
- {
- printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
- return;
- }
- }
- }
-}
-
-static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
- char *productid)
-{
- if (strncmp(oem, "IBM NUMA", 8))
- printk("Warning! May not be a NUMA-Q system!\n");
- if (mpc->mpc_oemptr)
- smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
- mpc->mpc_oemsize);
-}
-#endif /* CONFIG_X86_NUMAQ */
-
-/*
- * Read/parse the MPC
- */
-
-static int __init smp_read_mpc(struct mp_config_table *mpc)
-{
- char str[16];
- char oem[10];
- int count=sizeof(*mpc);
- unsigned char *mpt=((unsigned char *)mpc)+count;
-
- if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
- printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
- *(u32 *)mpc->mpc_signature);
- return 0;
- }
- if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
- printk(KERN_ERR "SMP mptable: checksum error!\n");
- return 0;
- }
- if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
- printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
- mpc->mpc_spec);
- return 0;
- }
- if (!mpc->mpc_lapic) {
- printk(KERN_ERR "SMP mptable: null local APIC address!\n");
- return 0;
- }
- memcpy(oem,mpc->mpc_oem,8);
- oem[8]=0;
- printk(KERN_INFO "OEM ID: %s ",oem);
-
- memcpy(str,mpc->mpc_productid,12);
- str[12]=0;
- printk("Product ID: %s ",str);
-
- mps_oem_check(mpc, oem, str);
-
- printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
-
- /*
- * Save the local APIC address (it might be non-default) -- but only
- * if we're not using ACPI.
- */
- if (!acpi_lapic)
- mp_lapic_addr = mpc->mpc_lapic;
-
- /*
- * Now process the configuration blocks.
- */
- mpc_record = 0;
- while (count < mpc->mpc_length) {
- switch(*mpt) {
- case MP_PROCESSOR:
- {
- struct mpc_config_processor *m=
- (struct mpc_config_processor *)mpt;
- /* ACPI may have already provided this data */
- if (!acpi_lapic)
- MP_processor_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_BUS:
- {
- struct mpc_config_bus *m=
- (struct mpc_config_bus *)mpt;
- MP_bus_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_IOAPIC:
- {
- struct mpc_config_ioapic *m=
- (struct mpc_config_ioapic *)mpt;
- MP_ioapic_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- case MP_INTSRC:
- {
- struct mpc_config_intsrc *m=
- (struct mpc_config_intsrc *)mpt;
-
- MP_intsrc_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- case MP_LINTSRC:
- {
- struct mpc_config_lintsrc *m=
- (struct mpc_config_lintsrc *)mpt;
- MP_lintsrc_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- default:
- {
- count = mpc->mpc_length;
- break;
- }
- }
- ++mpc_record;
- }
- clustered_apic_check();
- if (!num_processors)
- printk(KERN_ERR "SMP mptable: no processors registered!\n");
- return num_processors;
-}
-
-static int __init ELCR_trigger(unsigned int irq)
-{
- unsigned int port;
-
- port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
-}
-
-static void __init construct_default_ioirq_mptable(int mpc_default_type)
-{
- struct mpc_config_intsrc intsrc;
- int i;
- int ELCR_fallback = 0;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* conforming */
- intsrc.mpc_srcbus = 0;
- intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
-
- intsrc.mpc_irqtype = mp_INT;
-
- /*
- * If true, we have an ISA/PCI system with no IRQ entries
- * in the MP table. To prevent the PCI interrupts from being set up
- * incorrectly, we try to use the ELCR. The sanity check to see if
- * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
- * never be level sensitive, so we simply see if the ELCR agrees.
- * If it does, we assume it's valid.
- */
- if (mpc_default_type == 5) {
- printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
-
- if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
- printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
- else {
- printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
- ELCR_fallback = 1;
- }
- }
-
- for (i = 0; i < 16; i++) {
- switch (mpc_default_type) {
- case 2:
- if (i == 0 || i == 13)
- continue; /* IRQ0 & IRQ13 not connected */
- /* fall through */
- default:
- if (i == 2)
- continue; /* IRQ2 is never connected */
- }
-
- if (ELCR_fallback) {
- /*
- * If the ELCR indicates a level-sensitive interrupt, we
- * copy that information over to the MP table in the
- * irqflag field (level sensitive, active high polarity).
- */
- if (ELCR_trigger(i))
- intsrc.mpc_irqflag = 13;
- else
- intsrc.mpc_irqflag = 0;
- }
-
- intsrc.mpc_srcbusirq = i;
- intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
- MP_intsrc_info(&intsrc);
- }
-
- intsrc.mpc_irqtype = mp_ExtINT;
- intsrc.mpc_srcbusirq = 0;
- intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
- MP_intsrc_info(&intsrc);
-}
-
-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
-{
- struct mpc_config_processor processor;
- struct mpc_config_bus bus;
- struct mpc_config_ioapic ioapic;
- struct mpc_config_lintsrc lintsrc;
- int linttypes[2] = { mp_ExtINT, mp_NMI };
- int i;
-
- /*
- * local APIC has default address
- */
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
- /*
- * 2 CPUs, numbered 0 & 1.
- */
- processor.mpc_type = MP_PROCESSOR;
- /* Either an integrated APIC or a discrete 82489DX. */
- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
- processor.mpc_cpuflag = CPU_ENABLED;
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) |
- boot_cpu_data.x86_mask;
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
- for (i = 0; i < 2; i++) {
- processor.mpc_apicid = i;
- MP_processor_info(&processor);
- }
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- switch (mpc_default_type) {
- default:
- printk("???\n");
- printk(KERN_ERR "Unknown standard configuration %d\n",
- mpc_default_type);
- /* fall through */
- case 1:
- case 5:
- memcpy(bus.mpc_bustype, "ISA ", 6);
- break;
- case 2:
- case 6:
- case 3:
- memcpy(bus.mpc_bustype, "EISA ", 6);
- break;
- case 4:
- case 7:
- memcpy(bus.mpc_bustype, "MCA ", 6);
- }
- MP_bus_info(&bus);
- if (mpc_default_type > 4) {
- bus.mpc_busid = 1;
- memcpy(bus.mpc_bustype, "PCI ", 6);
- MP_bus_info(&bus);
- }
-
- ioapic.mpc_type = MP_IOAPIC;
- ioapic.mpc_apicid = 2;
- ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
- ioapic.mpc_flags = MPC_APIC_USABLE;
- ioapic.mpc_apicaddr = 0xFEC00000;
- MP_ioapic_info(&ioapic);
-
- /*
- * We set up most of the low 16 IO-APIC pins according to MPS rules.
- */
- construct_default_ioirq_mptable(mpc_default_type);
-
- lintsrc.mpc_type = MP_LINTSRC;
- lintsrc.mpc_irqflag = 0; /* conforming */
- lintsrc.mpc_srcbusid = 0;
- lintsrc.mpc_srcbusirq = 0;
- lintsrc.mpc_destapic = MP_APIC_ALL;
- for (i = 0; i < 2; i++) {
- lintsrc.mpc_irqtype = linttypes[i];
- lintsrc.mpc_destapiclint = i;
- MP_lintsrc_info(&lintsrc);
- }
-}
-
-static struct intel_mp_floating *mpf_found;
-
-/*
- * Scan the memory blocks for an SMP configuration block.
- */
-void __init get_smp_config (void)
-{
- struct intel_mp_floating *mpf = mpf_found;
-
- /*
- * ACPI supports both logical (e.g. Hyper-Threading) and physical
- * processors, where MPS only supports physical.
- */
- if (acpi_lapic && acpi_ioapic) {
- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
- return;
- }
- else if (acpi_lapic)
- printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
-
- printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
- if (mpf->mpf_feature2 & (1<<7)) {
- printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
- pic_mode = 1;
- } else {
- printk(KERN_INFO " Virtual Wire compatibility mode.\n");
- pic_mode = 0;
- }
-
- /*
- * Now see if we need to read further.
- */
- if (mpf->mpf_feature1 != 0) {
-
- printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
- construct_default_ISA_mptable(mpf->mpf_feature1);
-
- } else if (mpf->mpf_physptr) {
-
- /*
- * Read the physical hardware table. Anything here will
- * override the defaults.
- */
- if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
- smp_found_config = 0;
- printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
- printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
- return;
- }
- /*
- * If there are no explicit MP IRQ entries, then we are
- * broken. We set up most of the low 16 IO-APIC pins to
- * ISA defaults and hope it will work.
- */
- if (!mp_irq_entries) {
- struct mpc_config_bus bus;
-
- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- memcpy(bus.mpc_bustype, "ISA ", 6);
- MP_bus_info(&bus);
-
- construct_default_ioirq_mptable(0);
- }
-
- } else
- BUG();
-
- printk(KERN_INFO "Processors: %d\n", num_processors);
- /*
- * Only use the first configuration found.
- */
-}
-
-static int __init smp_scan_config (unsigned long base, unsigned long length)
-{
- unsigned long *bp = phys_to_virt(base);
- struct intel_mp_floating *mpf;
-
- Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
- if (sizeof(*mpf) != 16)
- printk("Error: MPF size\n");
-
- while (length > 0) {
- mpf = (struct intel_mp_floating *)bp;
- if ((*bp == SMP_MAGIC_IDENT) &&
- (mpf->mpf_length == 1) &&
- !mpf_checksum((unsigned char *)bp, 16) &&
- ((mpf->mpf_specification == 1)
- || (mpf->mpf_specification == 4)) ) {
-
- smp_found_config = 1;
- printk(KERN_INFO "found SMP MP-table at %08lx\n",
- virt_to_phys(mpf));
- reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
- if (mpf->mpf_physptr) {
- /*
- * We cannot access to MPC table to compute
- * table size yet, as only few megabytes from
- * the bottom is mapped now.
- * PC-9800's MPC table places on the very last
- * of physical memory; so that simply reserving
- * PAGE_SIZE from mpg->mpf_physptr yields BUG()
- * in reserve_bootmem.
- */
- unsigned long size = PAGE_SIZE;
- unsigned long end = max_low_pfn * PAGE_SIZE;
- if (mpf->mpf_physptr + size > end)
- size = end - mpf->mpf_physptr;
- reserve_bootmem(mpf->mpf_physptr, size);
- }
-
- mpf_found = mpf;
- return 1;
- }
- bp += 4;
- length -= 16;
- }
- return 0;
-}
-
-void __init find_smp_config (void)
-{
- unsigned int address;
-
- /*
- * FIXME: Linux assumes you have 640K of base ram..
- * this continues the error...
- *
- * 1) Scan the bottom 1K for a signature
- * 2) Scan the top 1K of base RAM
- * 3) Scan the 64K of bios
- */
- if (smp_scan_config(0x0,0x400) ||
- smp_scan_config(639*0x400,0x400) ||
- smp_scan_config(0xF0000,0x10000))
- return;
- /*
- * If it is an SMP machine we should know now, unless the
- * configuration is in an EISA/MCA bus machine with an
- * extended bios data area.
- *
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E, calculate and scan it here.
- *
- * NOTE! There are Linux loaders that will corrupt the EBDA
- * area, and as such this kind of SMP config may be less
- * trustworthy, simply because the SMP table may have been
- * stomped on during early boot. These loaders are buggy and
- * should be fixed.
- *
- * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
- */
-
- address = get_bios_ebda();
- if (address)
- smp_scan_config(address, 0x400);
-}
-
-/* --------------------------------------------------------------------------
- ACPI-based MP Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-void __init mp_register_lapic_address (
- u64 address)
-{
- mp_lapic_addr = (unsigned long) address;
-
- set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
-
- if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
-
- Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
-}
-
-
-void __devinit mp_register_lapic (
- u8 id,
- u8 enabled)
-{
- struct mpc_config_processor processor;
- int boot_cpu = 0;
-
- if (MAX_APICS - id <= 0) {
- printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
- id, MAX_APICS);
- return;
- }
-
- if (id == boot_cpu_physical_apicid)
- boot_cpu = 1;
-
- processor.mpc_type = MP_PROCESSOR;
- processor.mpc_apicid = id;
- processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
- processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
- processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
-
- MP_processor_info(&processor);
-}
-
-#ifdef CONFIG_X86_IO_APIC
-
-#define MP_ISA_BUS 0
-#define MP_MAX_IOAPIC_PIN 127
-
-static struct mp_ioapic_routing {
- int apic_id;
- int gsi_base;
- int gsi_end;
- u32 pin_programmed[4];
-} mp_ioapic_routing[MAX_IO_APICS];
-
-
-static int mp_find_ioapic (
- int gsi)
-{
- int i = 0;
-
- /* Find the IOAPIC that manages this GSI. */
- for (i = 0; i < nr_ioapics; i++) {
- if ((gsi >= mp_ioapic_routing[i].gsi_base)
- && (gsi <= mp_ioapic_routing[i].gsi_end))
- return i;
- }
-
- printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
-
- return -1;
-}
-
-
-void __init mp_register_ioapic (
- u8 id,
- u32 address,
- u32 gsi_base)
-{
- int idx = 0;
-
- if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
- }
- if (!address) {
- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
- " found in MADT table, skipping!\n");
- return;
- }
-
- idx = nr_ioapics++;
-
- mp_ioapics[idx].mpc_type = MP_IOAPIC;
- mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
- mp_ioapics[idx].mpc_apicaddr = address;
-
- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15))
- mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
- else
- mp_ioapics[idx].mpc_apicid = id;
- mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
-
- /*
- * Build basic GSI lookup table to facilitate gsi->io_apic lookups
- * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
- */
- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
- mp_ioapic_routing[idx].gsi_base = gsi_base;
- mp_ioapic_routing[idx].gsi_end = gsi_base +
- io_apic_get_redir_entries(idx);
-
- printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
- "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
- mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
- mp_ioapic_routing[idx].gsi_base,
- mp_ioapic_routing[idx].gsi_end);
-
- return;
-}
-
-
-void __init mp_override_legacy_irq (
- u8 bus_irq,
- u8 polarity,
- u8 trigger,
- u32 gsi)
-{
- struct mpc_config_intsrc intsrc;
- int ioapic = -1;
- int pin = -1;
-
- /*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
-
- /*
- * TBD: This check is for faulty timer entries, where the override
- * erroneously sets the trigger to level, resulting in a HUGE
- * increase of timer interrupts!
- */
- if ((bus_irq == 0) && (trigger == 3))
- trigger = 1;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqtype = mp_INT;
- intsrc.mpc_irqflag = (trigger << 2) | polarity;
- intsrc.mpc_srcbus = MP_ISA_BUS;
- intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
- intsrc.mpc_dstirq = pin; /* INTIN# */
-
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
- intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
-
- return;
-}
-
-int es7000_plat;
-
-void __init mp_config_acpi_legacy_irqs (void)
-{
- struct mpc_config_intsrc intsrc;
- int i = 0;
- int ioapic = -1;
-
- /*
- * Fabricate the legacy ISA bus (bus #31).
- */
- mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
- Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
-
- /*
- * Older generations of ES7000 have no legacy identity mappings
- */
- if (es7000_plat == 1)
- return;
-
- /*
- * Locate the IOAPIC that manages the ISA IRQs (0-15).
- */
- ioapic = mp_find_ioapic(0);
- if (ioapic < 0)
- return;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* Conforming */
- intsrc.mpc_srcbus = MP_ISA_BUS;
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
-
- /*
- * Use the default configuration for the IRQs 0-15. Unless
- * overriden by (MADT) interrupt source override entries.
- */
- for (i = 0; i < 16; i++) {
- int idx;
-
- for (idx = 0; idx < mp_irq_entries; idx++) {
- struct mpc_config_intsrc *irq = mp_irqs + idx;
-
- /* Do we already have a mapping for this ISA IRQ? */
- if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
- break;
-
- /* Do we already have a mapping for this IOAPIC pin */
- if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
- (irq->mpc_dstirq == i))
- break;
- }
-
- if (idx != mp_irq_entries) {
- printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
- continue; /* IRQ already used */
- }
-
- intsrc.mpc_irqtype = mp_INT;
- intsrc.mpc_srcbusirq = i; /* Identity mapped */
- intsrc.mpc_dstirq = i;
-
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
- "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
- intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
- }
-}
-
-#define MAX_GSI_NUM 4096
-
-int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
-{
- int ioapic = -1;
- int ioapic_pin = 0;
- int idx, bit = 0;
- static int pci_irq = 16;
- /*
- * Mapping between Global System Interrups, which
- * represent all possible interrupts, and IRQs
- * assigned to actual devices.
- */
- static int gsi_to_irq[MAX_GSI_NUM];
-
- /* Don't set up the ACPI SCI because it's already set up */
- if (acpi_fadt.sci_int == gsi)
- return gsi;
-
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0) {
- printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
- return gsi;
- }
-
- ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
-
- if (ioapic_renumber_irq)
- gsi = ioapic_renumber_irq(ioapic, gsi);
-
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->gsi mappings (but unique PCI devices);
- * we only program the IOAPIC on the first.
- */
- bit = ioapic_pin % 32;
- idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
- if (idx > 3) {
- printk(KERN_ERR "Invalid reference to IOAPIC pin "
- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
- ioapic_pin);
- return gsi;
- }
- if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
- Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
- return gsi_to_irq[gsi];
- }
-
- mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
-
- if (edge_level) {
- /*
- * For PCI devices assign IRQs in order, avoiding gaps
- * due to unused I/O APIC pins.
- */
- int irq = gsi;
- if (gsi < MAX_GSI_NUM) {
- if (gsi > 15)
- gsi = pci_irq++;
- /*
- * Don't assign IRQ used by ACPI SCI
- */
- if (gsi == acpi_fadt.sci_int)
- gsi = pci_irq++;
- gsi_to_irq[irq] = gsi;
- } else {
- printk(KERN_ERR "GSI %u is too high\n", gsi);
- return gsi;
- }
- }
-
- io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
- edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
- active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1);
- return gsi;
-}
-
-#endif /* CONFIG_X86_IO_APIC */
-#endif /* CONFIG_ACPI */
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
deleted file mode 100644
index 44470fea430..00000000000
--- a/arch/i386/kernel/msr.c
+++ /dev/null
@@ -1,327 +0,0 @@
-/* ----------------------------------------------------------------------- *
- *
- * Copyright 2000 H. Peter Anvin - All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
- * USA; either version 2 of the License, or (at your option) any later
- * version; incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-/*
- * msr.c
- *
- * x86 MSR access device
- *
- * This device is accessed by lseek() to the appropriate register number
- * and then read/write in chunks of 8 bytes. A larger size means multiple
- * reads or writes of the same register.
- *
- * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on
- * an SMP box will direct the access to CPU %d.
- */
-
-#include <linux/module.h>
-#include <linux/config.h>
-
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/major.h>
-#include <linux/fs.h>
-#include <linux/device.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-static struct class *msr_class;
-
-static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx)
-{
- int err;
-
- err = wrmsr_safe(reg, eax, edx);
- if (err)
- err = -EIO;
- return err;
-}
-
-static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
-{
- int err;
-
- err = rdmsr_safe(reg, eax, edx);
- if (err)
- err = -EIO;
- return err;
-}
-
-#ifdef CONFIG_SMP
-
-struct msr_command {
- int cpu;
- int err;
- u32 reg;
- u32 data[2];
-};
-
-static void msr_smp_wrmsr(void *cmd_block)
-{
- struct msr_command *cmd = (struct msr_command *)cmd_block;
-
- if (cmd->cpu == smp_processor_id())
- cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]);
-}
-
-static void msr_smp_rdmsr(void *cmd_block)
-{
- struct msr_command *cmd = (struct msr_command *)cmd_block;
-
- if (cmd->cpu == smp_processor_id())
- cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]);
-}
-
-static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
-{
- struct msr_command cmd;
- int ret;
-
- preempt_disable();
- if (cpu == smp_processor_id()) {
- ret = wrmsr_eio(reg, eax, edx);
- } else {
- cmd.cpu = cpu;
- cmd.reg = reg;
- cmd.data[0] = eax;
- cmd.data[1] = edx;
-
- smp_call_function(msr_smp_wrmsr, &cmd, 1, 1);
- ret = cmd.err;
- }
- preempt_enable();
- return ret;
-}
-
-static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx)
-{
- struct msr_command cmd;
- int ret;
-
- preempt_disable();
- if (cpu == smp_processor_id()) {
- ret = rdmsr_eio(reg, eax, edx);
- } else {
- cmd.cpu = cpu;
- cmd.reg = reg;
-
- smp_call_function(msr_smp_rdmsr, &cmd, 1, 1);
-
- *eax = cmd.data[0];
- *edx = cmd.data[1];
-
- ret = cmd.err;
- }
- preempt_enable();
- return ret;
-}
-
-#else /* ! CONFIG_SMP */
-
-static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
-{
- return wrmsr_eio(reg, eax, edx);
-}
-
-static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx)
-{
- return rdmsr_eio(reg, eax, edx);
-}
-
-#endif /* ! CONFIG_SMP */
-
-static loff_t msr_seek(struct file *file, loff_t offset, int orig)
-{
- loff_t ret = -EINVAL;
-
- lock_kernel();
- switch (orig) {
- case 0:
- file->f_pos = offset;
- ret = file->f_pos;
- break;
- case 1:
- file->f_pos += offset;
- ret = file->f_pos;
- }
- unlock_kernel();
- return ret;
-}
-
-static ssize_t msr_read(struct file *file, char __user * buf,
- size_t count, loff_t * ppos)
-{
- u32 __user *tmp = (u32 __user *) buf;
- u32 data[2];
- size_t rv;
- u32 reg = *ppos;
- int cpu = iminor(file->f_dentry->d_inode);
- int err;
-
- if (count % 8)
- return -EINVAL; /* Invalid chunk size */
-
- for (rv = 0; count; count -= 8) {
- err = do_rdmsr(cpu, reg, &data[0], &data[1]);
- if (err)
- return err;
- if (copy_to_user(tmp, &data, 8))
- return -EFAULT;
- tmp += 2;
- }
-
- return ((char __user *)tmp) - buf;
-}
-
-static ssize_t msr_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- const u32 __user *tmp = (const u32 __user *)buf;
- u32 data[2];
- size_t rv;
- u32 reg = *ppos;
- int cpu = iminor(file->f_dentry->d_inode);
- int err;
-
- if (count % 8)
- return -EINVAL; /* Invalid chunk size */
-
- for (rv = 0; count; count -= 8) {
- if (copy_from_user(&data, tmp, 8))
- return -EFAULT;
- err = do_wrmsr(cpu, reg, data[0], data[1]);
- if (err)
- return err;
- tmp += 2;
- }
-
- return ((char __user *)tmp) - buf;
-}
-
-static int msr_open(struct inode *inode, struct file *file)
-{
- unsigned int cpu = iminor(file->f_dentry->d_inode);
- struct cpuinfo_x86 *c = &(cpu_data)[cpu];
-
- if (cpu >= NR_CPUS || !cpu_online(cpu))
- return -ENXIO; /* No such CPU */
- if (!cpu_has(c, X86_FEATURE_MSR))
- return -EIO; /* MSR not supported */
-
- return 0;
-}
-
-/*
- * File operations we support
- */
-static struct file_operations msr_fops = {
- .owner = THIS_MODULE,
- .llseek = msr_seek,
- .read = msr_read,
- .write = msr_write,
- .open = msr_open,
-};
-
-static int msr_class_device_create(int i)
-{
- int err = 0;
- struct class_device *class_err;
-
- class_err = class_device_create(msr_class, NULL, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i);
- if (IS_ERR(class_err))
- err = PTR_ERR(class_err);
- return err;
-}
-
-static int __devinit msr_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action) {
- case CPU_ONLINE:
- msr_class_device_create(cpu);
- break;
- case CPU_DEAD:
- class_device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block msr_class_cpu_notifier =
-{
- .notifier_call = msr_class_cpu_callback,
-};
-
-static int __init msr_init(void)
-{
- int i, err = 0;
- i = 0;
-
- if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) {
- printk(KERN_ERR "msr: unable to get major %d for msr\n",
- MSR_MAJOR);
- err = -EBUSY;
- goto out;
- }
- msr_class = class_create(THIS_MODULE, "msr");
- if (IS_ERR(msr_class)) {
- err = PTR_ERR(msr_class);
- goto out_chrdev;
- }
- for_each_online_cpu(i) {
- err = msr_class_device_create(i);
- if (err != 0)
- goto out_class;
- }
- register_cpu_notifier(&msr_class_cpu_notifier);
-
- err = 0;
- goto out;
-
-out_class:
- i = 0;
- for_each_online_cpu(i)
- class_device_destroy(msr_class, MKDEV(MSR_MAJOR, i));
- class_destroy(msr_class);
-out_chrdev:
- unregister_chrdev(MSR_MAJOR, "cpu/msr");
-out:
- return err;
-}
-
-static void __exit msr_exit(void)
-{
- int cpu = 0;
- for_each_online_cpu(cpu)
- class_device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
- class_destroy(msr_class);
- unregister_chrdev(MSR_MAJOR, "cpu/msr");
- unregister_cpu_notifier(&msr_class_cpu_notifier);
-}
-
-module_init(msr_init);
-module_exit(msr_exit)
-
-MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
-MODULE_DESCRIPTION("x86 generic MSR driver");
-MODULE_LICENSE("GPL");
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
deleted file mode 100644
index d661703ac1c..00000000000
--- a/arch/i386/kernel/nmi.c
+++ /dev/null
@@ -1,621 +0,0 @@
-/*
- * linux/arch/i386/nmi.c
- *
- * NMI watchdog support on APIC systems
- *
- * Started by Ingo Molnar <mingo@redhat.com>
- *
- * Fixes:
- * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
- * Mikael Pettersson : Power Management for local APIC NMI watchdog.
- * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
- * Pavel Machek and
- * Mikael Pettersson : PM converted to driver model. Disable/enable API.
- */
-
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/nmi.h>
-#include <linux/sysdev.h>
-#include <linux/sysctl.h>
-
-#include <asm/smp.h>
-#include <asm/div64.h>
-#include <asm/nmi.h>
-
-#include "mach_traps.h"
-
-unsigned int nmi_watchdog = NMI_NONE;
-extern int unknown_nmi_panic;
-static unsigned int nmi_hz = HZ;
-static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
-static unsigned int nmi_p4_cccr_val;
-extern void show_registers(struct pt_regs *regs);
-
-/*
- * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
- * - it may be reserved by some other driver, or not
- * - when not reserved by some other driver, it may be used for
- * the NMI watchdog, or not
- *
- * This is maintained separately from nmi_active because the NMI
- * watchdog may also be driven from the I/O APIC timer.
- */
-static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
-static unsigned int lapic_nmi_owner;
-#define LAPIC_NMI_WATCHDOG (1<<0)
-#define LAPIC_NMI_RESERVED (1<<1)
-
-/* nmi_active:
- * +1: the lapic NMI watchdog is active, but can be disabled
- * 0: the lapic NMI watchdog has not been set up, and cannot
- * be enabled
- * -1: the lapic NMI watchdog is disabled, but can be enabled
- */
-int nmi_active;
-
-#define K7_EVNTSEL_ENABLE (1 << 22)
-#define K7_EVNTSEL_INT (1 << 20)
-#define K7_EVNTSEL_OS (1 << 17)
-#define K7_EVNTSEL_USR (1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
-#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-
-#define P6_EVNTSEL0_ENABLE (1 << 22)
-#define P6_EVNTSEL_INT (1 << 20)
-#define P6_EVNTSEL_OS (1 << 17)
-#define P6_EVNTSEL_USR (1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
-#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
-
-#define MSR_P4_MISC_ENABLE 0x1A0
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
-#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
-#define MSR_P4_PERFCTR0 0x300
-#define MSR_P4_CCCR0 0x360
-#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
-#define P4_ESCR_OS (1<<3)
-#define P4_ESCR_USR (1<<2)
-#define P4_CCCR_OVF_PMI0 (1<<26)
-#define P4_CCCR_OVF_PMI1 (1<<27)
-#define P4_CCCR_THRESHOLD(N) ((N)<<20)
-#define P4_CCCR_COMPLEMENT (1<<19)
-#define P4_CCCR_COMPARE (1<<18)
-#define P4_CCCR_REQUIRED (3<<16)
-#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
-#define P4_CCCR_ENABLE (1<<12)
-/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- CRU_ESCR0 (with any non-null event selector) through a complemented
- max threshold. [IA32-Vol3, Section 14.9.9] */
-#define MSR_P4_IQ_COUNTER0 0x30C
-#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
-#define P4_NMI_IQ_CCCR0 \
- (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
- P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
-
-#ifdef CONFIG_SMP
-/* The performance counters used by NMI_LOCAL_APIC don't trigger when
- * the CPU is idle. To make sure the NMI watchdog really ticks on all
- * CPUs during the test make them busy.
- */
-static __init void nmi_cpu_busy(void *data)
-{
- volatile int *endflag = data;
- local_irq_enable();
- /* Intentionally don't use cpu_relax here. This is
- to make sure that the performance counter really ticks,
- even if there is a simulator or similar that catches the
- pause instruction. On a real HT machine this is fine because
- all other CPUs are busy with "useless" delay loops and don't
- care if they get somewhat less cycles. */
- while (*endflag == 0)
- barrier();
-}
-#endif
-
-static int __init check_nmi_watchdog(void)
-{
- volatile int endflag = 0;
- unsigned int *prev_nmi_count;
- int cpu;
-
- if (nmi_watchdog == NMI_NONE)
- return 0;
-
- prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
- if (!prev_nmi_count)
- return -1;
-
- printk(KERN_INFO "Testing NMI watchdog ... ");
-
- if (nmi_watchdog == NMI_LOCAL_APIC)
- smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
-
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
- local_irq_enable();
- mdelay((10*1000)/nmi_hz); // wait 10 ticks
-
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
-#ifdef CONFIG_SMP
- /* Check cpu_callin_map here because that is set
- after the timer is started. */
- if (!cpu_isset(cpu, cpu_callin_map))
- continue;
-#endif
- if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
- endflag = 1;
- printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
- cpu,
- prev_nmi_count[cpu],
- nmi_count(cpu));
- nmi_active = 0;
- lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
- kfree(prev_nmi_count);
- return -1;
- }
- }
- endflag = 1;
- printk("OK.\n");
-
- /* now that we know it works we can reduce NMI frequency to
- something more reasonable; makes a difference in some configs */
- if (nmi_watchdog == NMI_LOCAL_APIC)
- nmi_hz = 1;
-
- kfree(prev_nmi_count);
- return 0;
-}
-/* This needs to happen later in boot so counters are working */
-late_initcall(check_nmi_watchdog);
-
-static int __init setup_nmi_watchdog(char *str)
-{
- int nmi;
-
- get_option(&str, &nmi);
-
- if (nmi >= NMI_INVALID)
- return 0;
- if (nmi == NMI_NONE)
- nmi_watchdog = nmi;
- /*
- * If any other x86 CPU has a local APIC, then
- * please test the NMI stuff there and send me the
- * missing bits. Right now Intel P6/P4 and AMD K7 only.
- */
- if ((nmi == NMI_LOCAL_APIC) &&
- (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
- (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
- nmi_watchdog = nmi;
- if ((nmi == NMI_LOCAL_APIC) &&
- (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
- (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
- nmi_watchdog = nmi;
- /*
- * We can enable the IO-APIC watchdog
- * unconditionally.
- */
- if (nmi == NMI_IO_APIC) {
- nmi_active = 1;
- nmi_watchdog = nmi;
- }
- return 1;
-}
-
-__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-static void disable_lapic_nmi_watchdog(void)
-{
- if (nmi_active <= 0)
- return;
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- wrmsr(MSR_K7_EVNTSEL0, 0, 0);
- break;
- case X86_VENDOR_INTEL:
- switch (boot_cpu_data.x86) {
- case 6:
- if (boot_cpu_data.x86_model > 0xd)
- break;
-
- wrmsr(MSR_P6_EVNTSEL0, 0, 0);
- break;
- case 15:
- if (boot_cpu_data.x86_model > 0x4)
- break;
-
- wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
- wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
- break;
- }
- break;
- }
- nmi_active = -1;
- /* tell do_nmi() and others that we're not active any more */
- nmi_watchdog = 0;
-}
-
-static void enable_lapic_nmi_watchdog(void)
-{
- if (nmi_active < 0) {
- nmi_watchdog = NMI_LOCAL_APIC;
- setup_apic_nmi_watchdog();
- }
-}
-
-int reserve_lapic_nmi(void)
-{
- unsigned int old_owner;
-
- spin_lock(&lapic_nmi_owner_lock);
- old_owner = lapic_nmi_owner;
- lapic_nmi_owner |= LAPIC_NMI_RESERVED;
- spin_unlock(&lapic_nmi_owner_lock);
- if (old_owner & LAPIC_NMI_RESERVED)
- return -EBUSY;
- if (old_owner & LAPIC_NMI_WATCHDOG)
- disable_lapic_nmi_watchdog();
- return 0;
-}
-
-void release_lapic_nmi(void)
-{
- unsigned int new_owner;
-
- spin_lock(&lapic_nmi_owner_lock);
- new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
- lapic_nmi_owner = new_owner;
- spin_unlock(&lapic_nmi_owner_lock);
- if (new_owner & LAPIC_NMI_WATCHDOG)
- enable_lapic_nmi_watchdog();
-}
-
-void disable_timer_nmi_watchdog(void)
-{
- if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
- return;
-
- unset_nmi_callback();
- nmi_active = -1;
- nmi_watchdog = NMI_NONE;
-}
-
-void enable_timer_nmi_watchdog(void)
-{
- if (nmi_active < 0) {
- nmi_watchdog = NMI_IO_APIC;
- touch_nmi_watchdog();
- nmi_active = 1;
- }
-}
-
-#ifdef CONFIG_PM
-
-static int nmi_pm_active; /* nmi_active before suspend */
-
-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
-{
- nmi_pm_active = nmi_active;
- disable_lapic_nmi_watchdog();
- return 0;
-}
-
-static int lapic_nmi_resume(struct sys_device *dev)
-{
- if (nmi_pm_active > 0)
- enable_lapic_nmi_watchdog();
- return 0;
-}
-
-
-static struct sysdev_class nmi_sysclass = {
- set_kset_name("lapic_nmi"),
- .resume = lapic_nmi_resume,
- .suspend = lapic_nmi_suspend,
-};
-
-static struct sys_device device_lapic_nmi = {
- .id = 0,
- .cls = &nmi_sysclass,
-};
-
-static int __init init_lapic_nmi_sysfs(void)
-{
- int error;
-
- if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
- return 0;
-
- error = sysdev_class_register(&nmi_sysclass);
- if (!error)
- error = sysdev_register(&device_lapic_nmi);
- return error;
-}
-/* must come after the local APIC's device_initcall() */
-late_initcall(init_lapic_nmi_sysfs);
-
-#endif /* CONFIG_PM */
-
-/*
- * Activate the NMI watchdog via the local APIC.
- * Original code written by Keith Owens.
- */
-
-static void clear_msr_range(unsigned int base, unsigned int n)
-{
- unsigned int i;
-
- for(i = 0; i < n; ++i)
- wrmsr(base+i, 0, 0);
-}
-
-static inline void write_watchdog_counter(const char *descr)
-{
- u64 count = (u64)cpu_khz * 1000;
-
- do_div(count, nmi_hz);
- if(descr)
- Dprintk("setting %s to -0x%08Lx\n", descr, count);
- wrmsrl(nmi_perfctr_msr, 0 - count);
-}
-
-static void setup_k7_watchdog(void)
-{
- unsigned int evntsel;
-
- nmi_perfctr_msr = MSR_K7_PERFCTR0;
-
- clear_msr_range(MSR_K7_EVNTSEL0, 4);
- clear_msr_range(MSR_K7_PERFCTR0, 4);
-
- evntsel = K7_EVNTSEL_INT
- | K7_EVNTSEL_OS
- | K7_EVNTSEL_USR
- | K7_NMI_EVENT;
-
- wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
- write_watchdog_counter("K7_PERFCTR0");
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= K7_EVNTSEL_ENABLE;
- wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-}
-
-static void setup_p6_watchdog(void)
-{
- unsigned int evntsel;
-
- nmi_perfctr_msr = MSR_P6_PERFCTR0;
-
- clear_msr_range(MSR_P6_EVNTSEL0, 2);
- clear_msr_range(MSR_P6_PERFCTR0, 2);
-
- evntsel = P6_EVNTSEL_INT
- | P6_EVNTSEL_OS
- | P6_EVNTSEL_USR
- | P6_NMI_EVENT;
-
- wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
- write_watchdog_counter("P6_PERFCTR0");
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= P6_EVNTSEL0_ENABLE;
- wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
-}
-
-static int setup_p4_watchdog(void)
-{
- unsigned int misc_enable, dummy;
-
- rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
- return 0;
-
- nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
- nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
-#ifdef CONFIG_SMP
- if (smp_num_siblings == 2)
- nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
-#endif
-
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
- clear_msr_range(0x3F1, 2);
- /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
- docs doesn't fully define it, so leave it alone for now. */
- if (boot_cpu_data.x86_model >= 0x3) {
- /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
- clear_msr_range(0x3A0, 26);
- clear_msr_range(0x3BC, 3);
- } else {
- clear_msr_range(0x3A0, 31);
- }
- clear_msr_range(0x3C0, 6);
- clear_msr_range(0x3C8, 6);
- clear_msr_range(0x3E0, 2);
- clear_msr_range(MSR_P4_CCCR0, 18);
- clear_msr_range(MSR_P4_PERFCTR0, 18);
-
- wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
- write_watchdog_counter("P4_IQ_COUNTER0");
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
- return 1;
-}
-
-void setup_apic_nmi_watchdog (void)
-{
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
- return;
- setup_k7_watchdog();
- break;
- case X86_VENDOR_INTEL:
- switch (boot_cpu_data.x86) {
- case 6:
- if (boot_cpu_data.x86_model > 0xd)
- return;
-
- setup_p6_watchdog();
- break;
- case 15:
- if (boot_cpu_data.x86_model > 0x4)
- return;
-
- if (!setup_p4_watchdog())
- return;
- break;
- default:
- return;
- }
- break;
- default:
- return;
- }
- lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
- nmi_active = 1;
-}
-
-/*
- * the best way to detect whether a CPU has a 'hard lockup' problem
- * is to check it's local APIC timer IRQ counts. If they are not
- * changing then that CPU has some problem.
- *
- * as these watchdog NMI IRQs are generated on every CPU, we only
- * have to check the current processor.
- *
- * since NMIs don't listen to _any_ locks, we have to be extremely
- * careful not to rely on unsafe variables. The printk might lock
- * up though, so we have to break up any console locks first ...
- * [when there will be more tty-related locks, break them up
- * here too!]
- */
-
-static unsigned int
- last_irq_sums [NR_CPUS],
- alert_counter [NR_CPUS];
-
-void touch_nmi_watchdog (void)
-{
- int i;
-
- /*
- * Just reset the alert counters, (other CPUs might be
- * spinning on locks we hold):
- */
- for (i = 0; i < NR_CPUS; i++)
- alert_counter[i] = 0;
-
- /*
- * Tickle the softlockup detector too:
- */
- touch_softlockup_watchdog();
-}
-
-extern void die_nmi(struct pt_regs *, const char *msg);
-
-void nmi_watchdog_tick (struct pt_regs * regs)
-{
-
- /*
- * Since current_thread_info()-> is always on the stack, and we
- * always switch the stack NMI-atomically, it's safe to use
- * smp_processor_id().
- */
- int sum, cpu = smp_processor_id();
-
- sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
-
- if (last_irq_sums[cpu] == sum) {
- /*
- * Ayiee, looks like this CPU is stuck ...
- * wait a few IRQs (5 seconds) before doing the oops ...
- */
- alert_counter[cpu]++;
- if (alert_counter[cpu] == 5*nmi_hz)
- /*
- * die_nmi will return ONLY if NOTIFY_STOP happens..
- */
- die_nmi(regs, "NMI Watchdog detected LOCKUP");
-
- last_irq_sums[cpu] = sum;
- alert_counter[cpu] = 0;
- }
- if (nmi_perfctr_msr) {
- if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
- /*
- * P4 quirks:
- * - An overflown perfctr will assert its interrupt
- * until the OVF flag in its CCCR is cleared.
- * - LVTPC is masked on interrupt and must be
- * unmasked by the LVTPC handler.
- */
- wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- }
- else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
- /* Only P6 based Pentium M need to re-unmask
- * the apic vector but it doesn't hurt
- * other P6 variant */
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- }
- write_watchdog_counter(NULL);
- }
-}
-
-#ifdef CONFIG_SYSCTL
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
- unsigned char reason = get_nmi_reason();
- char buf[64];
-
- if (!(reason & 0xc0)) {
- sprintf(buf, "NMI received for unknown reason %02x\n", reason);
- die_nmi(regs, buf);
- }
- return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/unknown_nmi_panic
- */
-int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
- void __user *buffer, size_t *length, loff_t *ppos)
-{
- int old_state;
-
- old_state = unknown_nmi_panic;
- proc_dointvec(table, write, file, buffer, length, ppos);
- if (!!old_state == !!unknown_nmi_panic)
- return 0;
-
- if (unknown_nmi_panic) {
- if (reserve_lapic_nmi() < 0) {
- unknown_nmi_panic = 0;
- return -EBUSY;
- } else {
- set_nmi_callback(unknown_nmi_panic_callback);
- }
- } else {
- release_lapic_nmi();
- unset_nmi_callback();
- }
- return 0;
-}
-
-#endif
-
-EXPORT_SYMBOL(nmi_active);
-EXPORT_SYMBOL(nmi_watchdog);
-EXPORT_SYMBOL(reserve_lapic_nmi);
-EXPORT_SYMBOL(release_lapic_nmi);
-EXPORT_SYMBOL(disable_timer_nmi_watchdog);
-EXPORT_SYMBOL(enable_timer_nmi_watchdog);
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c
deleted file mode 100644
index 5f5b075f860..00000000000
--- a/arch/i386/kernel/numaq.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Written by: Patricia Gaughen, IBM Corporation
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <gone@us.ibm.com>
- */
-
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/bootmem.h>
-#include <linux/mmzone.h>
-#include <linux/module.h>
-#include <linux/nodemask.h>
-#include <asm/numaq.h>
-#include <asm/topology.h>
-#include <asm/processor.h>
-
-#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
-
-/*
- * Function: smp_dump_qct()
- *
- * Description: gets memory layout from the quad config table. This
- * function also updates node_online_map with the nodes (quads) present.
- */
-static void __init smp_dump_qct(void)
-{
- int node;
- struct eachquadmem *eq;
- struct sys_cfg_data *scd =
- (struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR);
-
- nodes_clear(node_online_map);
- for_each_node(node) {
- if (scd->quads_present31_0 & (1 << node)) {
- node_set_online(node);
- eq = &scd->eq[node];
- /* Convert to pages */
- node_start_pfn[node] = MB_TO_PAGES(
- eq->hi_shrd_mem_start - eq->priv_mem_size);
- node_end_pfn[node] = MB_TO_PAGES(
- eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
-
- memory_present(node,
- node_start_pfn[node], node_end_pfn[node]);
- node_remap_size[node] = node_memmap_size_bytes(node,
- node_start_pfn[node],
- node_end_pfn[node]);
- }
- }
-}
-
-/*
- * Unlike Summit, we don't really care to let the NUMA-Q
- * fall back to flat mode. Don't compile for NUMA-Q
- * unless you really need it!
- */
-int __init get_memcfg_numaq(void)
-{
- smp_dump_qct();
- return 1;
-}
-
-static int __init numaq_dsc_disable(void)
-{
- printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
- tsc_disable = 1;
- return 0;
-}
-core_initcall(numaq_dsc_disable);
diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c
deleted file mode 100644
index 25fe6685393..00000000000
--- a/arch/i386/kernel/pci-dma.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- *
- * On i386 there is no hardware dynamic DMA address translation,
- * so consistent alloc/free are merely page allocation/freeing.
- * The rest of the dynamic DMA mapping interface is implemented
- * in asm/pci.h.
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <asm/io.h>
-
-struct dma_coherent_mem {
- void *virt_base;
- u32 device_base;
- int size;
- int flags;
- unsigned long *bitmap;
-};
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp)
-{
- void *ret;
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
- int order = get_order(size);
- /* ignore region specifiers */
- gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
-
- if (mem) {
- int page = bitmap_find_free_region(mem->bitmap, mem->size,
- order);
- if (page >= 0) {
- *dma_handle = mem->device_base + (page << PAGE_SHIFT);
- ret = mem->virt_base + (page << PAGE_SHIFT);
- memset(ret, 0, size);
- return ret;
- }
- if (mem->flags & DMA_MEMORY_EXCLUSIVE)
- return NULL;
- }
-
- if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
- gfp |= GFP_DMA;
-
- ret = (void *)__get_free_pages(gfp, order);
-
- if (ret != NULL) {
- memset(ret, 0, size);
- *dma_handle = virt_to_phys(ret);
- }
- return ret;
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
-{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
- int order = get_order(size);
-
- if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
- int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
- bitmap_release_region(mem->bitmap, page, order);
- } else
- free_pages((unsigned long)vaddr, order);
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
- dma_addr_t device_addr, size_t size, int flags)
-{
- void __iomem *mem_base;
- int pages = size >> PAGE_SHIFT;
- int bitmap_size = (pages + 31)/32;
-
- if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
- goto out;
- if (!size)
- goto out;
- if (dev->dma_mem)
- goto out;
-
- /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
- mem_base = ioremap(bus_addr, size);
- if (!mem_base)
- goto out;
-
- dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
- if (!dev->dma_mem)
- goto out;
- memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
- dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
- if (!dev->dma_mem->bitmap)
- goto free1_out;
- memset(dev->dma_mem->bitmap, 0, bitmap_size);
-
- dev->dma_mem->virt_base = mem_base;
- dev->dma_mem->device_base = device_addr;
- dev->dma_mem->size = pages;
- dev->dma_mem->flags = flags;
-
- if (flags & DMA_MEMORY_MAP)
- return DMA_MEMORY_MAP;
-
- return DMA_MEMORY_IO;
-
- free1_out:
- kfree(dev->dma_mem->bitmap);
- out:
- return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
-
- if(!mem)
- return;
- dev->dma_mem = NULL;
- iounmap(mem->virt_base);
- kfree(mem->bitmap);
- kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
- dma_addr_t device_addr, size_t size)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
- int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- int pos, err;
-
- if (!mem)
- return ERR_PTR(-EINVAL);
-
- pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
- err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
- if (err != 0)
- return ERR_PTR(err);
- return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
deleted file mode 100644
index 7a14fdfd3af..00000000000
--- a/arch/i386/kernel/process.c
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- * linux/arch/i386/kernel/process.c
- *
- * Copyright (C) 1995 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-/*
- * This file handles the architecture-dependent parts of process handling..
- */
-
-#include <stdarg.h>
-
-#include <linux/cpu.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/fs.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/elfcore.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/user.h>
-#include <linux/a.out.h>
-#include <linux/interrupt.h>
-#include <linux/config.h>
-#include <linux/utsname.h>
-#include <linux/delay.h>
-#include <linux/reboot.h>
-#include <linux/init.h>
-#include <linux/mc146818rtc.h>
-#include <linux/module.h>
-#include <linux/kallsyms.h>
-#include <linux/ptrace.h>
-#include <linux/random.h>
-#include <linux/kprobes.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/ldt.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/desc.h>
-#ifdef CONFIG_MATH_EMULATION
-#include <asm/math_emu.h>
-#endif
-
-#include <linux/err.h>
-
-#include <asm/tlbflush.h>
-#include <asm/cpu.h>
-
-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-
-static int hlt_counter;
-
-unsigned long boot_option_idle_override = 0;
-EXPORT_SYMBOL(boot_option_idle_override);
-
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return ((unsigned long *)tsk->thread.esp)[3];
-}
-
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
-
-void disable_hlt(void)
-{
- hlt_counter++;
-}
-
-EXPORT_SYMBOL(disable_hlt);
-
-void enable_hlt(void)
-{
- hlt_counter--;
-}
-
-EXPORT_SYMBOL(enable_hlt);
-
-/*
- * We use this if we don't have any better
- * idle routine..
- */
-void default_idle(void)
-{
- if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
- local_irq_disable();
- if (!need_resched())
- safe_halt();
- else
- local_irq_enable();
- } else {
- cpu_relax();
- }
-}
-#ifdef CONFIG_APM_MODULE
-EXPORT_SYMBOL(default_idle);
-#endif
-
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle (void)
-{
- int oldval;
-
- local_irq_enable();
-
- /*
- * Deal with another CPU just having chosen a thread to
- * run here:
- */
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
- asm volatile(
- "2:"
- "testl %0, %1;"
- "rep; nop;"
- "je 2b;"
- : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
-
- clear_thread_flag(TIF_POLLING_NRFLAG);
- } else {
- set_need_resched();
- }
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-#include <asm/nmi.h>
-/* We don't actually take CPU down, just spin without interrupts. */
-static inline void play_dead(void)
-{
- /* This must be done before dead CPU ack */
- cpu_exit_clear();
- wbinvd();
- mb();
- /* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
-
- /*
- * With physical CPU hotplug, we should halt the cpu
- */
- local_irq_disable();
- while (1)
- halt();
-}
-#else
-static inline void play_dead(void)
-{
- BUG();
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle(void)
-{
- int cpu = raw_smp_processor_id();
-
- /* endless idle loop with no priority at all */
- while (1) {
- while (!need_resched()) {
- void (*idle)(void);
-
- if (__get_cpu_var(cpu_idle_state))
- __get_cpu_var(cpu_idle_state) = 0;
-
- rmb();
- idle = pm_idle;
-
- if (!idle)
- idle = default_idle;
-
- if (cpu_is_offline(cpu))
- play_dead();
-
- __get_cpu_var(irq_stat).idle_timestamp = jiffies;
- idle();
- }
- schedule();
- }
-}
-
-void cpu_idle_wait(void)
-{
- unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map;
-
- set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
- put_cpu();
-
- cpus_clear(map);
- for_each_online_cpu(cpu) {
- per_cpu(cpu_idle_state, cpu) = 1;
- cpu_set(cpu, map);
- }
-
- __get_cpu_var(cpu_idle_state) = 0;
-
- wmb();
- do {
- ssleep(1);
- for_each_online_cpu(cpu) {
- if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
- cpu_clear(cpu, map);
- }
- cpus_and(map, map, cpu_online_map);
- } while (!cpus_empty(map));
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- */
-static void mwait_idle(void)
-{
- local_irq_enable();
-
- if (!need_resched()) {
- set_thread_flag(TIF_POLLING_NRFLAG);
- do {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- if (need_resched())
- break;
- __mwait(0, 0);
- } while (!need_resched());
- clear_thread_flag(TIF_POLLING_NRFLAG);
- }
-}
-
-void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
- if (cpu_has(c, X86_FEATURE_MWAIT)) {
- printk("monitor/mwait feature present.\n");
- /*
- * Skip, if setup has overridden idle.
- * One CPU supports mwait => All CPUs supports mwait
- */
- if (!pm_idle) {
- printk("using mwait in idle threads.\n");
- pm_idle = mwait_idle;
- }
- }
-}
-
-static int __init idle_setup (char *str)
-{
- if (!strncmp(str, "poll", 4)) {
- printk("using polling idle threads.\n");
- pm_idle = poll_idle;
-#ifdef CONFIG_X86_SMP
- if (smp_num_siblings > 1)
- printk("WARNING: polling idle and HT enabled, performance may degrade.\n");
-#endif
- } else if (!strncmp(str, "halt", 4)) {
- printk("using halt in idle threads.\n");
- pm_idle = default_idle;
- }
-
- boot_option_idle_override = 1;
- return 1;
-}
-
-__setup("idle=", idle_setup);
-
-void show_regs(struct pt_regs * regs)
-{
- unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
-
- printk("\n");
- printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
- printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
- print_symbol("EIP is at %s\n", regs->eip);
-
- if (user_mode(regs))
- printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
- printk(" EFLAGS: %08lx %s (%s)\n",
- regs->eflags, print_tainted(), system_utsname.release);
- printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
- regs->eax,regs->ebx,regs->ecx,regs->edx);
- printk("ESI: %08lx EDI: %08lx EBP: %08lx",
- regs->esi, regs->edi, regs->ebp);
- printk(" DS: %04x ES: %04x\n",
- 0xffff & regs->xds,0xffff & regs->xes);
-
- cr0 = read_cr0();
- cr2 = read_cr2();
- cr3 = read_cr3();
- if (current_cpu_data.x86 > 4) {
- cr4 = read_cr4();
- }
- printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
- show_trace(NULL, &regs->esp);
-}
-
-/*
- * This gets run with %ebx containing the
- * function to call, and %edx containing
- * the "args".
- */
-extern void kernel_thread_helper(void);
-__asm__(".section .text\n"
- ".align 4\n"
- "kernel_thread_helper:\n\t"
- "movl %edx,%eax\n\t"
- "pushl %edx\n\t"
- "call *%ebx\n\t"
- "pushl %eax\n\t"
- "call do_exit\n"
- ".previous");
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
- struct pt_regs regs;
-
- memset(&regs, 0, sizeof(regs));
-
- regs.ebx = (unsigned long) fn;
- regs.edx = (unsigned long) arg;
-
- regs.xds = __USER_DS;
- regs.xes = __USER_DS;
- regs.orig_eax = -1;
- regs.eip = (unsigned long) kernel_thread_helper;
- regs.xcs = __KERNEL_CS;
- regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
- * Free current thread data structures etc..
- */
-void exit_thread(void)
-{
- struct task_struct *tsk = current;
- struct thread_struct *t = &tsk->thread;
-
- /*
- * Remove function-return probe instances associated with this task
- * and put them back on the free list. Do not insert an exit probe for
- * this function, it will be disabled by kprobe_flush_task if you do.
- */
- kprobe_flush_task(tsk);
-
- /* The process may have allocated an io port bitmap... nuke it. */
- if (unlikely(NULL != t->io_bitmap_ptr)) {
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
-
- kfree(t->io_bitmap_ptr);
- t->io_bitmap_ptr = NULL;
- /*
- * Careful, clear this in the TSS too:
- */
- memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
- t->io_bitmap_max = 0;
- tss->io_bitmap_owner = NULL;
- tss->io_bitmap_max = 0;
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
- put_cpu();
- }
-}
-
-void flush_thread(void)
-{
- struct task_struct *tsk = current;
-
- /*
- * Remove function-return probe instances associated with this task
- * and put them back on the free list. Do not insert an exit probe for
- * this function, it will be disabled by kprobe_flush_task if you do.
- */
- kprobe_flush_task(tsk);
-
- memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- /*
- * Forget coprocessor state..
- */
- clear_fpu(tsk);
- clear_used_math();
-}
-
-void release_thread(struct task_struct *dead_task)
-{
- if (dead_task->mm) {
- // temporary debugging check
- if (dead_task->mm->context.size) {
- printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
- dead_task->comm,
- dead_task->mm->context.ldt,
- dead_task->mm->context.size);
- BUG();
- }
- }
-
- release_vm86_irqs(dead_task);
-}
-
-/*
- * This gets called before we allocate a new thread and copy
- * the current task into it.
- */
-void prepare_to_copy(struct task_struct *tsk)
-{
- unlazy_fpu(tsk);
-}
-
-int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
- unsigned long unused,
- struct task_struct * p, struct pt_regs * regs)
-{
- struct pt_regs * childregs;
- struct task_struct *tsk;
- int err;
-
- childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
- /*
- * The below -8 is to reserve 8 bytes on top of the ring0 stack.
- * This is necessary to guarantee that the entire "struct pt_regs"
- * is accessable even if the CPU haven't stored the SS/ESP registers
- * on the stack (interrupt gate does not save these registers
- * when switching to the same priv ring).
- * Therefore beware: accessing the xss/esp fields of the
- * "struct pt_regs" is possible, but they may contain the
- * completely wrong values.
- */
- childregs = (struct pt_regs *) ((unsigned long) childregs - 8);
- *childregs = *regs;
- childregs->eax = 0;
- childregs->esp = esp;
-
- p->thread.esp = (unsigned long) childregs;
- p->thread.esp0 = (unsigned long) (childregs+1);
-
- p->thread.eip = (unsigned long) ret_from_fork;
-
- savesegment(fs,p->thread.fs);
- savesegment(gs,p->thread.gs);
-
- tsk = current;
- if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) {
- p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
- if (!p->thread.io_bitmap_ptr) {
- p->thread.io_bitmap_max = 0;
- return -ENOMEM;
- }
- memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
- IO_BITMAP_BYTES);
- }
-
- /*
- * Set a new TLS for the child thread?
- */
- if (clone_flags & CLONE_SETTLS) {
- struct desc_struct *desc;
- struct user_desc info;
- int idx;
-
- err = -EFAULT;
- if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))
- goto out;
- err = -EINVAL;
- if (LDT_empty(&info))
- goto out;
-
- idx = info.entry_number;
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- goto out;
-
- desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
- desc->a = LDT_entry_a(&info);
- desc->b = LDT_entry_b(&info);
- }
-
- err = 0;
- out:
- if (err && p->thread.io_bitmap_ptr) {
- kfree(p->thread.io_bitmap_ptr);
- p->thread.io_bitmap_max = 0;
- }
- return err;
-}
-
-/*
- * fill in the user structure for a core dump..
- */
-void dump_thread(struct pt_regs * regs, struct user * dump)
-{
- int i;
-
-/* changed the size calculations - should hopefully work better. lbt */
- dump->magic = CMAGIC;
- dump->start_code = 0;
- dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
- dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
- dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
- dump->u_dsize -= dump->u_tsize;
- dump->u_ssize = 0;
- for (i = 0; i < 8; i++)
- dump->u_debugreg[i] = current->thread.debugreg[i];
-
- if (dump->start_stack < TASK_SIZE)
- dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
-
- dump->regs.ebx = regs->ebx;
- dump->regs.ecx = regs->ecx;
- dump->regs.edx = regs->edx;
- dump->regs.esi = regs->esi;
- dump->regs.edi = regs->edi;
- dump->regs.ebp = regs->ebp;
- dump->regs.eax = regs->eax;
- dump->regs.ds = regs->xds;
- dump->regs.es = regs->xes;
- savesegment(fs,dump->regs.fs);
- savesegment(gs,dump->regs.gs);
- dump->regs.orig_eax = regs->orig_eax;
- dump->regs.eip = regs->eip;
- dump->regs.cs = regs->xcs;
- dump->regs.eflags = regs->eflags;
- dump->regs.esp = regs->esp;
- dump->regs.ss = regs->xss;
-
- dump->u_fpvalid = dump_fpu (regs, &dump->i387);
-}
-EXPORT_SYMBOL(dump_thread);
-
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
- struct pt_regs ptregs;
-
- ptregs = *(struct pt_regs *)
- ((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs));
- ptregs.xcs &= 0xffff;
- ptregs.xds &= 0xffff;
- ptregs.xes &= 0xffff;
- ptregs.xss &= 0xffff;
-
- elf_core_copy_regs(regs, &ptregs);
-
- return 1;
-}
-
-static inline void
-handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
-{
- if (!next->io_bitmap_ptr) {
- /*
- * Disable the bitmap via an invalid offset. We still cache
- * the previous bitmap owner and the IO bitmap contents:
- */
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
- return;
- }
- if (likely(next == tss->io_bitmap_owner)) {
- /*
- * Previous owner of the bitmap (hence the bitmap content)
- * matches the next task, we dont have to do anything but
- * to set a valid offset in the TSS:
- */
- tss->io_bitmap_base = IO_BITMAP_OFFSET;
- return;
- }
- /*
- * Lazy TSS's I/O bitmap copy. We set an invalid offset here
- * and we let the task to get a GPF in case an I/O instruction
- * is performed. The handler of the GPF will verify that the
- * faulting task has a valid I/O bitmap and, it true, does the
- * real copy and restart the instruction. This will save us
- * redundant copies when the currently switched task does not
- * perform any I/O during its timeslice.
- */
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
-}
-
-/*
- * This function selects if the context switch from prev to next
- * has to tweak the TSC disable bit in the cr4.
- */
-static inline void disable_tsc(struct task_struct *prev_p,
- struct task_struct *next_p)
-{
- struct thread_info *prev, *next;
-
- /*
- * gcc should eliminate the ->thread_info dereference if
- * has_secure_computing returns 0 at compile time (SECCOMP=n).
- */
- prev = prev_p->thread_info;
- next = next_p->thread_info;
-
- if (has_secure_computing(prev) || has_secure_computing(next)) {
- /* slow path here */
- if (has_secure_computing(prev) &&
- !has_secure_computing(next)) {
- write_cr4(read_cr4() & ~X86_CR4_TSD);
- } else if (!has_secure_computing(prev) &&
- has_secure_computing(next))
- write_cr4(read_cr4() | X86_CR4_TSD);
- }
-}
-
-/*
- * switch_to(x,yn) should switch tasks from x to y.
- *
- * We fsave/fwait so that an exception goes off at the right time
- * (as a call from the fsave or fwait in effect) rather than to
- * the wrong process. Lazy FP saving no longer makes any sense
- * with modern CPU's, and this simplifies a lot of things (SMP
- * and UP become the same).
- *
- * NOTE! We used to use the x86 hardware context switching. The
- * reason for not using it any more becomes apparent when you
- * try to recover gracefully from saved state that is no longer
- * valid (stale segment register values in particular). With the
- * hardware task-switch, there is no way to fix up bad state in
- * a reasonable manner.
- *
- * The fact that Intel documents the hardware task-switching to
- * be slow is a fairly red herring - this code is not noticeably
- * faster. However, there _is_ some room for improvement here,
- * so the performance issues may eventually be a valid point.
- * More important, however, is the fact that this allows us much
- * more flexibility.
- *
- * The return value (in %eax) will be the "prev" task after
- * the task-switch, and shows up in ret_from_fork in entry.S,
- * for example.
- */
-struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
-{
- struct thread_struct *prev = &prev_p->thread,
- *next = &next_p->thread;
- int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
-
- /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
-
- __unlazy_fpu(prev_p);
-
- /*
- * Reload esp0.
- */
- load_esp0(tss, next);
-
- /*
- * Save away %fs and %gs. No need to save %es and %ds, as
- * those are always kernel segments while inside the kernel.
- * Doing this before setting the new TLS descriptors avoids
- * the situation where we temporarily have non-reloadable
- * segments in %fs and %gs. This could be an issue if the
- * NMI handler ever used %fs or %gs (it does not today), or
- * if the kernel is running inside of a hypervisor layer.
- */
- savesegment(fs, prev->fs);
- savesegment(gs, prev->gs);
-
- /*
- * Load the per-thread Thread-Local Storage descriptor.
- */
- load_TLS(next, cpu);
-
- /*
- * Restore %fs and %gs if needed.
- *
- * Glibc normally makes %fs be zero, and %gs is one of
- * the TLS segments.
- */
- if (unlikely(prev->fs | next->fs))
- loadsegment(fs, next->fs);
-
- if (prev->gs | next->gs)
- loadsegment(gs, next->gs);
-
- /*
- * Restore IOPL if needed.
- */
- if (unlikely(prev->iopl != next->iopl))
- set_iopl_mask(next->iopl);
-
- /*
- * Now maybe reload the debug registers
- */
- if (unlikely(next->debugreg[7])) {
- set_debugreg(next->debugreg[0], 0);
- set_debugreg(next->debugreg[1], 1);
- set_debugreg(next->debugreg[2], 2);
- set_debugreg(next->debugreg[3], 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg[6], 6);
- set_debugreg(next->debugreg[7], 7);
- }
-
- if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
- handle_io_bitmap(next, tss);
-
- disable_tsc(prev_p, next_p);
-
- return prev_p;
-}
-
-asmlinkage int sys_fork(struct pt_regs regs)
-{
- return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
-}
-
-asmlinkage int sys_clone(struct pt_regs regs)
-{
- unsigned long clone_flags;
- unsigned long newsp;
- int __user *parent_tidptr, *child_tidptr;
-
- clone_flags = regs.ebx;
- newsp = regs.ecx;
- parent_tidptr = (int __user *)regs.edx;
- child_tidptr = (int __user *)regs.edi;
- if (!newsp)
- newsp = regs.esp;
- return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
-}
-
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-asmlinkage int sys_vfork(struct pt_regs regs)
-{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
-}
-
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage int sys_execve(struct pt_regs regs)
-{
- int error;
- char * filename;
-
- filename = getname((char __user *) regs.ebx);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- goto out;
- error = do_execve(filename,
- (char __user * __user *) regs.ecx,
- (char __user * __user *) regs.edx,
- &regs);
- if (error == 0) {
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
- task_unlock(current);
- /* Make sure we don't return using sysenter.. */
- set_thread_flag(TIF_IRET);
- }
- putname(filename);
-out:
- return error;
-}
-
-#define top_esp (THREAD_SIZE - sizeof(unsigned long))
-#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
-
-unsigned long get_wchan(struct task_struct *p)
-{
- unsigned long ebp, esp, eip;
- unsigned long stack_page;
- int count = 0;
- if (!p || p == current || p->state == TASK_RUNNING)
- return 0;
- stack_page = (unsigned long)p->thread_info;
- esp = p->thread.esp;
- if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
- return 0;
- /* include/asm-i386/system.h:switch_to() pushes ebp last. */
- ebp = *(unsigned long *) esp;
- do {
- if (ebp < stack_page || ebp > top_ebp+stack_page)
- return 0;
- eip = *(unsigned long *) (ebp+4);
- if (!in_sched_functions(eip))
- return eip;
- ebp = *(unsigned long *) ebp;
- } while (count++ < 16);
- return 0;
-}
-EXPORT_SYMBOL(get_wchan);
-
-/*
- * sys_alloc_thread_area: get a yet unused TLS descriptor index.
- */
-static int get_free_idx(void)
-{
- struct thread_struct *t = &current->thread;
- int idx;
-
- for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
- if (desc_empty(t->tls_array + idx))
- return idx + GDT_ENTRY_TLS_MIN;
- return -ESRCH;
-}
-
-/*
- * Set a given TLS descriptor:
- */
-asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
-{
- struct thread_struct *t = &current->thread;
- struct user_desc info;
- struct desc_struct *desc;
- int cpu, idx;
-
- if (copy_from_user(&info, u_info, sizeof(info)))
- return -EFAULT;
- idx = info.entry_number;
-
- /*
- * index -1 means the kernel should try to find and
- * allocate an empty descriptor:
- */
- if (idx == -1) {
- idx = get_free_idx();
- if (idx < 0)
- return idx;
- if (put_user(idx, &u_info->entry_number))
- return -EFAULT;
- }
-
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
-
- /*
- * We must not get preempted while modifying the TLS.
- */
- cpu = get_cpu();
-
- if (LDT_empty(&info)) {
- desc->a = 0;
- desc->b = 0;
- } else {
- desc->a = LDT_entry_a(&info);
- desc->b = LDT_entry_b(&info);
- }
- load_TLS(t, cpu);
-
- put_cpu();
-
- return 0;
-}
-
-/*
- * Get the current Thread-Local Storage area:
- */
-
-#define GET_BASE(desc) ( \
- (((desc)->a >> 16) & 0x0000ffff) | \
- (((desc)->b << 16) & 0x00ff0000) | \
- ( (desc)->b & 0xff000000) )
-
-#define GET_LIMIT(desc) ( \
- ((desc)->a & 0x0ffff) | \
- ((desc)->b & 0xf0000) )
-
-#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
-#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
-#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
-#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
-#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
-#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
-
-asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
-{
- struct user_desc info;
- struct desc_struct *desc;
- int idx;
-
- if (get_user(idx, &u_info->entry_number))
- return -EFAULT;
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- memset(&info, 0, sizeof(info));
-
- desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-
- info.entry_number = idx;
- info.base_addr = GET_BASE(desc);
- info.limit = GET_LIMIT(desc);
- info.seg_32bit = GET_32BIT(desc);
- info.contents = GET_CONTENTS(desc);
- info.read_exec_only = !GET_WRITABLE(desc);
- info.limit_in_pages = GET_LIMIT_PAGES(desc);
- info.seg_not_present = !GET_PRESENT(desc);
- info.useable = GET_USEABLE(desc);
-
- if (copy_to_user(u_info, &info, sizeof(info)))
- return -EFAULT;
- return 0;
-}
-
-unsigned long arch_align_stack(unsigned long sp)
-{
- if (randomize_va_space)
- sp -= get_random_int() % 8192;
- return sp & ~0xf;
-}
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
deleted file mode 100644
index 5ffbb4b7ad0..00000000000
--- a/arch/i386/kernel/ptrace.c
+++ /dev/null
@@ -1,730 +0,0 @@
-/* ptrace.c */
-/* By Ross Biro 1/23/92 */
-/*
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/audit.h>
-#include <linux/seccomp.h>
-#include <linux/signal.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/debugreg.h>
-#include <asm/ldt.h>
-#include <asm/desc.h>
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
-/* determines which flags the user has access to. */
-/* 1 = access 0 = no access */
-#define FLAG_MASK 0x00044dd5
-
-/* set's the trap flag. */
-#define TRAP_FLAG 0x100
-
-/*
- * Offset of eflags on child stack..
- */
-#define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs))
-
-static inline struct pt_regs *get_child_regs(struct task_struct *task)
-{
- void *stack_top = (void *)task->thread.esp0;
- return stack_top - sizeof(struct pt_regs);
-}
-
-/*
- * this routine will get a word off of the processes privileged stack.
- * the offset is how far from the base addr as stored in the TSS.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline int get_stack_long(struct task_struct *task, int offset)
-{
- unsigned char *stack;
-
- stack = (unsigned char *)task->thread.esp0;
- stack += offset;
- return (*((int *)stack));
-}
-
-/*
- * this routine will put a word on the processes privileged stack.
- * the offset is how far from the base addr as stored in the TSS.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
-static inline int put_stack_long(struct task_struct *task, int offset,
- unsigned long data)
-{
- unsigned char * stack;
-
- stack = (unsigned char *) task->thread.esp0;
- stack += offset;
- *(unsigned long *) stack = data;
- return 0;
-}
-
-static int putreg(struct task_struct *child,
- unsigned long regno, unsigned long value)
-{
- switch (regno >> 2) {
- case FS:
- if (value && (value & 3) != 3)
- return -EIO;
- child->thread.fs = value;
- return 0;
- case GS:
- if (value && (value & 3) != 3)
- return -EIO;
- child->thread.gs = value;
- return 0;
- case DS:
- case ES:
- if (value && (value & 3) != 3)
- return -EIO;
- value &= 0xffff;
- break;
- case SS:
- case CS:
- if ((value & 3) != 3)
- return -EIO;
- value &= 0xffff;
- break;
- case EFL:
- value &= FLAG_MASK;
- value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK;
- break;
- }
- if (regno > GS*4)
- regno -= 2*4;
- put_stack_long(child, regno - sizeof(struct pt_regs), value);
- return 0;
-}
-
-static unsigned long getreg(struct task_struct *child,
- unsigned long regno)
-{
- unsigned long retval = ~0UL;
-
- switch (regno >> 2) {
- case FS:
- retval = child->thread.fs;
- break;
- case GS:
- retval = child->thread.gs;
- break;
- case DS:
- case ES:
- case SS:
- case CS:
- retval = 0xffff;
- /* fall through */
- default:
- if (regno > GS*4)
- regno -= 2*4;
- regno = regno - sizeof(struct pt_regs);
- retval &= get_stack_long(child, regno);
- }
- return retval;
-}
-
-#define LDT_SEGMENT 4
-
-static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs)
-{
- unsigned long addr, seg;
-
- addr = regs->eip;
- seg = regs->xcs & 0xffff;
- if (regs->eflags & VM_MASK) {
- addr = (addr & 0xffff) + (seg << 4);
- return addr;
- }
-
- /*
- * We'll assume that the code segments in the GDT
- * are all zero-based. That is largely true: the
- * TLS segments are used for data, and the PNPBIOS
- * and APM bios ones we just ignore here.
- */
- if (seg & LDT_SEGMENT) {
- u32 *desc;
- unsigned long base;
-
- down(&child->mm->context.sem);
- desc = child->mm->context.ldt + (seg & ~7);
- base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000);
-
- /* 16-bit code segment? */
- if (!((desc[1] >> 22) & 1))
- addr &= 0xffff;
- addr += base;
- up(&child->mm->context.sem);
- }
- return addr;
-}
-
-static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs)
-{
- int i, copied;
- unsigned char opcode[16];
- unsigned long addr = convert_eip_to_linear(child, regs);
-
- copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
- for (i = 0; i < copied; i++) {
- switch (opcode[i]) {
- /* popf */
- case 0x9d:
- return 1;
- /* opcode and address size prefixes */
- case 0x66: case 0x67:
- continue;
- /* irrelevant prefixes (segment overrides and repeats) */
- case 0x26: case 0x2e:
- case 0x36: case 0x3e:
- case 0x64: case 0x65:
- case 0xf0: case 0xf2: case 0xf3:
- continue;
-
- /*
- * pushf: NOTE! We should probably not let
- * the user see the TF bit being set. But
- * it's more pain than it's worth to avoid
- * it, and a debugger could emulate this
- * all in user space if it _really_ cares.
- */
- case 0x9c:
- default:
- return 0;
- }
- }
- return 0;
-}
-
-static void set_singlestep(struct task_struct *child)
-{
- struct pt_regs *regs = get_child_regs(child);
-
- /*
- * Always set TIF_SINGLESTEP - this guarantees that
- * we single-step system calls etc.. This will also
- * cause us to set TF when returning to user mode.
- */
- set_tsk_thread_flag(child, TIF_SINGLESTEP);
-
- /*
- * If TF was already set, don't do anything else
- */
- if (regs->eflags & TRAP_FLAG)
- return;
-
- /* Set TF on the kernel stack.. */
- regs->eflags |= TRAP_FLAG;
-
- /*
- * ..but if TF is changed by the instruction we will trace,
- * don't mark it as being "us" that set it, so that we
- * won't clear it by hand later.
- */
- if (is_at_popf(child, regs))
- return;
-
- child->ptrace |= PT_DTRACE;
-}
-
-static void clear_singlestep(struct task_struct *child)
-{
- /* Always clear TIF_SINGLESTEP... */
- clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-
- /* But touch TF only if it was set by us.. */
- if (child->ptrace & PT_DTRACE) {
- struct pt_regs *regs = get_child_regs(child);
- regs->eflags &= ~TRAP_FLAG;
- child->ptrace &= ~PT_DTRACE;
- }
-}
-
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure the single step bit is not set.
- */
-void ptrace_disable(struct task_struct *child)
-{
- clear_singlestep(child);
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-}
-
-/*
- * Perform get_thread_area on behalf of the traced child.
- */
-static int
-ptrace_get_thread_area(struct task_struct *child,
- int idx, struct user_desc __user *user_desc)
-{
- struct user_desc info;
- struct desc_struct *desc;
-
-/*
- * Get the current Thread-Local Storage area:
- */
-
-#define GET_BASE(desc) ( \
- (((desc)->a >> 16) & 0x0000ffff) | \
- (((desc)->b << 16) & 0x00ff0000) | \
- ( (desc)->b & 0xff000000) )
-
-#define GET_LIMIT(desc) ( \
- ((desc)->a & 0x0ffff) | \
- ((desc)->b & 0xf0000) )
-
-#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
-#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
-#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
-#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
-#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
-#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
-
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-
- info.entry_number = idx;
- info.base_addr = GET_BASE(desc);
- info.limit = GET_LIMIT(desc);
- info.seg_32bit = GET_32BIT(desc);
- info.contents = GET_CONTENTS(desc);
- info.read_exec_only = !GET_WRITABLE(desc);
- info.limit_in_pages = GET_LIMIT_PAGES(desc);
- info.seg_not_present = !GET_PRESENT(desc);
- info.useable = GET_USEABLE(desc);
-
- if (copy_to_user(user_desc, &info, sizeof(info)))
- return -EFAULT;
-
- return 0;
-}
-
-/*
- * Perform set_thread_area on behalf of the traced child.
- */
-static int
-ptrace_set_thread_area(struct task_struct *child,
- int idx, struct user_desc __user *user_desc)
-{
- struct user_desc info;
- struct desc_struct *desc;
-
- if (copy_from_user(&info, user_desc, sizeof(info)))
- return -EFAULT;
-
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return -EINVAL;
-
- desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
- if (LDT_empty(&info)) {
- desc->a = 0;
- desc->b = 0;
- } else {
- desc->a = LDT_entry_a(&info);
- desc->b = LDT_entry_b(&info);
- }
-
- return 0;
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
- struct user * dummy = NULL;
- int i, ret;
- unsigned long __user *datap = (unsigned long __user *)data;
-
- switch (request) {
- /* when I and D space are separate, these will need to be fixed. */
- case PTRACE_PEEKTEXT: /* read word at location addr. */
- case PTRACE_PEEKDATA: {
- unsigned long tmp;
- int copied;
-
- copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
- ret = -EIO;
- if (copied != sizeof(tmp))
- break;
- ret = put_user(tmp, datap);
- break;
- }
-
- /* read the word at location addr in the USER area. */
- case PTRACE_PEEKUSR: {
- unsigned long tmp;
-
- ret = -EIO;
- if ((addr & 3) || addr < 0 ||
- addr > sizeof(struct user) - 3)
- break;
-
- tmp = 0; /* Default return condition */
- if(addr < FRAME_SIZE*sizeof(long))
- tmp = getreg(child, addr);
- if(addr >= (long) &dummy->u_debugreg[0] &&
- addr <= (long) &dummy->u_debugreg[7]){
- addr -= (long) &dummy->u_debugreg[0];
- addr = addr >> 2;
- tmp = child->thread.debugreg[addr];
- }
- ret = put_user(tmp, datap);
- break;
- }
-
- /* when I and D space are separate, this will have to be fixed. */
- case PTRACE_POKETEXT: /* write the word at location addr. */
- case PTRACE_POKEDATA:
- ret = 0;
- if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
- break;
- ret = -EIO;
- break;
-
- case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
- ret = -EIO;
- if ((addr & 3) || addr < 0 ||
- addr > sizeof(struct user) - 3)
- break;
-
- if (addr < FRAME_SIZE*sizeof(long)) {
- ret = putreg(child, addr, data);
- break;
- }
- /* We need to be very careful here. We implicitly
- want to modify a portion of the task_struct, and we
- have to be selective about what portions we allow someone
- to modify. */
-
- ret = -EIO;
- if(addr >= (long) &dummy->u_debugreg[0] &&
- addr <= (long) &dummy->u_debugreg[7]){
-
- if(addr == (long) &dummy->u_debugreg[4]) break;
- if(addr == (long) &dummy->u_debugreg[5]) break;
- if(addr < (long) &dummy->u_debugreg[4] &&
- ((unsigned long) data) >= TASK_SIZE-3) break;
-
- /* Sanity-check data. Take one half-byte at once with
- * check = (val >> (16 + 4*i)) & 0xf. It contains the
- * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
- * 2 and 3 are LENi. Given a list of invalid values,
- * we do mask |= 1 << invalid_value, so that
- * (mask >> check) & 1 is a correct test for invalid
- * values.
- *
- * R/Wi contains the type of the breakpoint /
- * watchpoint, LENi contains the length of the watched
- * data in the watchpoint case.
- *
- * The invalid values are:
- * - LENi == 0x10 (undefined), so mask |= 0x0f00.
- * - R/Wi == 0x10 (break on I/O reads or writes), so
- * mask |= 0x4444.
- * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
- * 0x1110.
- *
- * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
- *
- * See the Intel Manual "System Programming Guide",
- * 15.2.4
- *
- * Note that LENi == 0x10 is defined on x86_64 in long
- * mode (i.e. even for 32-bit userspace software, but
- * 64-bit kernel), so the x86_64 mask value is 0x5454.
- * See the AMD manual no. 24593 (AMD64 System
- * Programming)*/
-
- if(addr == (long) &dummy->u_debugreg[7]) {
- data &= ~DR_CONTROL_RESERVED;
- for(i=0; i<4; i++)
- if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
- goto out_tsk;
- }
-
- addr -= (long) &dummy->u_debugreg;
- addr = addr >> 2;
- child->thread.debugreg[addr] = data;
- ret = 0;
- }
- break;
-
- case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */
- case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
- case PTRACE_CONT: /* restart after signal. */
- ret = -EIO;
- if (!valid_signal(data))
- break;
- if (request == PTRACE_SYSEMU) {
- set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- } else if (request == PTRACE_SYSCALL) {
- set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
- } else {
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- }
- child->exit_code = data;
- /* make sure the single step bit is not set. */
- clear_singlestep(child);
- wake_up_process(child);
- ret = 0;
- break;
-
-/*
- * make the child exit. Best I can do is send it a sigkill.
- * perhaps it should be put in the status that it wants to
- * exit.
- */
- case PTRACE_KILL:
- ret = 0;
- if (child->exit_state == EXIT_ZOMBIE) /* already dead */
- break;
- child->exit_code = SIGKILL;
- /* make sure the single step bit is not set. */
- clear_singlestep(child);
- wake_up_process(child);
- break;
-
- case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */
- case PTRACE_SINGLESTEP: /* set the trap flag. */
- ret = -EIO;
- if (!valid_signal(data))
- break;
-
- if (request == PTRACE_SYSEMU_SINGLESTEP)
- set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
- else
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- set_singlestep(child);
- child->exit_code = data;
- /* give it a chance to run. */
- wake_up_process(child);
- ret = 0;
- break;
-
- case PTRACE_DETACH:
- /* detach a process that was attached. */
- ret = ptrace_detach(child, data);
- break;
-
- case PTRACE_GETREGS: { /* Get all gp regs from the child. */
- if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) {
- ret = -EIO;
- break;
- }
- for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
- __put_user(getreg(child, i), datap);
- datap++;
- }
- ret = 0;
- break;
- }
-
- case PTRACE_SETREGS: { /* Set all gp regs in the child. */
- unsigned long tmp;
- if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) {
- ret = -EIO;
- break;
- }
- for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
- __get_user(tmp, datap);
- putreg(child, i, tmp);
- datap++;
- }
- ret = 0;
- break;
- }
-
- case PTRACE_GETFPREGS: { /* Get the child FPU state. */
- if (!access_ok(VERIFY_WRITE, datap,
- sizeof(struct user_i387_struct))) {
- ret = -EIO;
- break;
- }
- ret = 0;
- if (!tsk_used_math(child))
- init_fpu(child);
- get_fpregs((struct user_i387_struct __user *)data, child);
- break;
- }
-
- case PTRACE_SETFPREGS: { /* Set the child FPU state. */
- if (!access_ok(VERIFY_READ, datap,
- sizeof(struct user_i387_struct))) {
- ret = -EIO;
- break;
- }
- set_stopped_child_used_math(child);
- set_fpregs(child, (struct user_i387_struct __user *)data);
- ret = 0;
- break;
- }
-
- case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */
- if (!access_ok(VERIFY_WRITE, datap,
- sizeof(struct user_fxsr_struct))) {
- ret = -EIO;
- break;
- }
- if (!tsk_used_math(child))
- init_fpu(child);
- ret = get_fpxregs((struct user_fxsr_struct __user *)data, child);
- break;
- }
-
- case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */
- if (!access_ok(VERIFY_READ, datap,
- sizeof(struct user_fxsr_struct))) {
- ret = -EIO;
- break;
- }
- set_stopped_child_used_math(child);
- ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data);
- break;
- }
-
- case PTRACE_GET_THREAD_AREA:
- ret = ptrace_get_thread_area(child, addr,
- (struct user_desc __user *) data);
- break;
-
- case PTRACE_SET_THREAD_AREA:
- ret = ptrace_set_thread_area(child, addr,
- (struct user_desc __user *) data);
- break;
-
- default:
- ret = ptrace_request(child, request, addr, data);
- break;
- }
- out_tsk:
- return ret;
-}
-
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
-{
- struct siginfo info;
-
- tsk->thread.trap_no = 1;
- tsk->thread.error_code = error_code;
-
- memset(&info, 0, sizeof(info));
- info.si_signo = SIGTRAP;
- info.si_code = TRAP_BRKPT;
-
- /* User-mode eip? */
- info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL;
-
- /* Send us the fakey SIGTRAP */
- force_sig_info(SIGTRAP, &info, tsk);
-}
-
-/* notification of system call entry/exit
- * - triggered by current->work.syscall_trace
- */
-__attribute__((regparm(3)))
-int do_syscall_trace(struct pt_regs *regs, int entryexit)
-{
- int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
- /*
- * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
- * interception
- */
- int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
- int ret = 0;
-
- /* do the secure computing check first */
- if (!entryexit)
- secure_computing(regs->orig_eax);
-
- if (unlikely(current->audit_context)) {
- if (entryexit)
- audit_syscall_exit(current, AUDITSC_RESULT(regs->eax),
- regs->eax);
- /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
- * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
- * not used, entry.S will call us only on syscall exit, not
- * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
- * calling send_sigtrap() on syscall entry.
- *
- * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
- * is_singlestep is false, despite his name, so we will still do
- * the correct thing.
- */
- else if (is_singlestep)
- goto out;
- }
-
- if (!(current->ptrace & PT_PTRACED))
- goto out;
-
- /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
- * and then is resumed with SYSEMU_SINGLESTEP, it will come in
- * here. We have to check this and return */
- if (is_sysemu && entryexit)
- return 0;
-
- /* Fake a debug trap */
- if (is_singlestep)
- send_sigtrap(current, regs, 0);
-
- if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
- goto out;
-
- /* the 0x80 provides a way for the tracing parent to distinguish
- between a syscall stop and SIGTRAP delivery */
- /* Note that the debugger could change the result of test_thread_flag!*/
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
-
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
- */
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
- ret = is_sysemu;
-out:
- if (unlikely(current->audit_context) && !entryexit)
- audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax,
- regs->ebx, regs->ecx, regs->edx, regs->esi);
- if (ret == 0)
- return 0;
-
- regs->orig_eax = -1; /* force skip of syscall restarting */
- if (unlikely(current->audit_context))
- audit_syscall_exit(current, AUDITSC_RESULT(regs->eax),
- regs->eax);
- return 1;
-}
diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c
deleted file mode 100644
index aaf89cb2bc5..00000000000
--- a/arch/i386/kernel/quirks.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * This file contains work-arounds for x86 and x86_64 platform bugs.
- */
-#include <linux/config.h>
-#include <linux/pci.h>
-#include <linux/irq.h>
-
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
-
-static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
-{
- u8 config, rev;
- u32 word;
-
- /* BIOS may enable hardware IRQ balancing for
- * E7520/E7320/E7525(revision ID 0x9 and below)
- * based platforms.
- * Disable SW irqbalance/affinity on those platforms.
- */
- pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
- if (rev > 0x9)
- return;
-
- printk(KERN_INFO "Intel E7520/7320/7525 detected.");
-
- /* enable access to config space*/
- pci_read_config_byte(dev, 0xf4, &config);
- config |= 0x2;
- pci_write_config_byte(dev, 0xf4, config);
-
- /* read xTPR register */
- raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
-
- if (!(word & (1 << 13))) {
- printk(KERN_INFO "Disabling irq balancing and affinity\n");
-#ifdef CONFIG_IRQBALANCE
- irqbalance_disable("");
-#endif
- noirqdebug_setup("");
-#ifdef CONFIG_PROC_FS
- no_irq_affinity = 1;
-#endif
- }
-
- config &= ~0x2;
- /* disable access to config space*/
- pci_write_config_byte(dev, 0xf4, config);
-}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
-#endif
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
deleted file mode 100644
index 350ea6680f6..00000000000
--- a/arch/i386/kernel/reboot.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * linux/arch/i386/kernel/reboot.c
- */
-
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/efi.h>
-#include <linux/dmi.h>
-#include <linux/ctype.h>
-#include <asm/uaccess.h>
-#include <asm/apic.h>
-#include <asm/desc.h>
-#include "mach_reboot.h"
-#include <linux/reboot_fixups.h>
-
-/*
- * Power off function, if any
- */
-void (*pm_power_off)(void);
-EXPORT_SYMBOL(pm_power_off);
-
-static int reboot_mode;
-static int reboot_thru_bios;
-
-#ifdef CONFIG_SMP
-static int reboot_cpu = -1;
-#endif
-static int __init reboot_setup(char *str)
-{
- while(1) {
- switch (*str) {
- case 'w': /* "warm" reboot (no memory testing etc) */
- reboot_mode = 0x1234;
- break;
- case 'c': /* "cold" reboot (with memory testing etc) */
- reboot_mode = 0x0;
- break;
- case 'b': /* "bios" reboot by jumping through the BIOS */
- reboot_thru_bios = 1;
- break;
- case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */
- reboot_thru_bios = 0;
- break;
-#ifdef CONFIG_SMP
- case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
- if (isdigit(*(str+1))) {
- reboot_cpu = (int) (*(str+1) - '0');
- if (isdigit(*(str+2)))
- reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
- }
- /* we will leave sorting out the final value
- when we are ready to reboot, since we might not
- have set up boot_cpu_id or smp_num_cpu */
- break;
-#endif
- }
- if((str = strchr(str,',')) != NULL)
- str++;
- else
- break;
- }
- return 1;
-}
-
-__setup("reboot=", reboot_setup);
-
-/*
- * Reboot options and system auto-detection code provided by
- * Dell Inc. so their systems "just work". :-)
- */
-
-/*
- * Some machines require the "reboot=b" commandline option, this quirk makes that automatic.
- */
-static int __init set_bios_reboot(struct dmi_system_id *d)
-{
- if (!reboot_thru_bios) {
- reboot_thru_bios = 1;
- printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident);
- }
- return 0;
-}
-
-static struct dmi_system_id __initdata reboot_dmi_table[] = {
- { /* Handle problems with rebooting on Dell 1300's */
- .callback = set_bios_reboot,
- .ident = "Dell PowerEdge 1300",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"),
- },
- },
- { /* Handle problems with rebooting on Dell 300's */
- .callback = set_bios_reboot,
- .ident = "Dell PowerEdge 300",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
- },
- },
- { /* Handle problems with rebooting on Dell 2400's */
- .callback = set_bios_reboot,
- .ident = "Dell PowerEdge 2400",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
- },
- },
- { }
-};
-
-static int __init reboot_init(void)
-{
- dmi_check_system(reboot_dmi_table);
- return 0;
-}
-
-core_initcall(reboot_init);
-
-/* The following code and data reboots the machine by switching to real
- mode and jumping to the BIOS reset entry point, as if the CPU has
- really been reset. The previous version asked the keyboard
- controller to pulse the CPU reset line, which is more thorough, but
- doesn't work with at least one type of 486 motherboard. It is easy
- to stop this code working; hence the copious comments. */
-
-static unsigned long long
-real_mode_gdt_entries [3] =
-{
- 0x0000000000000000ULL, /* Null descriptor */
- 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */
- 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */
-};
-
-static struct
-{
- unsigned short size __attribute__ ((packed));
- unsigned long long * base __attribute__ ((packed));
-}
-real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries },
-real_mode_idt = { 0x3ff, NULL },
-no_idt = { 0, NULL };
-
-
-/* This is 16-bit protected mode code to disable paging and the cache,
- switch to real mode and jump to the BIOS reset code.
-
- The instruction that switches to real mode by writing to CR0 must be
- followed immediately by a far jump instruction, which set CS to a
- valid value for real mode, and flushes the prefetch queue to avoid
- running instructions that have already been decoded in protected
- mode.
-
- Clears all the flags except ET, especially PG (paging), PE
- (protected-mode enable) and TS (task switch for coprocessor state
- save). Flushes the TLB after paging has been disabled. Sets CD and
- NW, to disable the cache on a 486, and invalidates the cache. This
- is more like the state of a 486 after reset. I don't know if
- something else should be done for other chips.
-
- More could be done here to set up the registers as if a CPU reset had
- occurred; hopefully real BIOSs don't assume much. */
-
-static unsigned char real_mode_switch [] =
-{
- 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */
- 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */
- 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, /* orl $0x60000000,%eax */
- 0x66, 0x0f, 0x22, 0xc0, /* movl %eax,%cr0 */
- 0x66, 0x0f, 0x22, 0xd8, /* movl %eax,%cr3 */
- 0x66, 0x0f, 0x20, 0xc3, /* movl %cr0,%ebx */
- 0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%ebx */
- 0x74, 0x02, /* jz f */
- 0x0f, 0x09, /* wbinvd */
- 0x24, 0x10, /* f: andb $0x10,al */
- 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */
-};
-static unsigned char jump_to_bios [] =
-{
- 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */
-};
-
-/*
- * Switch to real mode and then execute the code
- * specified by the code and length parameters.
- * We assume that length will aways be less that 100!
- */
-void machine_real_restart(unsigned char *code, int length)
-{
- unsigned long flags;
-
- local_irq_disable();
-
- /* Write zero to CMOS register number 0x0f, which the BIOS POST
- routine will recognize as telling it to do a proper reboot. (Well
- that's what this book in front of me says -- it may only apply to
- the Phoenix BIOS though, it's not clear). At the same time,
- disable NMIs by setting the top bit in the CMOS address register,
- as we're about to do peculiar things to the CPU. I'm not sure if
- `outb_p' is needed instead of just `outb'. Use it to be on the
- safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
- */
-
- spin_lock_irqsave(&rtc_lock, flags);
- CMOS_WRITE(0x00, 0x8f);
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- /* Remap the kernel at virtual address zero, as well as offset zero
- from the kernel segment. This assumes the kernel segment starts at
- virtual address PAGE_OFFSET. */
-
- memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
- sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
-
- /*
- * Use `swapper_pg_dir' as our page directory.
- */
- load_cr3(swapper_pg_dir);
-
- /* Write 0x1234 to absolute memory location 0x472. The BIOS reads
- this on booting to tell it to "Bypass memory test (also warm
- boot)". This seems like a fairly standard thing that gets set by
- REBOOT.COM programs, and the previous reset routine did this
- too. */
-
- *((unsigned short *)0x472) = reboot_mode;
-
- /* For the switch to real mode, copy some code to low memory. It has
- to be in the first 64k because it is running in 16-bit mode, and it
- has to have the same physical and virtual address, because it turns
- off paging. Copy it near the end of the first page, out of the way
- of BIOS variables. */
-
- memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100),
- real_mode_switch, sizeof (real_mode_switch));
- memcpy ((void *) (0x1000 - 100), code, length);
-
- /* Set up the IDT for real mode. */
-
- load_idt(&real_mode_idt);
-
- /* Set up a GDT from which we can load segment descriptors for real
- mode. The GDT is not used in real mode; it is just needed here to
- prepare the descriptors. */
-
- load_gdt(&real_mode_gdt);
-
- /* Load the data segment registers, and thus the descriptors ready for
- real mode. The base address of each segment is 0x100, 16 times the
- selector value being loaded here. This is so that the segment
- registers don't have to be reloaded after switching to real mode:
- the values are consistent for real mode operation already. */
-
- __asm__ __volatile__ ("movl $0x0010,%%eax\n"
- "\tmovl %%eax,%%ds\n"
- "\tmovl %%eax,%%es\n"
- "\tmovl %%eax,%%fs\n"
- "\tmovl %%eax,%%gs\n"
- "\tmovl %%eax,%%ss" : : : "eax");
-
- /* Jump to the 16-bit code that we copied earlier. It disables paging
- and the cache, switches to real mode, and jumps to the BIOS reset
- entry point. */
-
- __asm__ __volatile__ ("ljmp $0x0008,%0"
- :
- : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
-}
-#ifdef CONFIG_APM_MODULE
-EXPORT_SYMBOL(machine_real_restart);
-#endif
-
-void machine_shutdown(void)
-{
-#ifdef CONFIG_SMP
- int reboot_cpu_id;
-
- /* The boot cpu is always logical cpu 0 */
- reboot_cpu_id = 0;
-
- /* See if there has been given a command line override */
- if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
- cpu_isset(reboot_cpu, cpu_online_map)) {
- reboot_cpu_id = reboot_cpu;
- }
-
- /* Make certain the cpu I'm rebooting on is online */
- if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
- reboot_cpu_id = smp_processor_id();
- }
-
- /* Make certain I only run on the appropriate processor */
- set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
-
- /* O.K. Now that I'm on the appropriate processor, stop
- * all of the others, and disable their local APICs.
- */
-
- smp_send_stop();
-#endif /* CONFIG_SMP */
-
- lapic_shutdown();
-
-#ifdef CONFIG_X86_IO_APIC
- disable_IO_APIC();
-#endif
-}
-
-void machine_emergency_restart(void)
-{
- if (!reboot_thru_bios) {
- if (efi_enabled) {
- efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL);
- load_idt(&no_idt);
- __asm__ __volatile__("int3");
- }
- /* rebooting needs to touch the page at absolute addr 0 */
- *((unsigned short *)__va(0x472)) = reboot_mode;
- for (;;) {
- mach_reboot_fixups(); /* for board specific fixups */
- mach_reboot();
- /* That didn't work - force a triple fault.. */
- load_idt(&no_idt);
- __asm__ __volatile__("int3");
- }
- }
- if (efi_enabled)
- efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, NULL);
-
- machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
-}
-
-void machine_restart(char * __unused)
-{
- machine_shutdown();
- machine_emergency_restart();
-}
-
-void machine_halt(void)
-{
-}
-
-void machine_power_off(void)
-{
- machine_shutdown();
-
- if (pm_power_off)
- pm_power_off();
-}
-
-
diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c
deleted file mode 100644
index 10e21a4773d..00000000000
--- a/arch/i386/kernel/reboot_fixups.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * linux/arch/i386/kernel/reboot_fixups.c
- *
- * This is a good place to put board specific reboot fixups.
- *
- * List of supported fixups:
- * geode-gx1/cs5530a - Jaya Kumar <jayalk@intworks.biz>
- *
- */
-
-#include <asm/delay.h>
-#include <linux/pci.h>
-#include <linux/reboot_fixups.h>
-
-static void cs5530a_warm_reset(struct pci_dev *dev)
-{
- /* writing 1 to the reset control register, 0x44 causes the
- cs5530a to perform a system warm reset */
- pci_write_config_byte(dev, 0x44, 0x1);
- udelay(50); /* shouldn't get here but be safe and spin-a-while */
- return;
-}
-
-struct device_fixup {
- unsigned int vendor;
- unsigned int device;
- void (*reboot_fixup)(struct pci_dev *);
-};
-
-static struct device_fixup fixups_table[] = {
-{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
-};
-
-/*
- * we see if any fixup is available for our current hardware. if there
- * is a fixup, we call it and we expect to never return from it. if we
- * do return, we keep looking and then eventually fall back to the
- * standard mach_reboot on return.
- */
-void mach_reboot_fixups(void)
-{
- struct device_fixup *cur;
- struct pci_dev *dev;
- int i;
-
- for (i=0; i < ARRAY_SIZE(fixups_table); i++) {
- cur = &(fixups_table[i]);
- dev = pci_get_device(cur->vendor, cur->device, NULL);
- if (!dev)
- continue;
-
- cur->reboot_fixup(dev);
- }
-
- printk(KERN_WARNING "No reboot fixup found for your hardware\n");
-}
-
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S
deleted file mode 100644
index d312616effa..00000000000
--- a/arch/i386/kernel/relocate_kernel.S
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * relocate_kernel.S - put the kernel image in place to boot
- * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
- */
-
-#include <linux/linkage.h>
-
- /*
- * Must be relocatable PIC code callable as a C function, that once
- * it starts can not use the previous processes stack.
- */
- .globl relocate_new_kernel
-relocate_new_kernel:
- /* read the arguments and say goodbye to the stack */
- movl 4(%esp), %ebx /* page_list */
- movl 8(%esp), %ebp /* reboot_code_buffer */
- movl 12(%esp), %edx /* start address */
- movl 16(%esp), %ecx /* cpu_has_pae */
-
- /* zero out flags, and disable interrupts */
- pushl $0
- popfl
-
- /* set a new stack at the bottom of our page... */
- lea 4096(%ebp), %esp
-
- /* store the parameters back on the stack */
- pushl %edx /* store the start address */
-
- /* Set cr0 to a known state:
- * 31 0 == Paging disabled
- * 18 0 == Alignment check disabled
- * 16 0 == Write protect disabled
- * 3 0 == No task switch
- * 2 0 == Don't do FP software emulation.
- * 0 1 == Proctected mode enabled
- */
- movl %cr0, %eax
- andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
- orl $(1<<0), %eax
- movl %eax, %cr0
-
- /* clear cr4 if applicable */
- testl %ecx, %ecx
- jz 1f
- /* Set cr4 to a known state:
- * Setting everything to zero seems safe.
- */
- movl %cr4, %eax
- andl $0, %eax
- movl %eax, %cr4
-
- jmp 1f
-1:
-
- /* Flush the TLB (needed?) */
- xorl %eax, %eax
- movl %eax, %cr3
-
- /* Do the copies */
- movl %ebx, %ecx
- jmp 1f
-
-0: /* top, read another word from the indirection page */
- movl (%ebx), %ecx
- addl $4, %ebx
-1:
- testl $0x1, %ecx /* is it a destination page */
- jz 2f
- movl %ecx, %edi
- andl $0xfffff000, %edi
- jmp 0b
-2:
- testl $0x2, %ecx /* is it an indirection page */
- jz 2f
- movl %ecx, %ebx
- andl $0xfffff000, %ebx
- jmp 0b
-2:
- testl $0x4, %ecx /* is it the done indicator */
- jz 2f
- jmp 3f
-2:
- testl $0x8, %ecx /* is it the source indicator */
- jz 0b /* Ignore it otherwise */
- movl %ecx, %esi /* For every source page do a copy */
- andl $0xfffff000, %esi
-
- movl $1024, %ecx
- rep ; movsl
- jmp 0b
-
-3:
-
- /* To be certain of avoiding problems with self-modifying code
- * I need to execute a serializing instruction here.
- * So I flush the TLB, it's handy, and not processor dependent.
- */
- xorl %eax, %eax
- movl %eax, %cr3
-
- /* set all of the registers to known values */
- /* leave %esp alone */
-
- xorl %eax, %eax
- xorl %ebx, %ebx
- xorl %ecx, %ecx
- xorl %edx, %edx
- xorl %esi, %esi
- xorl %edi, %edi
- xorl %ebp, %ebp
- ret
-relocate_new_kernel_end:
-
- .globl relocate_new_kernel_size
-relocate_new_kernel_size:
- .long relocate_new_kernel_end - relocate_new_kernel
diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c
deleted file mode 100644
index 9c968ae67c4..00000000000
--- a/arch/i386/kernel/scx200.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/* linux/arch/i386/kernel/scx200.c
-
- Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com>
-
- National Semiconductor SCx200 support. */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-
-#include <linux/scx200.h>
-#include <linux/scx200_gpio.h>
-
-/* Verify that the configuration block really is there */
-#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
-
-#define NAME "scx200"
-
-MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>");
-MODULE_DESCRIPTION("NatSemi SCx200 Driver");
-MODULE_LICENSE("GPL");
-
-unsigned scx200_gpio_base = 0;
-long scx200_gpio_shadow[2];
-
-unsigned scx200_cb_base = 0;
-
-static struct pci_device_id scx200_tbl[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
- { },
-};
-MODULE_DEVICE_TABLE(pci,scx200_tbl);
-
-static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *);
-
-static struct pci_driver scx200_pci_driver = {
- .name = "scx200",
- .id_table = scx200_tbl,
- .probe = scx200_probe,
-};
-
-static DEFINE_SPINLOCK(scx200_gpio_config_lock);
-
-static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
- int bank;
- unsigned base;
-
- if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
- pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) {
- base = pci_resource_start(pdev, 0);
- printk(KERN_INFO NAME ": GPIO base 0x%x\n", base);
-
- if (request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO") == 0) {
- printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n");
- return -EBUSY;
- }
-
- scx200_gpio_base = base;
-
- /* read the current values driven on the GPIO signals */
- for (bank = 0; bank < 2; ++bank)
- scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
-
- } else {
- /* find the base of the Configuration Block */
- if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) {
- scx200_cb_base = SCx200_CB_BASE_FIXED;
- } else {
- pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base);
- if (scx200_cb_probe(base)) {
- scx200_cb_base = base;
- } else {
- printk(KERN_WARNING NAME ": Configuration Block not found\n");
- return -ENODEV;
- }
- }
- printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base);
- }
-
- return 0;
-}
-
-u32 scx200_gpio_configure(int index, u32 mask, u32 bits)
-{
- u32 config, new_config;
- unsigned long flags;
-
- spin_lock_irqsave(&scx200_gpio_config_lock, flags);
-
- outl(index, scx200_gpio_base + 0x20);
- config = inl(scx200_gpio_base + 0x24);
-
- new_config = (config & mask) | bits;
- outl(new_config, scx200_gpio_base + 0x24);
-
- spin_unlock_irqrestore(&scx200_gpio_config_lock, flags);
-
- return config;
-}
-
-#if 0
-void scx200_gpio_dump(unsigned index)
-{
- u32 config = scx200_gpio_configure(index, ~0, 0);
- printk(KERN_DEBUG "GPIO%02u: 0x%08lx", index, (unsigned long)config);
-
- if (config & 1)
- printk(" OE"); /* output enabled */
- else
- printk(" TS"); /* tristate */
- if (config & 2)
- printk(" PP"); /* push pull */
- else
- printk(" OD"); /* open drain */
- if (config & 4)
- printk(" PUE"); /* pull up enabled */
- else
- printk(" PUD"); /* pull up disabled */
- if (config & 8)
- printk(" LOCKED"); /* locked */
- if (config & 16)
- printk(" LEVEL"); /* level input */
- else
- printk(" EDGE"); /* edge input */
- if (config & 32)
- printk(" HI"); /* trigger on rising edge */
- else
- printk(" LO"); /* trigger on falling edge */
- if (config & 64)
- printk(" DEBOUNCE"); /* debounce */
- printk("\n");
-}
-#endif /* 0 */
-
-static int __init scx200_init(void)
-{
- printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n");
-
- return pci_module_init(&scx200_pci_driver);
-}
-
-static void __exit scx200_cleanup(void)
-{
- pci_unregister_driver(&scx200_pci_driver);
- release_region(scx200_gpio_base, SCx200_GPIO_SIZE);
-}
-
-module_init(scx200_init);
-module_exit(scx200_cleanup);
-
-EXPORT_SYMBOL(scx200_gpio_base);
-EXPORT_SYMBOL(scx200_gpio_shadow);
-EXPORT_SYMBOL(scx200_gpio_configure);
-EXPORT_SYMBOL(scx200_cb_base);
-
-/*
- Local variables:
- compile-command: "make -k -C ../../.. SUBDIRS=arch/i386/kernel modules"
- c-basic-offset: 8
- End:
-*/
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c
deleted file mode 100644
index 7455ab64394..00000000000
--- a/arch/i386/kernel/semaphore.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * i386 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Portions Copyright 1999 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
- */
-#include <linux/config.h>
-#include <asm/semaphore.h>
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- *
- * %eax contains the semaphore pointer on entry. Save the C-clobbered
- * registers (%eax, %edx and %ecx) except %eax whish is either a return
- * value or just clobbered..
- */
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __down_failed\n"
-"__down_failed:\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "pushl %ebp\n\t"
- "movl %esp,%ebp\n\t"
-#endif
- "pushl %edx\n\t"
- "pushl %ecx\n\t"
- "call __down\n\t"
- "popl %ecx\n\t"
- "popl %edx\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "movl %ebp,%esp\n\t"
- "popl %ebp\n\t"
-#endif
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __down_failed_interruptible\n"
-"__down_failed_interruptible:\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "pushl %ebp\n\t"
- "movl %esp,%ebp\n\t"
-#endif
- "pushl %edx\n\t"
- "pushl %ecx\n\t"
- "call __down_interruptible\n\t"
- "popl %ecx\n\t"
- "popl %edx\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "movl %ebp,%esp\n\t"
- "popl %ebp\n\t"
-#endif
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __down_failed_trylock\n"
-"__down_failed_trylock:\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "pushl %ebp\n\t"
- "movl %esp,%ebp\n\t"
-#endif
- "pushl %edx\n\t"
- "pushl %ecx\n\t"
- "call __down_trylock\n\t"
- "popl %ecx\n\t"
- "popl %edx\n\t"
-#if defined(CONFIG_FRAME_POINTER)
- "movl %ebp,%esp\n\t"
- "popl %ebp\n\t"
-#endif
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __up_wakeup\n"
-"__up_wakeup:\n\t"
- "pushl %edx\n\t"
- "pushl %ecx\n\t"
- "call __up\n\t"
- "popl %ecx\n\t"
- "popl %edx\n\t"
- "ret"
-);
-
-/*
- * rw spinlock fallbacks
- */
-#if defined(CONFIG_SMP)
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __write_lock_failed\n"
-"__write_lock_failed:\n\t"
- LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jne 1b\n\t"
- LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jnz __write_lock_failed\n\t"
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __read_lock_failed\n"
-"__read_lock_failed:\n\t"
- LOCK "incl (%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $1,(%eax)\n\t"
- "js 1b\n\t"
- LOCK "decl (%eax)\n\t"
- "js __read_lock_failed\n\t"
- "ret"
-);
-#endif
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
deleted file mode 100644
index b48ac635f3c..00000000000
--- a/arch/i386/kernel/setup.c
+++ /dev/null
@@ -1,1634 +0,0 @@
-/*
- * linux/arch/i386/kernel/setup.c
- *
- * Copyright (C) 1995 Linus Torvalds
- *
- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
- *
- * Memory region support
- * David Parsons <orc@pell.chi.il.us>, July-August 1999
- *
- * Added E820 sanitization routine (removes overlapping memory regions);
- * Brian Moyle <bmoyle@mvista.com>, February 2001
- *
- * Moved CPU detection code to cpu/${cpu}.c
- * Patrick Mochel <mochel@osdl.org>, March 2002
- *
- * Provisions for empty E820 memory regions (reported by certain BIOSes).
- * Alex Achenbach <xela@slit.de>, December 2002.
- *
- */
-
-/*
- * This file handles the architecture-dependent parts of initialization
- */
-
-#include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/mmzone.h>
-#include <linux/tty.h>
-#include <linux/ioport.h>
-#include <linux/acpi.h>
-#include <linux/apm_bios.h>
-#include <linux/initrd.h>
-#include <linux/bootmem.h>
-#include <linux/seq_file.h>
-#include <linux/console.h>
-#include <linux/mca.h>
-#include <linux/root_dev.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <linux/efi.h>
-#include <linux/init.h>
-#include <linux/edd.h>
-#include <linux/nodemask.h>
-#include <linux/kexec.h>
-#include <linux/crash_dump.h>
-
-#include <video/edid.h>
-
-#include <asm/apic.h>
-#include <asm/e820.h>
-#include <asm/mpspec.h>
-#include <asm/setup.h>
-#include <asm/arch_hooks.h>
-#include <asm/sections.h>
-#include <asm/io_apic.h>
-#include <asm/ist.h>
-#include <asm/io.h>
-#include "setup_arch_pre.h"
-#include <bios_ebda.h>
-
-/* Forward Declaration. */
-void __init find_max_pfn(void);
-
-/* This value is set up by the early boot code to point to the value
- immediately after the boot time page tables. It contains a *physical*
- address, and must not be in the .bss segment! */
-unsigned long init_pg_tables_end __initdata = ~0UL;
-
-int disable_pse __devinitdata = 0;
-
-/*
- * Machine setup..
- */
-
-#ifdef CONFIG_EFI
-int efi_enabled = 0;
-EXPORT_SYMBOL(efi_enabled);
-#endif
-
-/* cpu data as detected by the assembly code in head.S */
-struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
-/* common cpu data for all cpus */
-struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
-EXPORT_SYMBOL(boot_cpu_data);
-
-unsigned long mmu_cr4_features;
-
-#ifdef CONFIG_ACPI
- int acpi_disabled = 0;
-#else
- int acpi_disabled = 1;
-#endif
-EXPORT_SYMBOL(acpi_disabled);
-
-#ifdef CONFIG_ACPI
-int __initdata acpi_force = 0;
-extern acpi_interrupt_flags acpi_sci_flags;
-#endif
-
-/* for MCA, but anyone else can use it if they want */
-unsigned int machine_id;
-#ifdef CONFIG_MCA
-EXPORT_SYMBOL(machine_id);
-#endif
-unsigned int machine_submodel_id;
-unsigned int BIOS_revision;
-unsigned int mca_pentium_flag;
-
-/* For PCI or other memory-mapped resources */
-unsigned long pci_mem_start = 0x10000000;
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL(pci_mem_start);
-#endif
-
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
-int bootloader_type;
-
-/* user-defined highmem size */
-static unsigned int highmem_pages = -1;
-
-/*
- * Setup options
- */
-struct drive_info_struct { char dummy[32]; } drive_info;
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
- defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
-EXPORT_SYMBOL(drive_info);
-#endif
-struct screen_info screen_info;
-#ifdef CONFIG_VT
-EXPORT_SYMBOL(screen_info);
-#endif
-struct apm_info apm_info;
-EXPORT_SYMBOL(apm_info);
-struct sys_desc_table_struct {
- unsigned short length;
- unsigned char table[0];
-};
-struct edid_info edid_info;
-EXPORT_SYMBOL_GPL(edid_info);
-struct ist_info ist_info;
-#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
- defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
-EXPORT_SYMBOL(ist_info);
-#endif
-struct e820map e820;
-
-extern void early_cpu_init(void);
-extern void dmi_scan_machine(void);
-extern void generic_apic_probe(char *);
-extern int root_mountflags;
-
-unsigned long saved_videomode;
-
-#define RAMDISK_IMAGE_START_MASK 0x07FF
-#define RAMDISK_PROMPT_FLAG 0x8000
-#define RAMDISK_LOAD_FLAG 0x4000
-
-static char command_line[COMMAND_LINE_SIZE];
-
-unsigned char __initdata boot_params[PARAM_SIZE];
-
-static struct resource data_resource = {
- .name = "Kernel data",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-static struct resource code_resource = {
- .name = "Kernel code",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-static struct resource system_rom_resource = {
- .name = "System ROM",
- .start = 0xf0000,
- .end = 0xfffff,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource extension_rom_resource = {
- .name = "Extension ROM",
- .start = 0xe0000,
- .end = 0xeffff,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource adapter_rom_resources[] = { {
- .name = "Adapter ROM",
- .start = 0xc8000,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-} };
-
-#define ADAPTER_ROM_RESOURCES \
- (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
-
-static struct resource video_rom_resource = {
- .name = "Video ROM",
- .start = 0xc0000,
- .end = 0xc7fff,
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource video_ram_resource = {
- .name = "Video RAM area",
- .start = 0xa0000,
- .end = 0xbffff,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-static struct resource standard_io_resources[] = { {
- .name = "dma1",
- .start = 0x0000,
- .end = 0x001f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "pic1",
- .start = 0x0020,
- .end = 0x0021,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "timer0",
- .start = 0x0040,
- .end = 0x0043,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "timer1",
- .start = 0x0050,
- .end = 0x0053,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "keyboard",
- .start = 0x0060,
- .end = 0x006f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "dma page reg",
- .start = 0x0080,
- .end = 0x008f,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "pic2",
- .start = 0x00a0,
- .end = 0x00a1,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "dma2",
- .start = 0x00c0,
- .end = 0x00df,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
- .name = "fpu",
- .start = 0x00f0,
- .end = 0x00ff,
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
-} };
-
-#define STANDARD_IO_RESOURCES \
- (sizeof standard_io_resources / sizeof standard_io_resources[0])
-
-#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
-
-static int __init romchecksum(unsigned char *rom, unsigned long length)
-{
- unsigned char *p, sum = 0;
-
- for (p = rom; p < rom + length; p++)
- sum += *p;
- return sum == 0;
-}
-
-static void __init probe_roms(void)
-{
- unsigned long start, length, upper;
- unsigned char *rom;
- int i;
-
- /* video rom */
- upper = adapter_rom_resources[0].start;
- for (start = video_rom_resource.start; start < upper; start += 2048) {
- rom = isa_bus_to_virt(start);
- if (!romsignature(rom))
- continue;
-
- video_rom_resource.start = start;
-
- /* 0 < length <= 0x7f * 512, historically */
- length = rom[2] * 512;
-
- /* if checksum okay, trust length byte */
- if (length && romchecksum(rom, length))
- video_rom_resource.end = start + length - 1;
-
- request_resource(&iomem_resource, &video_rom_resource);
- break;
- }
-
- start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
- if (start < upper)
- start = upper;
-
- /* system rom */
- request_resource(&iomem_resource, &system_rom_resource);
- upper = system_rom_resource.start;
-
- /* check for extension rom (ignore length byte!) */
- rom = isa_bus_to_virt(extension_rom_resource.start);
- if (romsignature(rom)) {
- length = extension_rom_resource.end - extension_rom_resource.start + 1;
- if (romchecksum(rom, length)) {
- request_resource(&iomem_resource, &extension_rom_resource);
- upper = extension_rom_resource.start;
- }
- }
-
- /* check for adapter roms on 2k boundaries */
- for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
- rom = isa_bus_to_virt(start);
- if (!romsignature(rom))
- continue;
-
- /* 0 < length <= 0x7f * 512, historically */
- length = rom[2] * 512;
-
- /* but accept any length that fits if checksum okay */
- if (!length || start + length > upper || !romchecksum(rom, length))
- continue;
-
- adapter_rom_resources[i].start = start;
- adapter_rom_resources[i].end = start + length - 1;
- request_resource(&iomem_resource, &adapter_rom_resources[i]);
-
- start = adapter_rom_resources[i++].end & ~2047UL;
- }
-}
-
-static void __init limit_regions(unsigned long long size)
-{
- unsigned long long current_addr = 0;
- int i;
-
- if (efi_enabled) {
- efi_memory_desc_t *md;
- void *p;
-
- for (p = memmap.map, i = 0; p < memmap.map_end;
- p += memmap.desc_size, i++) {
- md = p;
- current_addr = md->phys_addr + (md->num_pages << 12);
- if (md->type == EFI_CONVENTIONAL_MEMORY) {
- if (current_addr >= size) {
- md->num_pages -=
- (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
- memmap.nr_map = i + 1;
- return;
- }
- }
- }
- }
- for (i = 0; i < e820.nr_map; i++) {
- current_addr = e820.map[i].addr + e820.map[i].size;
- if (current_addr < size)
- continue;
-
- if (e820.map[i].type != E820_RAM)
- continue;
-
- if (e820.map[i].addr >= size) {
- /*
- * This region starts past the end of the
- * requested size, skip it completely.
- */
- e820.nr_map = i;
- } else {
- e820.nr_map = i + 1;
- e820.map[i].size -= current_addr - size;
- }
- return;
- }
-}
-
-static void __init add_memory_region(unsigned long long start,
- unsigned long long size, int type)
-{
- int x;
-
- if (!efi_enabled) {
- x = e820.nr_map;
-
- if (x == E820MAX) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
- return;
- }
-
- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
- }
-} /* add_memory_region */
-
-#define E820_DEBUG 1
-
-static void __init print_memory_map(char *who)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- printk(" %s: %016Lx - %016Lx ", who,
- e820.map[i].addr,
- e820.map[i].addr + e820.map[i].size);
- switch (e820.map[i].type) {
- case E820_RAM: printk("(usable)\n");
- break;
- case E820_RESERVED:
- printk("(reserved)\n");
- break;
- case E820_ACPI:
- printk("(ACPI data)\n");
- break;
- case E820_NVS:
- printk("(ACPI NVS)\n");
- break;
- default: printk("type %lu\n", e820.map[i].type);
- break;
- }
- }
-}
-
-/*
- * Sanitize the BIOS e820 map.
- *
- * Some e820 responses include overlapping entries. The following
- * replaces the original e820 map with a new one, removing overlaps.
- *
- */
-struct change_member {
- struct e820entry *pbios; /* pointer to original bios entry */
- unsigned long long addr; /* address for this change point */
-};
-static struct change_member change_point_list[2*E820MAX] __initdata;
-static struct change_member *change_point[2*E820MAX] __initdata;
-static struct e820entry *overlap_list[E820MAX] __initdata;
-static struct e820entry new_bios[E820MAX] __initdata;
-
-static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
-{
- struct change_member *change_tmp;
- unsigned long current_type, last_type;
- unsigned long long last_addr;
- int chgidx, still_changing;
- int overlap_entries;
- int new_bios_entry;
- int old_nr, new_nr, chg_nr;
- int i;
-
- /*
- Visually we're performing the following (1,2,3,4 = memory types)...
-
- Sample memory map (w/overlaps):
- ____22__________________
- ______________________4_
- ____1111________________
- _44_____________________
- 11111111________________
- ____________________33__
- ___________44___________
- __________33333_________
- ______________22________
- ___________________2222_
- _________111111111______
- _____________________11_
- _________________4______
-
- Sanitized equivalent (no overlap):
- 1_______________________
- _44_____________________
- ___1____________________
- ____22__________________
- ______11________________
- _________1______________
- __________3_____________
- ___________44___________
- _____________33_________
- _______________2________
- ________________1_______
- _________________4______
- ___________________2____
- ____________________33__
- ______________________4_
- */
-
- /* if there's only one memory region, don't bother */
- if (*pnr_map < 2)
- return -1;
-
- old_nr = *pnr_map;
-
- /* bail out if we find any unreasonable addresses in bios map */
- for (i=0; i<old_nr; i++)
- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
- return -1;
-
- /* create pointers for initial change-point information (for sorting) */
- for (i=0; i < 2*old_nr; i++)
- change_point[i] = &change_point_list[i];
-
- /* record all known change-points (starting and ending addresses),
- omitting those that are for empty memory regions */
- chgidx = 0;
- for (i=0; i < old_nr; i++) {
- if (biosmap[i].size != 0) {
- change_point[chgidx]->addr = biosmap[i].addr;
- change_point[chgidx++]->pbios = &biosmap[i];
- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
- change_point[chgidx++]->pbios = &biosmap[i];
- }
- }
- chg_nr = chgidx; /* true number of change-points */
-
- /* sort change-point list by memory addresses (low -> high) */
- still_changing = 1;
- while (still_changing) {
- still_changing = 0;
- for (i=1; i < chg_nr; i++) {
- /* if <current_addr> > <last_addr>, swap */
- /* or, if current=<start_addr> & last=<end_addr>, swap */
- if ((change_point[i]->addr < change_point[i-1]->addr) ||
- ((change_point[i]->addr == change_point[i-1]->addr) &&
- (change_point[i]->addr == change_point[i]->pbios->addr) &&
- (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
- )
- {
- change_tmp = change_point[i];
- change_point[i] = change_point[i-1];
- change_point[i-1] = change_tmp;
- still_changing=1;
- }
- }
- }
-
- /* create a new bios memory map, removing overlaps */
- overlap_entries=0; /* number of entries in the overlap table */
- new_bios_entry=0; /* index for creating new bios map entries */
- last_type = 0; /* start with undefined memory type */
- last_addr = 0; /* start with 0 as last starting address */
- /* loop through change-points, determining affect on the new bios map */
- for (chgidx=0; chgidx < chg_nr; chgidx++)
- {
- /* keep track of all overlapping bios entries */
- if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
- {
- /* add map entry to overlap list (> 1 entry implies an overlap) */
- overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
- }
- else
- {
- /* remove entry from list (order independent, so swap with last) */
- for (i=0; i<overlap_entries; i++)
- {
- if (overlap_list[i] == change_point[chgidx]->pbios)
- overlap_list[i] = overlap_list[overlap_entries-1];
- }
- overlap_entries--;
- }
- /* if there are overlapping entries, decide which "type" to use */
- /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
- current_type = 0;
- for (i=0; i<overlap_entries; i++)
- if (overlap_list[i]->type > current_type)
- current_type = overlap_list[i]->type;
- /* continue building up new bios map based on this information */
- if (current_type != last_type) {
- if (last_type != 0) {
- new_bios[new_bios_entry].size =
- change_point[chgidx]->addr - last_addr;
- /* move forward only if the new size was non-zero */
- if (new_bios[new_bios_entry].size != 0)
- if (++new_bios_entry >= E820MAX)
- break; /* no more space left for new bios entries */
- }
- if (current_type != 0) {
- new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
- new_bios[new_bios_entry].type = current_type;
- last_addr=change_point[chgidx]->addr;
- }
- last_type = current_type;
- }
- }
- new_nr = new_bios_entry; /* retain count for new bios entries */
-
- /* copy new bios mapping into original location */
- memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
- *pnr_map = new_nr;
-
- return 0;
-}
-
-/*
- * Copy the BIOS e820 map into a safe place.
- *
- * Sanity-check it while we're at it..
- *
- * If we're lucky and live on a modern system, the setup code
- * will have given us a memory map that we can use to properly
- * set up memory. If we aren't, we'll fake a memory map.
- *
- * We check to see that the memory map contains at least 2 elements
- * before we'll use it, because the detection code in setup.S may
- * not be perfect and most every PC known to man has two memory
- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
- * thinkpad 560x, for example, does not cooperate with the memory
- * detection code.)
- */
-static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
-{
- /* Only one memory region (or negative)? Ignore it */
- if (nr_map < 2)
- return -1;
-
- do {
- unsigned long long start = biosmap->addr;
- unsigned long long size = biosmap->size;
- unsigned long long end = start + size;
- unsigned long type = biosmap->type;
-
- /* Overflow in 64 bits? Ignore the memory map. */
- if (start > end)
- return -1;
-
- /*
- * Some BIOSes claim RAM in the 640k - 1M region.
- * Not right. Fix it up.
- */
- if (type == E820_RAM) {
- if (start < 0x100000ULL && end > 0xA0000ULL) {
- if (start < 0xA0000ULL)
- add_memory_region(start, 0xA0000ULL-start, type);
- if (end <= 0x100000ULL)
- continue;
- start = 0x100000ULL;
- size = end - start;
- }
- }
- add_memory_region(start, size, type);
- } while (biosmap++,--nr_map);
- return 0;
-}
-
-#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
-struct edd edd;
-#ifdef CONFIG_EDD_MODULE
-EXPORT_SYMBOL(edd);
-#endif
-/**
- * copy_edd() - Copy the BIOS EDD information
- * from boot_params into a safe place.
- *
- */
-static inline void copy_edd(void)
-{
- memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
- memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
- edd.mbr_signature_nr = EDD_MBR_SIG_NR;
- edd.edd_info_nr = EDD_NR;
-}
-#else
-static inline void copy_edd(void)
-{
-}
-#endif
-
-/*
- * Do NOT EVER look at the BIOS memory size location.
- * It does not work on many machines.
- */
-#define LOWMEMSIZE() (0x9f000)
-
-static void __init parse_cmdline_early (char ** cmdline_p)
-{
- char c = ' ', *to = command_line, *from = saved_command_line;
- int len = 0;
- int userdef = 0;
-
- /* Save unparsed command line copy for /proc/cmdline */
- saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
-
- for (;;) {
- if (c != ' ')
- goto next_char;
- /*
- * "mem=nopentium" disables the 4MB page tables.
- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
- * to <mem>, overriding the bios size.
- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
- * <start> to <start>+<mem>, overriding the bios size.
- *
- * HPA tells me bootloaders need to parse mem=, so no new
- * option should be mem= [also see Documentation/i386/boot.txt]
- */
- if (!memcmp(from, "mem=", 4)) {
- if (to != command_line)
- to--;
- if (!memcmp(from+4, "nopentium", 9)) {
- from += 9+4;
- clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
- disable_pse = 1;
- } else {
- /* If the user specifies memory size, we
- * limit the BIOS-provided memory map to
- * that size. exactmap can be used to specify
- * the exact map. mem=number can be used to
- * trim the existing memory map.
- */
- unsigned long long mem_size;
-
- mem_size = memparse(from+4, &from);
- limit_regions(mem_size);
- userdef=1;
- }
- }
-
- else if (!memcmp(from, "memmap=", 7)) {
- if (to != command_line)
- to--;
- if (!memcmp(from+7, "exactmap", 8)) {
-#ifdef CONFIG_CRASH_DUMP
- /* If we are doing a crash dump, we
- * still need to know the real mem
- * size before original memory map is
- * reset.
- */
- find_max_pfn();
- saved_max_pfn = max_pfn;
-#endif
- from += 8+7;
- e820.nr_map = 0;
- userdef = 1;
- } else {
- /* If the user specifies memory size, we
- * limit the BIOS-provided memory map to
- * that size. exactmap can be used to specify
- * the exact map. mem=number can be used to
- * trim the existing memory map.
- */
- unsigned long long start_at, mem_size;
-
- mem_size = memparse(from+7, &from);
- if (*from == '@') {
- start_at = memparse(from+1, &from);
- add_memory_region(start_at, mem_size, E820_RAM);
- } else if (*from == '#') {
- start_at = memparse(from+1, &from);
- add_memory_region(start_at, mem_size, E820_ACPI);
- } else if (*from == '$') {
- start_at = memparse(from+1, &from);
- add_memory_region(start_at, mem_size, E820_RESERVED);
- } else {
- limit_regions(mem_size);
- userdef=1;
- }
- }
- }
-
- else if (!memcmp(from, "noexec=", 7))
- noexec_setup(from + 7);
-
-
-#ifdef CONFIG_X86_SMP
- /*
- * If the BIOS enumerates physical processors before logical,
- * maxcpus=N at enumeration-time can be used to disable HT.
- */
- else if (!memcmp(from, "maxcpus=", 8)) {
- extern unsigned int maxcpus;
-
- maxcpus = simple_strtoul(from + 8, NULL, 0);
- }
-#endif
-
-#ifdef CONFIG_ACPI
- /* "acpi=off" disables both ACPI table parsing and interpreter */
- else if (!memcmp(from, "acpi=off", 8)) {
- disable_acpi();
- }
-
- /* acpi=force to over-ride black-list */
- else if (!memcmp(from, "acpi=force", 10)) {
- acpi_force = 1;
- acpi_ht = 1;
- acpi_disabled = 0;
- }
-
- /* acpi=strict disables out-of-spec workarounds */
- else if (!memcmp(from, "acpi=strict", 11)) {
- acpi_strict = 1;
- }
-
- /* Limit ACPI just to boot-time to enable HT */
- else if (!memcmp(from, "acpi=ht", 7)) {
- if (!acpi_force)
- disable_acpi();
- acpi_ht = 1;
- }
-
- /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
- else if (!memcmp(from, "pci=noacpi", 10)) {
- acpi_disable_pci();
- }
- /* "acpi=noirq" disables ACPI interrupt routing */
- else if (!memcmp(from, "acpi=noirq", 10)) {
- acpi_noirq_set();
- }
-
- else if (!memcmp(from, "acpi_sci=edge", 13))
- acpi_sci_flags.trigger = 1;
-
- else if (!memcmp(from, "acpi_sci=level", 14))
- acpi_sci_flags.trigger = 3;
-
- else if (!memcmp(from, "acpi_sci=high", 13))
- acpi_sci_flags.polarity = 1;
-
- else if (!memcmp(from, "acpi_sci=low", 12))
- acpi_sci_flags.polarity = 3;
-
-#ifdef CONFIG_X86_IO_APIC
- else if (!memcmp(from, "acpi_skip_timer_override", 24))
- acpi_skip_timer_override = 1;
-
- if (!memcmp(from, "disable_timer_pin_1", 19))
- disable_timer_pin_1 = 1;
- if (!memcmp(from, "enable_timer_pin_1", 18))
- disable_timer_pin_1 = -1;
-
- /* disable IO-APIC */
- else if (!memcmp(from, "noapic", 6))
- disable_ioapic_setup();
-#endif /* CONFIG_X86_IO_APIC */
-#endif /* CONFIG_ACPI */
-
-#ifdef CONFIG_X86_LOCAL_APIC
- /* enable local APIC */
- else if (!memcmp(from, "lapic", 5))
- lapic_enable();
-
- /* disable local APIC */
- else if (!memcmp(from, "nolapic", 6))
- lapic_disable();
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-#ifdef CONFIG_KEXEC
- /* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel. By reserving this memory we guarantee
- * that linux never set's it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
- else if (!memcmp(from, "crashkernel=", 12)) {
- unsigned long size, base;
- size = memparse(from+12, &from);
- if (*from == '@') {
- base = memparse(from+1, &from);
- /* FIXME: Do I want a sanity check
- * to validate the memory range?
- */
- crashk_res.start = base;
- crashk_res.end = base + size - 1;
- }
- }
-#endif
-#ifdef CONFIG_CRASH_DUMP
- /* elfcorehdr= specifies the location of elf core header
- * stored by the crashed kernel.
- */
- else if (!memcmp(from, "elfcorehdr=", 11))
- elfcorehdr_addr = memparse(from+11, &from);
-#endif
-
- /*
- * highmem=size forces highmem to be exactly 'size' bytes.
- * This works even on boxes that have no highmem otherwise.
- * This also works to reduce highmem size on bigger boxes.
- */
- else if (!memcmp(from, "highmem=", 8))
- highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
-
- /*
- * vmalloc=size forces the vmalloc area to be exactly 'size'
- * bytes. This can be used to increase (or decrease) the
- * vmalloc area - the default is 128m.
- */
- else if (!memcmp(from, "vmalloc=", 8))
- __VMALLOC_RESERVE = memparse(from+8, &from);
-
- next_char:
- c = *(from++);
- if (!c)
- break;
- if (COMMAND_LINE_SIZE <= ++len)
- break;
- *(to++) = c;
- }
- *to = '\0';
- *cmdline_p = command_line;
- if (userdef) {
- printk(KERN_INFO "user-defined physical RAM map:\n");
- print_memory_map("user");
- }
-}
-
-/*
- * Callback for efi_memory_walk.
- */
-static int __init
-efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
-{
- unsigned long *max_pfn = arg, pfn;
-
- if (start < end) {
- pfn = PFN_UP(end -1);
- if (pfn > *max_pfn)
- *max_pfn = pfn;
- }
- return 0;
-}
-
-
-/*
- * Find the highest page frame number we have available
- */
-void __init find_max_pfn(void)
-{
- int i;
-
- max_pfn = 0;
- if (efi_enabled) {
- efi_memmap_walk(efi_find_max_pfn, &max_pfn);
- return;
- }
-
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long start, end;
- /* RAM? */
- if (e820.map[i].type != E820_RAM)
- continue;
- start = PFN_UP(e820.map[i].addr);
- end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
- if (start >= end)
- continue;
- if (end > max_pfn)
- max_pfn = end;
- }
-}
-
-/*
- * Determine low and high memory ranges:
- */
-unsigned long __init find_max_low_pfn(void)
-{
- unsigned long max_low_pfn;
-
- max_low_pfn = max_pfn;
- if (max_low_pfn > MAXMEM_PFN) {
- if (highmem_pages == -1)
- highmem_pages = max_pfn - MAXMEM_PFN;
- if (highmem_pages + MAXMEM_PFN < max_pfn)
- max_pfn = MAXMEM_PFN + highmem_pages;
- if (highmem_pages + MAXMEM_PFN > max_pfn) {
- printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
- highmem_pages = 0;
- }
- max_low_pfn = MAXMEM_PFN;
-#ifndef CONFIG_HIGHMEM
- /* Maximum memory usable is what is directly addressable */
- printk(KERN_WARNING "Warning only %ldMB will be used.\n",
- MAXMEM>>20);
- if (max_pfn > MAX_NONPAE_PFN)
- printk(KERN_WARNING "Use a PAE enabled kernel.\n");
- else
- printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
- max_pfn = MAXMEM_PFN;
-#else /* !CONFIG_HIGHMEM */
-#ifndef CONFIG_X86_PAE
- if (max_pfn > MAX_NONPAE_PFN) {
- max_pfn = MAX_NONPAE_PFN;
- printk(KERN_WARNING "Warning only 4GB will be used.\n");
- printk(KERN_WARNING "Use a PAE enabled kernel.\n");
- }
-#endif /* !CONFIG_X86_PAE */
-#endif /* !CONFIG_HIGHMEM */
- } else {
- if (highmem_pages == -1)
- highmem_pages = 0;
-#ifdef CONFIG_HIGHMEM
- if (highmem_pages >= max_pfn) {
- printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
- highmem_pages = 0;
- }
- if (highmem_pages) {
- if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
- printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
- highmem_pages = 0;
- }
- max_low_pfn -= highmem_pages;
- }
-#else
- if (highmem_pages)
- printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
-#endif
- }
- return max_low_pfn;
-}
-
-/*
- * Free all available memory for boot time allocation. Used
- * as a callback function by efi_memory_walk()
- */
-
-static int __init
-free_available_memory(unsigned long start, unsigned long end, void *arg)
-{
- /* check max_low_pfn */
- if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
- return 0;
- if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
- end = (max_low_pfn + 1) << PAGE_SHIFT;
- if (start < end)
- free_bootmem(start, end - start);
-
- return 0;
-}
-/*
- * Register fully available low RAM pages with the bootmem allocator.
- */
-static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
-{
- int i;
-
- if (efi_enabled) {
- efi_memmap_walk(free_available_memory, NULL);
- return;
- }
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long curr_pfn, last_pfn, size;
- /*
- * Reserve usable low memory
- */
- if (e820.map[i].type != E820_RAM)
- continue;
- /*
- * We are rounding up the start address of usable memory:
- */
- curr_pfn = PFN_UP(e820.map[i].addr);
- if (curr_pfn >= max_low_pfn)
- continue;
- /*
- * ... and at the end of the usable range downwards:
- */
- last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-
- if (last_pfn > max_low_pfn)
- last_pfn = max_low_pfn;
-
- /*
- * .. finally, did all the rounding and playing
- * around just make the area go away?
- */
- if (last_pfn <= curr_pfn)
- continue;
-
- size = last_pfn - curr_pfn;
- free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
- }
-}
-
-/*
- * workaround for Dell systems that neglect to reserve EBDA
- */
-static void __init reserve_ebda_region(void)
-{
- unsigned int addr;
- addr = get_bios_ebda();
- if (addr)
- reserve_bootmem(addr, PAGE_SIZE);
-}
-
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init setup_bootmem_allocator(void);
-static unsigned long __init setup_memory(void)
-{
- /*
- * partially used pages are not usable - thus
- * we are rounding upwards:
- */
- min_low_pfn = PFN_UP(init_pg_tables_end);
-
- find_max_pfn();
-
- max_low_pfn = find_max_low_pfn();
-
-#ifdef CONFIG_HIGHMEM
- highstart_pfn = highend_pfn = max_pfn;
- if (max_pfn > max_low_pfn) {
- highstart_pfn = max_low_pfn;
- }
- printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
- pages_to_mb(highend_pfn - highstart_pfn));
-#endif
- printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
- pages_to_mb(max_low_pfn));
-
- setup_bootmem_allocator();
-
- return max_low_pfn;
-}
-
-void __init zone_sizes_init(void)
-{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
- unsigned int max_dma, low;
-
- max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
- low = max_low_pfn;
-
- if (low < max_dma)
- zones_size[ZONE_DMA] = low;
- else {
- zones_size[ZONE_DMA] = max_dma;
- zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
- zones_size[ZONE_HIGHMEM] = highend_pfn - low;
-#endif
- }
- free_area_init(zones_size);
-}
-#else
-extern unsigned long __init setup_memory(void);
-extern void zone_sizes_init(void);
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
-
-void __init setup_bootmem_allocator(void)
-{
- unsigned long bootmap_size;
- /*
- * Initialize the boot-time allocator (with low memory only):
- */
- bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
-
- register_bootmem_low_pages(max_low_pfn);
-
- /*
- * Reserve the bootmem bitmap itself as well. We do this in two
- * steps (first step was init_bootmem()) because this catches
- * the (very unlikely) case of us accidentally initializing the
- * bootmem allocator with an invalid RAM area.
- */
- reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
- bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
-
- /*
- * reserve physical page 0 - it's a special BIOS page on many boxes,
- * enabling clean reboots, SMP operation, laptop functions.
- */
- reserve_bootmem(0, PAGE_SIZE);
-
- /* reserve EBDA region, it's a 4K region */
- reserve_ebda_region();
-
- /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
- PCI prefetch into it (errata #56). Usually the page is reserved anyways,
- unless you have no PS/2 mouse plugged in. */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 6)
- reserve_bootmem(0xa0000 - 4096, 4096);
-
-#ifdef CONFIG_SMP
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
-#endif
-#ifdef CONFIG_ACPI_SLEEP
- /*
- * Reserve low memory region for sleep support.
- */
- acpi_reserve_bootmem();
-#endif
-#ifdef CONFIG_X86_FIND_SMP_CONFIG
- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
-#endif
-
-#ifdef CONFIG_BLK_DEV_INITRD
- if (LOADER_TYPE && INITRD_START) {
- if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
- reserve_bootmem(INITRD_START, INITRD_SIZE);
- initrd_start =
- INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
- initrd_end = initrd_start+INITRD_SIZE;
- }
- else {
- printk(KERN_ERR "initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
- INITRD_START + INITRD_SIZE,
- max_low_pfn << PAGE_SHIFT);
- initrd_start = 0;
- }
- }
-#endif
-#ifdef CONFIG_KEXEC
- if (crashk_res.start != crashk_res.end)
- reserve_bootmem(crashk_res.start,
- crashk_res.end - crashk_res.start + 1);
-#endif
-}
-
-/*
- * The node 0 pgdat is initialized before all of these because
- * it's needed for bootmem. node>0 pgdats have their virtual
- * space allocated before the pagetables are in place to access
- * them, so they can't be cleared then.
- *
- * This should all compile down to nothing when NUMA is off.
- */
-void __init remapped_pgdat_init(void)
-{
- int nid;
-
- for_each_online_node(nid) {
- if (nid != 0)
- memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
- }
-}
-
-/*
- * Request address space for all standard RAM and ROM resources
- * and also for regions reported as reserved by the e820.
- */
-static void __init
-legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
-{
- int i;
-
- probe_roms();
- for (i = 0; i < e820.nr_map; i++) {
- struct resource *res;
- if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
- continue;
- res = alloc_bootmem_low(sizeof(struct resource));
- switch (e820.map[i].type) {
- case E820_RAM: res->name = "System RAM"; break;
- case E820_ACPI: res->name = "ACPI Tables"; break;
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
- default: res->name = "reserved";
- }
- res->start = e820.map[i].addr;
- res->end = res->start + e820.map[i].size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- request_resource(&iomem_resource, res);
- if (e820.map[i].type == E820_RAM) {
- /*
- * We don't know which RAM region contains kernel data,
- * so we try it repeatedly and let the resource manager
- * test it.
- */
- request_resource(res, code_resource);
- request_resource(res, data_resource);
-#ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
-#endif
- }
- }
-}
-
-/*
- * Request address space for all standard resources
- */
-static void __init register_memory(void)
-{
- unsigned long gapstart, gapsize, round;
- unsigned long long last;
- int i;
-
- if (efi_enabled)
- efi_initialize_iomem_resources(&code_resource, &data_resource);
- else
- legacy_init_iomem_resources(&code_resource, &data_resource);
-
- /* EFI systems may still have VGA */
- request_resource(&iomem_resource, &video_ram_resource);
-
- /* request I/O space for devices used on all i[345]86 PCs */
- for (i = 0; i < STANDARD_IO_RESOURCES; i++)
- request_resource(&ioport_resource, &standard_io_resources[i]);
-
- /*
- * Search for the bigest gap in the low 32 bits of the e820
- * memory space.
- */
- last = 0x100000000ull;
- gapstart = 0x10000000;
- gapsize = 0x400000;
- i = e820.nr_map;
- while (--i >= 0) {
- unsigned long long start = e820.map[i].addr;
- unsigned long long end = start + e820.map[i].size;
-
- /*
- * Since "last" is at most 4GB, we know we'll
- * fit in 32 bits if this condition is true
- */
- if (last > end) {
- unsigned long gap = last - end;
-
- if (gap > gapsize) {
- gapsize = gap;
- gapstart = end;
- }
- }
- if (start < last)
- last = start;
- }
-
- /*
- * See how much we want to round up: start off with
- * rounding to the next 1MB area.
- */
- round = 0x100000;
- while ((gapsize >> 4) > round)
- round += round;
- /* Fun with two's complement */
- pci_mem_start = (gapstart + round) & -round;
-
- printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
- pci_mem_start, gapstart, gapsize);
-}
-
-/* Use inline assembly to define this because the nops are defined
- as inline assembly strings in the include files and we cannot
- get them easily into strings. */
-asm("\t.data\nintelnops: "
- GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
- GENERIC_NOP7 GENERIC_NOP8);
-asm("\t.data\nk8nops: "
- K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
- K8_NOP7 K8_NOP8);
-asm("\t.data\nk7nops: "
- K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
- K7_NOP7 K7_NOP8);
-
-extern unsigned char intelnops[], k8nops[], k7nops[];
-static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
- NULL,
- intelnops,
- intelnops + 1,
- intelnops + 1 + 2,
- intelnops + 1 + 2 + 3,
- intelnops + 1 + 2 + 3 + 4,
- intelnops + 1 + 2 + 3 + 4 + 5,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
- NULL,
- k8nops,
- k8nops + 1,
- k8nops + 1 + 2,
- k8nops + 1 + 2 + 3,
- k8nops + 1 + 2 + 3 + 4,
- k8nops + 1 + 2 + 3 + 4 + 5,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
- NULL,
- k7nops,
- k7nops + 1,
- k7nops + 1 + 2,
- k7nops + 1 + 2 + 3,
- k7nops + 1 + 2 + 3 + 4,
- k7nops + 1 + 2 + 3 + 4 + 5,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-static struct nop {
- int cpuid;
- unsigned char **noptable;
-} noptypes[] = {
- { X86_FEATURE_K8, k8_nops },
- { X86_FEATURE_K7, k7_nops },
- { -1, NULL }
-};
-
-/* Replace instructions with better alternatives for this CPU type.
-
- This runs before SMP is initialized to avoid SMP problems with
- self modifying code. This implies that assymetric systems where
- APs have less capabilities than the boot processor are not handled.
- Tough. Make sure you disable such features by hand. */
-void apply_alternatives(void *start, void *end)
-{
- struct alt_instr *a;
- int diff, i, k;
- unsigned char **noptable = intel_nops;
- for (i = 0; noptypes[i].cpuid >= 0; i++) {
- if (boot_cpu_has(noptypes[i].cpuid)) {
- noptable = noptypes[i].noptable;
- break;
- }
- }
- for (a = start; (void *)a < end; a++) {
- if (!boot_cpu_has(a->cpuid))
- continue;
- BUG_ON(a->replacementlen > a->instrlen);
- memcpy(a->instr, a->replacement, a->replacementlen);
- diff = a->instrlen - a->replacementlen;
- /* Pad the rest with nops */
- for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
- k = diff;
- if (k > ASM_NOP_MAX)
- k = ASM_NOP_MAX;
- memcpy(a->instr + i, noptable[k], k);
- }
- }
-}
-
-void __init alternative_instructions(void)
-{
- extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
-static char * __init machine_specific_memory_setup(void);
-
-#ifdef CONFIG_MCA
-static void set_mca_bus(int x)
-{
- MCA_bus = x;
-}
-#else
-static void set_mca_bus(int x) { }
-#endif
-
-/*
- * Determine if we were loaded by an EFI loader. If so, then we have also been
- * passed the efi memmap, systab, etc., so we should use these data structures
- * for initialization. Note, the efi init code path is determined by the
- * global efi_enabled. This allows the same kernel image to be used on existing
- * systems (with a traditional BIOS) as well as on EFI systems.
- */
-void __init setup_arch(char **cmdline_p)
-{
- unsigned long max_low_pfn;
-
- memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
- pre_setup_arch_hook();
- early_cpu_init();
-
- /*
- * FIXME: This isn't an official loader_type right
- * now but does currently work with elilo.
- * If we were configured as an EFI kernel, check to make
- * sure that we were loaded correctly from elilo and that
- * the system table is valid. If not, then initialize normally.
- */
-#ifdef CONFIG_EFI
- if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
- efi_enabled = 1;
-#endif
-
- ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
- drive_info = DRIVE_INFO;
- screen_info = SCREEN_INFO;
- edid_info = EDID_INFO;
- apm_info.bios = APM_BIOS_INFO;
- ist_info = IST_INFO;
- saved_videomode = VIDEO_MODE;
- if( SYS_DESC_TABLE.length != 0 ) {
- set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
- machine_id = SYS_DESC_TABLE.table[0];
- machine_submodel_id = SYS_DESC_TABLE.table[1];
- BIOS_revision = SYS_DESC_TABLE.table[2];
- }
- bootloader_type = LOADER_TYPE;
-
-#ifdef CONFIG_BLK_DEV_RAM
- rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
-#endif
- ARCH_SETUP
- if (efi_enabled)
- efi_init();
- else {
- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
- print_memory_map(machine_specific_memory_setup());
- }
-
- copy_edd();
-
- if (!MOUNT_ROOT_RDONLY)
- root_mountflags &= ~MS_RDONLY;
- init_mm.start_code = (unsigned long) _text;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
-
- code_resource.start = virt_to_phys(_text);
- code_resource.end = virt_to_phys(_etext)-1;
- data_resource.start = virt_to_phys(_etext);
- data_resource.end = virt_to_phys(_edata)-1;
-
- parse_cmdline_early(cmdline_p);
-
- max_low_pfn = setup_memory();
-
- /*
- * NOTE: before this point _nobody_ is allowed to allocate
- * any memory using the bootmem allocator. Although the
- * alloctor is now initialised only the first 8Mb of the kernel
- * virtual address space has been mapped. All allocations before
- * paging_init() has completed must use the alloc_bootmem_low_pages()
- * variant (which allocates DMA'able memory) and care must be taken
- * not to exceed the 8Mb limit.
- */
-
-#ifdef CONFIG_SMP
- smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
-#endif
- paging_init();
- remapped_pgdat_init();
- sparse_init();
- zone_sizes_init();
-
- /*
- * NOTE: at this point the bootmem allocator is fully available.
- */
-
-#ifdef CONFIG_EARLY_PRINTK
- {
- char *s = strstr(*cmdline_p, "earlyprintk=");
- if (s) {
- extern void setup_early_printk(char *);
-
- setup_early_printk(s);
- printk("early console enabled\n");
- }
- }
-#endif
-
-
- dmi_scan_machine();
-
-#ifdef CONFIG_X86_GENERICARCH
- generic_apic_probe(*cmdline_p);
-#endif
- if (efi_enabled)
- efi_map_memmap();
-
-#ifdef CONFIG_ACPI
- /*
- * Parse the ACPI tables for possible boot-time SMP configuration.
- */
- acpi_boot_table_init();
- acpi_boot_init();
-
-#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
- if (def_to_bigsmp)
- printk(KERN_WARNING "More than 8 CPUs detected and "
- "CONFIG_X86_PC cannot handle it.\nUse "
- "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
-#endif
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
- if (smp_found_config)
- get_smp_config();
-#endif
-
- register_memory();
-
-#ifdef CONFIG_VT
-#if defined(CONFIG_VGA_CONSOLE)
- if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
- conswitchp = &vga_con;
-#elif defined(CONFIG_DUMMY_CONSOLE)
- conswitchp = &dummy_con;
-#endif
-#endif
-}
-
-#include "setup_arch_post.h"
-/*
- * Local Variables:
- * mode:c
- * c-file-style:"k&r"
- * c-basic-offset:8
- * End:
- */
diff --git a/arch/i386/kernel/sigframe.h b/arch/i386/kernel/sigframe.h
deleted file mode 100644
index 0b2221711da..00000000000
--- a/arch/i386/kernel/sigframe.h
+++ /dev/null
@@ -1,21 +0,0 @@
-struct sigframe
-{
- char __user *pretcode;
- int sig;
- struct sigcontext sc;
- struct _fpstate fpstate;
- unsigned long extramask[_NSIG_WORDS-1];
- char retcode[8];
-};
-
-struct rt_sigframe
-{
- char __user *pretcode;
- int sig;
- struct siginfo __user *pinfo;
- void __user *puc;
- struct siginfo info;
- struct ucontext uc;
- struct _fpstate fpstate;
- char retcode[8];
-};
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
deleted file mode 100644
index adcd069db91..00000000000
--- a/arch/i386/kernel/signal.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- * linux/arch/i386/kernel/signal.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
- * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/personality.h>
-#include <linux/suspend.h>
-#include <linux/ptrace.h>
-#include <linux/elf.h>
-#include <asm/processor.h>
-#include <asm/ucontext.h>
-#include <asm/uaccess.h>
-#include <asm/i387.h>
-#include "sigframe.h"
-
-#define DEBUG_SIG 0
-
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-/*
- * Atomically swap in the new signal mask, and wait for a signal.
- */
-asmlinkage int
-sys_sigsuspend(int history0, int history1, old_sigset_t mask)
-{
- struct pt_regs * regs = (struct pt_regs *) &history0;
- sigset_t saveset;
-
- mask &= _BLOCKABLE;
- spin_lock_irq(&current->sighand->siglock);
- saveset = current->blocked;
- siginitset(&current->blocked, mask);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- regs->eax = -EINTR;
- while (1) {
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- if (do_signal(regs, &saveset))
- return -EINTR;
- }
-}
-
-asmlinkage int
-sys_rt_sigsuspend(struct pt_regs regs)
-{
- sigset_t saveset, newset;
-
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (regs.ecx != sizeof(sigset_t))
- return -EINVAL;
-
- if (copy_from_user(&newset, (sigset_t __user *)regs.ebx, sizeof(newset)))
- return -EFAULT;
- sigdelsetmask(&newset, ~_BLOCKABLE);
-
- spin_lock_irq(&current->sighand->siglock);
- saveset = current->blocked;
- current->blocked = newset;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- regs.eax = -EINTR;
- while (1) {
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- if (do_signal(&regs, &saveset))
- return -EINTR;
- }
-}
-
-asmlinkage int
-sys_sigaction(int sig, const struct old_sigaction __user *act,
- struct old_sigaction __user *oact)
-{
- struct k_sigaction new_ka, old_ka;
- int ret;
-
- if (act) {
- old_sigset_t mask;
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
- return -EFAULT;
- __get_user(new_ka.sa.sa_flags, &act->sa_flags);
- __get_user(mask, &act->sa_mask);
- siginitset(&new_ka.sa.sa_mask, mask);
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
- return -EFAULT;
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
- }
-
- return ret;
-}
-
-asmlinkage int
-sys_sigaltstack(unsigned long ebx)
-{
- /* This is needed to make gcc realize it doesn't own the "struct pt_regs" */
- struct pt_regs *regs = (struct pt_regs *)&ebx;
- const stack_t __user *uss = (const stack_t __user *)ebx;
- stack_t __user *uoss = (stack_t __user *)regs->ecx;
-
- return do_sigaltstack(uss, uoss, regs->esp);
-}
-
-
-/*
- * Do a signal return; undo the signal stack.
- */
-
-static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax)
-{
- unsigned int err = 0;
-
- /* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-#define COPY(x) err |= __get_user(regs->x, &sc->x)
-
-#define COPY_SEG(seg) \
- { unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
- regs->x##seg = tmp; }
-
-#define COPY_SEG_STRICT(seg) \
- { unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
- regs->x##seg = tmp|3; }
-
-#define GET_SEG(seg) \
- { unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
- loadsegment(seg,tmp); }
-
-#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \
- X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
- X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
-
- GET_SEG(gs);
- GET_SEG(fs);
- COPY_SEG(es);
- COPY_SEG(ds);
- COPY(edi);
- COPY(esi);
- COPY(ebp);
- COPY(esp);
- COPY(ebx);
- COPY(edx);
- COPY(ecx);
- COPY(eip);
- COPY_SEG_STRICT(cs);
- COPY_SEG_STRICT(ss);
-
- {
- unsigned int tmpflags;
- err |= __get_user(tmpflags, &sc->eflags);
- regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
- regs->orig_eax = -1; /* disable syscall checks */
- }
-
- {
- struct _fpstate __user * buf;
- err |= __get_user(buf, &sc->fpstate);
- if (buf) {
- if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
- goto badframe;
- err |= restore_i387(buf);
- } else {
- struct task_struct *me = current;
- if (used_math()) {
- clear_fpu(me);
- clear_used_math();
- }
- }
- }
-
- err |= __get_user(*peax, &sc->eax);
- return err;
-
-badframe:
- return 1;
-}
-
-asmlinkage int sys_sigreturn(unsigned long __unused)
-{
- struct pt_regs *regs = (struct pt_regs *) &__unused;
- struct sigframe __user *frame = (struct sigframe __user *)(regs->esp - 8);
- sigset_t set;
- int eax;
-
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
- if (__get_user(set.sig[0], &frame->sc.oldmask)
- || (_NSIG_WORDS > 1
- && __copy_from_user(&set.sig[1], &frame->extramask,
- sizeof(frame->extramask))))
- goto badframe;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- if (restore_sigcontext(regs, &frame->sc, &eax))
- goto badframe;
- return eax;
-
-badframe:
- force_sig(SIGSEGV, current);
- return 0;
-}
-
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
-{
- struct pt_regs *regs = (struct pt_regs *) &__unused;
- struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->esp - 4);
- sigset_t set;
- int eax;
-
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
- if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
- goto badframe;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
- goto badframe;
-
- if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->esp) == -EFAULT)
- goto badframe;
-
- return eax;
-
-badframe:
- force_sig(SIGSEGV, current);
- return 0;
-}
-
-/*
- * Set up a signal frame.
- */
-
-static int
-setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
- struct pt_regs *regs, unsigned long mask)
-{
- int tmp, err = 0;
-
- tmp = 0;
- savesegment(gs, tmp);
- err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
- savesegment(fs, tmp);
- err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
-
- err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
- err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds);
- err |= __put_user(regs->edi, &sc->edi);
- err |= __put_user(regs->esi, &sc->esi);
- err |= __put_user(regs->ebp, &sc->ebp);
- err |= __put_user(regs->esp, &sc->esp);
- err |= __put_user(regs->ebx, &sc->ebx);
- err |= __put_user(regs->edx, &sc->edx);
- err |= __put_user(regs->ecx, &sc->ecx);
- err |= __put_user(regs->eax, &sc->eax);
- err |= __put_user(current->thread.trap_no, &sc->trapno);
- err |= __put_user(current->thread.error_code, &sc->err);
- err |= __put_user(regs->eip, &sc->eip);
- err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs);
- err |= __put_user(regs->eflags, &sc->eflags);
- err |= __put_user(regs->esp, &sc->esp_at_signal);
- err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss);
-
- tmp = save_i387(fpstate);
- if (tmp < 0)
- err = 1;
- else
- err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
-
- /* non-iBCS2 extensions.. */
- err |= __put_user(mask, &sc->oldmask);
- err |= __put_user(current->thread.cr2, &sc->cr2);
-
- return err;
-}
-
-/*
- * Determine which stack to use..
- */
-static inline void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
-{
- unsigned long esp;
-
- /* Default to using normal stack */
- esp = regs->esp;
-
- /* This is the X/Open sanctioned signal stack switching. */
- if (ka->sa.sa_flags & SA_ONSTACK) {
- if (sas_ss_flags(esp) == 0)
- esp = current->sas_ss_sp + current->sas_ss_size;
- }
-
- /* This is the legacy signal stack switching. */
- else if ((regs->xss & 0xffff) != __USER_DS &&
- !(ka->sa.sa_flags & SA_RESTORER) &&
- ka->sa.sa_restorer) {
- esp = (unsigned long) ka->sa.sa_restorer;
- }
-
- esp -= frame_size;
- /* Align the stack pointer according to the i386 ABI,
- * i.e. so that on function entry ((sp + 4) & 15) == 0. */
- esp = ((esp + 4) & -16ul) - 4;
- return (void __user *) esp;
-}
-
-/* These symbols are defined with the addresses in the vsyscall page.
- See vsyscall-sigreturn.S. */
-extern void __user __kernel_sigreturn;
-extern void __user __kernel_rt_sigreturn;
-
-static int setup_frame(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs * regs)
-{
- void __user *restorer;
- struct sigframe __user *frame;
- int err = 0;
- int usig;
-
- frame = get_sigframe(ka, regs, sizeof(*frame));
-
- if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- goto give_sigsegv;
-
- usig = current_thread_info()->exec_domain
- && current_thread_info()->exec_domain->signal_invmap
- && sig < 32
- ? current_thread_info()->exec_domain->signal_invmap[sig]
- : sig;
-
- err = __put_user(usig, &frame->sig);
- if (err)
- goto give_sigsegv;
-
- err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
- if (err)
- goto give_sigsegv;
-
- if (_NSIG_WORDS > 1) {
- err = __copy_to_user(&frame->extramask, &set->sig[1],
- sizeof(frame->extramask));
- if (err)
- goto give_sigsegv;
- }
-
- restorer = &__kernel_sigreturn;
- if (ka->sa.sa_flags & SA_RESTORER)
- restorer = ka->sa.sa_restorer;
-
- /* Set up to return from userspace. */
- err |= __put_user(restorer, &frame->pretcode);
-
- /*
- * This is popl %eax ; movl $,%eax ; int $0x80
- *
- * WE DO NOT USE IT ANY MORE! It's only left here for historical
- * reasons and because gdb uses it as a signature to notice
- * signal handler stack frames.
- */
- err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
- err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
- err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
-
- if (err)
- goto give_sigsegv;
-
- /* Set up registers for signal handler */
- regs->esp = (unsigned long) frame;
- regs->eip = (unsigned long) ka->sa.sa_handler;
- regs->eax = (unsigned long) sig;
- regs->edx = (unsigned long) 0;
- regs->ecx = (unsigned long) 0;
-
- set_fs(USER_DS);
- regs->xds = __USER_DS;
- regs->xes = __USER_DS;
- regs->xss = __USER_DS;
- regs->xcs = __USER_CS;
-
- /*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- regs->eflags &= ~TF_MASK;
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
-#if DEBUG_SIG
- printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
- current->comm, current->pid, frame, regs->eip, frame->pretcode);
-#endif
-
- return 1;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return 0;
-}
-
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs)
-{
- void __user *restorer;
- struct rt_sigframe __user *frame;
- int err = 0;
- int usig;
-
- frame = get_sigframe(ka, regs, sizeof(*frame));
-
- if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- goto give_sigsegv;
-
- usig = current_thread_info()->exec_domain
- && current_thread_info()->exec_domain->signal_invmap
- && sig < 32
- ? current_thread_info()->exec_domain->signal_invmap[sig]
- : sig;
-
- err |= __put_user(usig, &frame->sig);
- err |= __put_user(&frame->info, &frame->pinfo);
- err |= __put_user(&frame->uc, &frame->puc);
- err |= copy_siginfo_to_user(&frame->info, info);
- if (err)
- goto give_sigsegv;
-
- /* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->esp),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
- regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
- goto give_sigsegv;
-
- /* Set up to return from userspace. */
- restorer = &__kernel_rt_sigreturn;
- if (ka->sa.sa_flags & SA_RESTORER)
- restorer = ka->sa.sa_restorer;
- err |= __put_user(restorer, &frame->pretcode);
-
- /*
- * This is movl $,%eax ; int $0x80
- *
- * WE DO NOT USE IT ANY MORE! It's only left here for historical
- * reasons and because gdb uses it as a signature to notice
- * signal handler stack frames.
- */
- err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
- err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
- err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
-
- if (err)
- goto give_sigsegv;
-
- /* Set up registers for signal handler */
- regs->esp = (unsigned long) frame;
- regs->eip = (unsigned long) ka->sa.sa_handler;
- regs->eax = (unsigned long) usig;
- regs->edx = (unsigned long) &frame->info;
- regs->ecx = (unsigned long) &frame->uc;
-
- set_fs(USER_DS);
- regs->xds = __USER_DS;
- regs->xes = __USER_DS;
- regs->xss = __USER_DS;
- regs->xcs = __USER_CS;
-
- /*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- regs->eflags &= ~TF_MASK;
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
-#if DEBUG_SIG
- printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
- current->comm, current->pid, frame, regs->eip, frame->pretcode);
-#endif
-
- return 1;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return 0;
-}
-
-/*
- * OK, we're invoking a handler
- */
-
-static int
-handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
- sigset_t *oldset, struct pt_regs * regs)
-{
- int ret;
-
- /* Are we from a system call? */
- if (regs->orig_eax >= 0) {
- /* If so, check system call restarting.. */
- switch (regs->eax) {
- case -ERESTART_RESTARTBLOCK:
- case -ERESTARTNOHAND:
- regs->eax = -EINTR;
- break;
-
- case -ERESTARTSYS:
- if (!(ka->sa.sa_flags & SA_RESTART)) {
- regs->eax = -EINTR;
- break;
- }
- /* fallthrough */
- case -ERESTARTNOINTR:
- regs->eax = regs->orig_eax;
- regs->eip -= 2;
- }
- }
-
- /*
- * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
- * that register information in the sigcontext is correct.
- */
- if (unlikely(regs->eflags & TF_MASK)
- && likely(current->ptrace & PT_DTRACE)) {
- current->ptrace &= ~PT_DTRACE;
- regs->eflags &= ~TF_MASK;
- }
-
- /* Set up the stack frame */
- if (ka->sa.sa_flags & SA_SIGINFO)
- ret = setup_rt_frame(sig, ka, info, oldset, regs);
- else
- ret = setup_frame(sig, ka, oldset, regs);
-
- if (ret) {
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked,sig);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
- }
-
- return ret;
-}
-
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- */
-int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
-{
- siginfo_t info;
- int signr;
- struct k_sigaction ka;
-
- /*
- * We want the common case to go fast, which
- * is why we may in certain cases get here from
- * kernel mode. Just return without doing anything
- * if so. vm86 regs switched out by assembly code
- * before reaching here, so testing against kernel
- * CS suffices.
- */
- if (!user_mode(regs))
- return 1;
-
- if (try_to_freeze())
- goto no_signal;
-
- if (!oldset)
- oldset = &current->blocked;
-
- signr = get_signal_to_deliver(&info, &ka, regs, NULL);
- if (signr > 0) {
- /* Reenable any watchpoints before delivering the
- * signal to user space. The processor register will
- * have been cleared if the watchpoint triggered
- * inside the kernel.
- */
- if (unlikely(current->thread.debugreg[7])) {
- set_debugreg(current->thread.debugreg[7], 7);
- }
-
- /* Whee! Actually deliver the signal. */
- return handle_signal(signr, &info, &ka, oldset, regs);
- }
-
- no_signal:
- /* Did we come from a system call? */
- if (regs->orig_eax >= 0) {
- /* Restart the system call - no handlers present */
- if (regs->eax == -ERESTARTNOHAND ||
- regs->eax == -ERESTARTSYS ||
- regs->eax == -ERESTARTNOINTR) {
- regs->eax = regs->orig_eax;
- regs->eip -= 2;
- }
- if (regs->eax == -ERESTART_RESTARTBLOCK){
- regs->eax = __NR_restart_syscall;
- regs->eip -= 2;
- }
- }
- return 0;
-}
-
-/*
- * notification of userspace execution resumption
- * - triggered by current->work.notify_resume
- */
-__attribute__((regparm(3)))
-void do_notify_resume(struct pt_regs *regs, sigset_t *oldset,
- __u32 thread_info_flags)
-{
- /* Pending single-step? */
- if (thread_info_flags & _TIF_SINGLESTEP) {
- regs->eflags |= TF_MASK;
- clear_thread_flag(TIF_SINGLESTEP);
- }
- /* deal with pending signal delivery */
- if (thread_info_flags & _TIF_SIGPENDING)
- do_signal(regs,oldset);
-
- clear_thread_flag(TIF_IRET);
-}
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
deleted file mode 100644
index 218d725a5a1..00000000000
--- a/arch/i386/kernel/smp.c
+++ /dev/null
@@ -1,630 +0,0 @@
-/*
- * Intel SMP support routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/smp_lock.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
-#include <linux/module.h>
-
-#include <asm/mtrr.h>
-#include <asm/tlbflush.h>
-#include <mach_apic.h>
-
-/*
- * Some notes on x86 processor bugs affecting SMP operation:
- *
- * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
- * The Linux implications for SMP are handled as follows:
- *
- * Pentium III / [Xeon]
- * None of the E1AP-E3AP errata are visible to the user.
- *
- * E1AP. see PII A1AP
- * E2AP. see PII A2AP
- * E3AP. see PII A3AP
- *
- * Pentium II / [Xeon]
- * None of the A1AP-A3AP errata are visible to the user.
- *
- * A1AP. see PPro 1AP
- * A2AP. see PPro 2AP
- * A3AP. see PPro 7AP
- *
- * Pentium Pro
- * None of 1AP-9AP errata are visible to the normal user,
- * except occasional delivery of 'spurious interrupt' as trap #15.
- * This is very rare and a non-problem.
- *
- * 1AP. Linux maps APIC as non-cacheable
- * 2AP. worked around in hardware
- * 3AP. fixed in C0 and above steppings microcode update.
- * Linux does not use excessive STARTUP_IPIs.
- * 4AP. worked around in hardware
- * 5AP. symmetric IO mode (normal Linux operation) not affected.
- * 'noapic' mode has vector 0xf filled out properly.
- * 6AP. 'noapic' mode might be affected - fixed in later steppings
- * 7AP. We do not assume writes to the LVT deassering IRQs
- * 8AP. We do not enable low power mode (deep sleep) during MP bootup
- * 9AP. We do not use mixed mode
- *
- * Pentium
- * There is a marginal case where REP MOVS on 100MHz SMP
- * machines with B stepping processors can fail. XXX should provide
- * an L1cache=Writethrough or L1cache=off option.
- *
- * B stepping CPUs may hang. There are hardware work arounds
- * for this. We warn about it in case your board doesn't have the work
- * arounds. Basically thats so I can tell anyone with a B stepping
- * CPU and SMP problems "tough".
- *
- * Specific items [From Pentium Processor Specification Update]
- *
- * 1AP. Linux doesn't use remote read
- * 2AP. Linux doesn't trust APIC errors
- * 3AP. We work around this
- * 4AP. Linux never generated 3 interrupts of the same priority
- * to cause a lost local interrupt.
- * 5AP. Remote read is never used
- * 6AP. not affected - worked around in hardware
- * 7AP. not affected - worked around in hardware
- * 8AP. worked around in hardware - we get explicit CS errors if not
- * 9AP. only 'noapic' mode affected. Might generate spurious
- * interrupts, we log only the first one and count the
- * rest silently.
- * 10AP. not affected - worked around in hardware
- * 11AP. Linux reads the APIC between writes to avoid this, as per
- * the documentation. Make sure you preserve this as it affects
- * the C stepping chips too.
- * 12AP. not affected - worked around in hardware
- * 13AP. not affected - worked around in hardware
- * 14AP. we always deassert INIT during bootup
- * 15AP. not affected - worked around in hardware
- * 16AP. not affected - worked around in hardware
- * 17AP. not affected - worked around in hardware
- * 18AP. not affected - worked around in hardware
- * 19AP. not affected - worked around in BIOS
- *
- * If this sounds worrying believe me these bugs are either ___RARE___,
- * or are signal timing bugs worked around in hardware and there's
- * about nothing of note with C stepping upwards.
- */
-
-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
-
-/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
- */
-
-static inline int __prepare_ICR (unsigned int shortcut, int vector)
-{
- return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
-}
-
-static inline int __prepare_ICR2 (unsigned int mask)
-{
- return SET_APIC_DEST_FIELD(mask);
-}
-
-void __send_IPI_shortcut(unsigned int shortcut, int vector)
-{
- /*
- * Subtle. In the case of the 'never do double writes' workaround
- * we have to lock out interrupts to be safe. As we don't care
- * of the value read we use an atomic rmw access to avoid costly
- * cli/sti. Otherwise we use an even cheaper single atomic write
- * to the APIC.
- */
- unsigned int cfg;
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- /*
- * No need to touch the target chip field
- */
- cfg = __prepare_ICR(shortcut, vector);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write_around(APIC_ICR, cfg);
-}
-
-void fastcall send_IPI_self(int vector)
-{
- __send_IPI_shortcut(APIC_DEST_SELF, vector);
-}
-
-/*
- * This is only used on smaller machines.
- */
-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
-{
- unsigned long mask = cpus_addr(cpumask)[0];
- unsigned long cfg;
- unsigned long flags;
-
- local_irq_save(flags);
- WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR2(mask);
- apic_write_around(APIC_ICR2, cfg);
-
- /*
- * program the ICR
- */
- cfg = __prepare_ICR(0, vector);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write_around(APIC_ICR, cfg);
-
- local_irq_restore(flags);
-}
-
-void send_IPI_mask_sequence(cpumask_t mask, int vector)
-{
- unsigned long cfg, flags;
- unsigned int query_cpu;
-
- /*
- * Hack. The clustered APIC addressing mode doesn't allow us to send
- * to an arbitrary mask, so I do a unicasts to each CPU instead. This
- * should be modified to do 1 message per cluster ID - mbligh
- */
-
- local_irq_save(flags);
-
- for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
- if (cpu_isset(query_cpu, mask)) {
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu));
- apic_write_around(APIC_ICR2, cfg);
-
- /*
- * program the ICR
- */
- cfg = __prepare_ICR(0, vector);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write_around(APIC_ICR, cfg);
- }
- }
- local_irq_restore(flags);
-}
-
-#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
-
-/*
- * Smarter SMP flushing macros.
- * c/o Linus Torvalds.
- *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (Its not allowed anyway).
- *
- * Optimizations Manfred Spraul <manfred@colorfullife.com>
- */
-
-static cpumask_t flush_cpumask;
-static struct mm_struct * flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
-#define FLUSH_ALL 0xffffffff
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-static inline void leave_mm (unsigned long cpu)
-{
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
- BUG();
- cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
- load_cr3(swapper_pg_dir);
-}
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * for the wrong mm, and in the worst case we perform a superflous
- * tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- * was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- * Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- * Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- * cpu_tlbstate[].active_mm is correct, cpu0 already handles
- * flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * Atomically set the bit [other cpus will start sending flush ipis],
- * and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- * runs in kernel space, the cpu could load tlb entries for user space
- * pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-
-fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
-{
- unsigned long cpu;
-
- cpu = get_cpu();
-
- if (!cpu_isset(cpu, flush_cpumask))
- goto out;
- /*
- * This was a BUG() but until someone can quote me the
- * line from the intel manual that guarantees an IPI to
- * multiple CPUs is retried _only_ on the erroring CPUs
- * its staying as a return
- *
- * BUG();
- */
-
- if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
- if (flush_va == FLUSH_ALL)
- local_flush_tlb();
- else
- __flush_tlb_one(flush_va);
- } else
- leave_mm(cpu);
- }
- ack_APIC_irq();
- smp_mb__before_clear_bit();
- cpu_clear(cpu, flush_cpumask);
- smp_mb__after_clear_bit();
-out:
- put_cpu_no_resched();
-}
-
-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
- unsigned long va)
-{
- /*
- * A couple of (to be removed) sanity checks:
- *
- * - current CPU must not be in mask
- * - mask must exist :)
- */
- BUG_ON(cpus_empty(cpumask));
- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
- BUG_ON(!mm);
-
- /* If a CPU which we ran on has gone down, OK. */
- cpus_and(cpumask, cpumask, cpu_online_map);
- if (cpus_empty(cpumask))
- return;
-
- /*
- * i'm not happy about this global shared spinlock in the
- * MM hot path, but we'll see how contended it is.
- * Temporarily this turns IRQs off, so that lockups are
- * detected by the NMI watchdog.
- */
- spin_lock(&tlbstate_lock);
-
- flush_mm = mm;
- flush_va = va;
-#if NR_CPUS <= BITS_PER_LONG
- atomic_set_mask(cpumask, &flush_cpumask);
-#else
- {
- int k;
- unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
- unsigned long *cpu_mask = (unsigned long *)&cpumask;
- for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
- atomic_set_mask(cpu_mask[k], &flush_mask[k]);
- }
-#endif
- /*
- * We have to send the IPI only to
- * CPUs affected.
- */
- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
-
- while (!cpus_empty(flush_cpumask))
- /* nothing. lockup detection does not belong here */
- mb();
-
- flush_mm = NULL;
- flush_va = 0;
- spin_unlock(&tlbstate_lock);
-}
-
-void flush_tlb_current_task(void)
-{
- struct mm_struct *mm = current->mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- local_flush_tlb();
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
- preempt_enable();
-}
-
-void flush_tlb_mm (struct mm_struct * mm)
-{
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if (current->mm)
- local_flush_tlb();
- else
- leave_mm(smp_processor_id());
- }
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-
- preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
-{
- struct mm_struct *mm = vma->vm_mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if(current->mm)
- __flush_tlb_one(va);
- else
- leave_mm(smp_processor_id());
- }
-
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, va);
-
- preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_page);
-
-static void do_flush_tlb_all(void* info)
-{
- unsigned long cpu = smp_processor_id();
-
- __flush_tlb_all();
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
- leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
-}
-
-/*
- * this function sends a 'reschedule' IPI to another CPU.
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
-void smp_send_reschedule(int cpu)
-{
- WARN_ON(cpu_is_offline(cpu));
- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
-}
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
-void lock_ipi_call_lock(void)
-{
- spin_lock_irq(&call_lock);
-}
-
-void unlock_ipi_call_lock(void)
-{
- spin_unlock_irq(&call_lock);
-}
-
-static struct call_data_struct * call_data;
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-{
- struct call_data_struct data;
- int cpus;
-
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
- cpus = num_online_cpus() - 1;
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb();
-
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- spin_unlock(&call_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(smp_call_function);
-
-static void stop_this_cpu (void * dummy)
-{
- /*
- * Remove this CPU:
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
- local_irq_disable();
- disable_local_APIC();
- if (cpu_data[smp_processor_id()].hlt_works_ok)
- for(;;) halt();
- for (;;);
-}
-
-/*
- * this function calls the 'stop' function on all other CPUs in the system.
- */
-
-void smp_send_stop(void)
-{
- smp_call_function(stop_this_cpu, NULL, 1, 0);
-
- local_irq_disable();
- disable_local_APIC();
- local_irq_enable();
-}
-
-/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
- */
-fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
-{
- ack_APIC_irq();
-}
-
-fastcall void smp_call_function_interrupt(struct pt_regs *regs)
-{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- ack_APIC_irq();
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- irq_enter();
- (*func)(info);
- irq_exit();
-
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
-}
-
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
deleted file mode 100644
index e6488ffc1f7..00000000000
--- a/arch/i386/kernel/smpboot.c
+++ /dev/null
@@ -1,1404 +0,0 @@
-/*
- * x86 SMP booting functions
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Much of the core SMP work is based on previous work by Thomas Radke, to
- * whom a great many thanks are extended.
- *
- * Thanks to Intel for making available several different Pentium,
- * Pentium Pro and Pentium-II/Xeon MP machines.
- * Original development of Linux SMP code supported by Caldera.
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- *
- * Fixes
- * Felix Koop : NR_CPUS used properly
- * Jose Renau : Handle single CPU case.
- * Alan Cox : By repeated request 8) - Total BogoMIPS report.
- * Greg Wright : Fix for kernel stacks panic.
- * Erich Boleyn : MP v1.4 and additional changes.
- * Matthias Sattler : Changes for 2.1 kernel map.
- * Michel Lespinasse : Changes for 2.1 kernel map.
- * Michael Chastain : Change trampoline.S to gnu as.
- * Alan Cox : Dumb bug: 'B' step PPro's are fine
- * Ingo Molnar : Added APIC timers, based on code
- * from Jose Renau
- * Ingo Molnar : various cleanups and rewrites
- * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs
- * Martin J. Bligh : Added support for multi-quad systems
- * Dave Jones : Report invalid combinations of Athlon CPUs.
-* Rusty Russell : Hacked into shape for new "hotplug" boot process. */
-
-#include <linux/module.h>
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/smp_lock.h>
-#include <linux/bootmem.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/percpu.h>
-
-#include <linux/delay.h>
-#include <linux/mc146818rtc.h>
-#include <asm/tlbflush.h>
-#include <asm/desc.h>
-#include <asm/arch_hooks.h>
-
-#include <mach_apic.h>
-#include <mach_wakecpu.h>
-#include <smpboot_hooks.h>
-
-/* Set if we find a B stepping CPU */
-static int __devinitdata smp_b_stepping;
-
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-#ifdef CONFIG_X86_HT
-EXPORT_SYMBOL(smp_num_siblings);
-#endif
-
-/* Package ID of each logical CPU */
-int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(phys_proc_id);
-
-/* Core ID of each logical CPU */
-int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(cpu_core_id);
-
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_sibling_map);
-
-cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_core_map);
-
-/* bitmap of online cpus */
-cpumask_t cpu_online_map __read_mostly;
-EXPORT_SYMBOL(cpu_online_map);
-
-cpumask_t cpu_callin_map;
-cpumask_t cpu_callout_map;
-EXPORT_SYMBOL(cpu_callout_map);
-#ifdef CONFIG_HOTPLUG_CPU
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
-#else
-cpumask_t cpu_possible_map;
-#endif
-EXPORT_SYMBOL(cpu_possible_map);
-static cpumask_t smp_commenced_mask;
-
-/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
- * is no way to resync one AP against BP. TBD: for prescott and above, we
- * should use IA64's algorithm
- */
-static int __devinitdata tsc_sync_disabled;
-
-/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_data);
-
-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
- { [0 ... NR_CPUS-1] = 0xff };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
-
-/*
- * Trampoline 80x86 program as an array.
- */
-
-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end [];
-static unsigned char *trampoline_base;
-static int trampoline_exec;
-
-static void map_cpu_to_logical_apicid(void);
-
-/* State of each CPU. */
-DEFINE_PER_CPU(int, cpu_state) = { 0 };
-
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-
-static unsigned long __devinit setup_trampoline(void)
-{
- memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
- return virt_to_phys(trampoline_base);
-}
-
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
- trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
- /*
- * Has to be in very low memory so we can execute
- * real-mode AP code.
- */
- if (__pa(trampoline_base) >= 0x9F000)
- BUG();
- /*
- * Make the SMP trampoline executable:
- */
- trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
-}
-
-/*
- * The bootstrap kernel entry code has set these up. Save them for
- * a given CPU
- */
-
-static void __devinit smp_store_cpu_info(int id)
-{
- struct cpuinfo_x86 *c = cpu_data + id;
-
- *c = boot_cpu_data;
- if (id!=0)
- identify_cpu(c);
- /*
- * Mask B, Pentium, but not Pentium MMX
- */
- if (c->x86_vendor == X86_VENDOR_INTEL &&
- c->x86 == 5 &&
- c->x86_mask >= 1 && c->x86_mask <= 4 &&
- c->x86_model <= 3)
- /*
- * Remember we have B step Pentia with bugs
- */
- smp_b_stepping = 1;
-
- /*
- * Certain Athlons might work (for various values of 'work') in SMP
- * but they are not certified as MP capable.
- */
- if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
-
- /* Athlon 660/661 is valid. */
- if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
- goto valid_k7;
-
- /* Duron 670 is valid */
- if ((c->x86_model==7) && (c->x86_mask==0))
- goto valid_k7;
-
- /*
- * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
- * It's worth noting that the A5 stepping (662) of some Athlon XP's
- * have the MP bit set.
- * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
- */
- if (((c->x86_model==6) && (c->x86_mask>=2)) ||
- ((c->x86_model==7) && (c->x86_mask>=1)) ||
- (c->x86_model> 7))
- if (cpu_has_mp)
- goto valid_k7;
-
- /* If we get here, it's not a certified SMP capable AMD system. */
- add_taint(TAINT_UNSAFE_SMP);
- }
-
-valid_k7:
- ;
-}
-
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
-
-static atomic_t tsc_start_flag = ATOMIC_INIT(0);
-static atomic_t tsc_count_start = ATOMIC_INIT(0);
-static atomic_t tsc_count_stop = ATOMIC_INIT(0);
-static unsigned long long tsc_values[NR_CPUS];
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp (void)
-{
- int i;
- unsigned long long t0;
- unsigned long long sum, avg;
- long long delta;
- unsigned int one_usec;
- int buggy = 0;
-
- printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
- /* convert from kcyc/sec to cyc/usec */
- one_usec = cpu_khz / 1000;
-
- atomic_set(&tsc_start_flag, 1);
- wmb();
-
- /*
- * We loop a few times to get a primed instruction cache,
- * then the last pass is more or less synchronized and
- * the BP and APs set their cycle counters to zero all at
- * once. This reduces the chance of having random offsets
- * between the processors, and guarantees that the maximum
- * delay between the cycle counters is never bigger than
- * the latency of information-passing (cachelines) between
- * two CPUs.
- */
- for (i = 0; i < NR_LOOPS; i++) {
- /*
- * all APs synchronize but they loop on '== num_cpus'
- */
- while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_stop, 0);
- wmb();
- /*
- * this lets the APs save their current TSC:
- */
- atomic_inc(&tsc_count_start);
-
- rdtscll(tsc_values[smp_processor_id()]);
- /*
- * We clear the TSC in the last loop:
- */
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- /*
- * Wait for all APs to leave the synchronization point:
- */
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_start, 0);
- wmb();
- atomic_inc(&tsc_count_stop);
- }
-
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_isset(i, cpu_callout_map)) {
- t0 = tsc_values[i];
- sum += t0;
- }
- }
- avg = sum;
- do_div(avg, num_booting_cpus());
-
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- delta = tsc_values[i] - avg;
- if (delta < 0)
- delta = -delta;
- /*
- * We report bigger than 2 microseconds clock differences.
- */
- if (delta > 2*one_usec) {
- long realdelta;
- if (!buggy) {
- buggy = 1;
- printk("\n");
- }
- realdelta = delta;
- do_div(realdelta, one_usec);
- if (tsc_values[i] < avg)
- realdelta = -realdelta;
-
- printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
- }
-
- sum += delta;
- }
- if (!buggy)
- printk("passed.\n");
-}
-
-static void __init synchronize_tsc_ap (void)
-{
- int i;
-
- /*
- * Not every cpu is online at the time
- * this gets called, so we first wait for the BP to
- * finish SMP initialization:
- */
- while (!atomic_read(&tsc_start_flag)) mb();
-
- for (i = 0; i < NR_LOOPS; i++) {
- atomic_inc(&tsc_count_start);
- while (atomic_read(&tsc_count_start) != num_booting_cpus())
- mb();
-
- rdtscll(tsc_values[smp_processor_id()]);
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
- }
-}
-#undef NR_LOOPS
-
-extern void calibrate_delay(void);
-
-static atomic_t init_deasserted;
-
-static void __devinit smp_callin(void)
-{
- int cpuid, phys_id;
- unsigned long timeout;
-
- /*
- * If waken up by an INIT in an 82489DX configuration
- * we may get here before an INIT-deassert IPI reaches
- * our local APIC. We have to wait for the IPI or we'll
- * lock up on an APIC access.
- */
- wait_for_init_deassert(&init_deasserted);
-
- /*
- * (This works even if the APIC is not enabled.)
- */
- phys_id = GET_APIC_ID(apic_read(APIC_ID));
- cpuid = smp_processor_id();
- if (cpu_isset(cpuid, cpu_callin_map)) {
- printk("huh, phys CPU#%d, CPU#%d already present??\n",
- phys_id, cpuid);
- BUG();
- }
- Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
- /*
- * STARTUP IPIs are fragile beasts as they might sometimes
- * trigger some glue motherboard logic. Complete APIC bus
- * silence for 1 second, this overestimates the time the
- * boot CPU is spending to send the up to 2 STARTUP IPIs
- * by a factor of two. This should be enough.
- */
-
- /*
- * Waiting 2s total for startup (udelay is not yet working)
- */
- timeout = jiffies + 2*HZ;
- while (time_before(jiffies, timeout)) {
- /*
- * Has the boot CPU finished it's STARTUP sequence?
- */
- if (cpu_isset(cpuid, cpu_callout_map))
- break;
- rep_nop();
- }
-
- if (!time_before(jiffies, timeout)) {
- printk("BUG: CPU%d started up but did not get a callout!\n",
- cpuid);
- BUG();
- }
-
- /*
- * the boot CPU has finished the init stage and is spinning
- * on callin_map until we finish. We are free to set up this
- * CPU, first the APIC. (this is probably redundant on most
- * boards)
- */
-
- Dprintk("CALLIN, before setup_local_APIC().\n");
- smp_callin_clear_local_apic();
- setup_local_APIC();
- map_cpu_to_logical_apicid();
-
- /*
- * Get our bogomips.
- */
- calibrate_delay();
- Dprintk("Stack at about %p\n",&cpuid);
-
- /*
- * Save our processor parameters
- */
- smp_store_cpu_info(cpuid);
-
- disable_APIC_timer();
-
- /*
- * Allow the master to continue.
- */
- cpu_set(cpuid, cpu_callin_map);
-
- /*
- * Synchronize the TSC with the BP
- */
- if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
- synchronize_tsc_ap();
-}
-
-static int cpucount;
-
-static inline void
-set_cpu_sibling_map(int cpu)
-{
- int i;
-
- if (smp_num_siblings > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (cpu_core_id[cpu] == cpu_core_id[i]) {
- cpu_set(i, cpu_sibling_map[cpu]);
- cpu_set(cpu, cpu_sibling_map[i]);
- }
- }
- } else {
- cpu_set(cpu, cpu_sibling_map[cpu]);
- }
-
- if (current_cpu_data.x86_num_cores > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (phys_proc_id[cpu] == phys_proc_id[i]) {
- cpu_set(i, cpu_core_map[cpu]);
- cpu_set(cpu, cpu_core_map[i]);
- }
- }
- } else {
- cpu_core_map[cpu] = cpu_sibling_map[cpu];
- }
-}
-
-/*
- * Activate a secondary processor.
- */
-static void __devinit start_secondary(void *unused)
-{
- /*
- * Dont put anything before smp_callin(), SMP
- * booting is too fragile that we want to limit the
- * things done here to the most necessary things.
- */
- cpu_init();
- smp_callin();
- while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
- rep_nop();
- setup_secondary_APIC_clock();
- if (nmi_watchdog == NMI_IO_APIC) {
- disable_8259A_irq(0);
- enable_NMI_through_LVT0(NULL);
- enable_8259A_irq(0);
- }
- enable_APIC_timer();
- /*
- * low-memory mappings have been cleared, flush them from
- * the local TLBs too.
- */
- local_flush_tlb();
-
- /* This must be done before setting cpu_online_map */
- set_cpu_sibling_map(raw_smp_processor_id());
- wmb();
-
- /*
- * We need to hold call_lock, so there is no inconsistency
- * between the time smp_call_function() determines number of
- * IPI receipients, and the time when the determination is made
- * for which cpus receive the IPI. Holding this
- * lock helps us to not include this cpu in a currently in progress
- * smp_call_function().
- */
- lock_ipi_call_lock();
- cpu_set(smp_processor_id(), cpu_online_map);
- unlock_ipi_call_lock();
- per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
-
- /* We can take interrupts now: we're officially "up". */
- local_irq_enable();
-
- wmb();
- cpu_idle();
-}
-
-/*
- * Everything has been set up for the secondary
- * CPUs - they just need to reload everything
- * from the task structure
- * This function must not return.
- */
-void __devinit initialize_secondary(void)
-{
- /*
- * We don't actually need to load the full TSS,
- * basically just the stack pointer and the eip.
- */
-
- asm volatile(
- "movl %0,%%esp\n\t"
- "jmp *%1"
- :
- :"r" (current->thread.esp),"r" (current->thread.eip));
-}
-
-extern struct {
- void * esp;
- unsigned short ss;
-} stack_start;
-
-#ifdef CONFIG_NUMA
-
-/* which logical CPUs are on which nodes */
-cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
- { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
-/* which node each logical CPU is on */
-int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_2_node);
-
-/* set up a mapping between cpu and node. */
-static inline void map_cpu_to_node(int cpu, int node)
-{
- printk("Mapping cpu %d to node %d\n", cpu, node);
- cpu_set(cpu, node_2_cpu_mask[node]);
- cpu_2_node[cpu] = node;
-}
-
-/* undo a mapping between cpu and node. */
-static inline void unmap_cpu_to_node(int cpu)
-{
- int node;
-
- printk("Unmapping cpu %d from all nodes\n", cpu);
- for (node = 0; node < MAX_NUMNODES; node ++)
- cpu_clear(cpu, node_2_cpu_mask[node]);
- cpu_2_node[cpu] = 0;
-}
-#else /* !CONFIG_NUMA */
-
-#define map_cpu_to_node(cpu, node) ({})
-#define unmap_cpu_to_node(cpu) ({})
-
-#endif /* CONFIG_NUMA */
-
-u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
-
-static void map_cpu_to_logical_apicid(void)
-{
- int cpu = smp_processor_id();
- int apicid = logical_smp_processor_id();
-
- cpu_2_logical_apicid[cpu] = apicid;
- map_cpu_to_node(cpu, apicid_to_node(apicid));
-}
-
-static void unmap_cpu_to_logical_apicid(int cpu)
-{
- cpu_2_logical_apicid[cpu] = BAD_APICID;
- unmap_cpu_to_node(cpu);
-}
-
-#if APIC_DEBUG
-static inline void __inquire_remote_apic(int apicid)
-{
- int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
- char *names[] = { "ID", "VERSION", "SPIV" };
- int timeout, status;
-
- printk("Inquiring remote APIC #%d...\n", apicid);
-
- for (i = 0; i < ARRAY_SIZE(regs); i++) {
- printk("... APIC #%d %s: ", apicid, names[i]);
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
- timeout = 0;
- do {
- udelay(100);
- status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
- } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
- switch (status) {
- case APIC_ICR_RR_VALID:
- status = apic_read(APIC_RRR);
- printk("%08x\n", status);
- break;
- default:
- printk("failed\n");
- }
- }
-}
-#endif
-
-#ifdef WAKE_SECONDARY_VIA_NMI
-/*
- * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
- * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
- * won't ... remember to clear down the APIC, etc later.
- */
-static int __devinit
-wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
-{
- unsigned long send_status = 0, accept_status = 0;
- int timeout, maxlvt;
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- maxlvt = get_maxlvt();
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- Dprintk("NMI sent.\n");
-
- if (send_status)
- printk("APIC never delivered???\n");
- if (accept_status)
- printk("APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
-}
-#endif /* WAKE_SECONDARY_VIA_NMI */
-
-#ifdef WAKE_SECONDARY_VIA_INIT
-static int __devinit
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
-{
- unsigned long send_status = 0, accept_status = 0;
- int maxlvt, timeout, num_starts, j;
-
- /*
- * Be paranoid about clearing APIC errors.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid])) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-
- Dprintk("Asserting INIT.\n");
-
- /*
- * Turn INIT on target chip
- */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /*
- * Send IPI
- */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
- | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- mdelay(10);
-
- Dprintk("Deasserting INIT.\n");
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Send IPI */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- atomic_set(&init_deasserted, 1);
-
- /*
- * Should we send STARTUP IPIs ?
- *
- * Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't send the STARTUP IPIs.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid]))
- num_starts = 2;
- else
- num_starts = 0;
-
- /*
- * Run STARTUP IPI loop.
- */
- Dprintk("#startup loops: %d.\n", num_starts);
-
- maxlvt = get_maxlvt();
-
- for (j = 1; j <= num_starts; j++) {
- Dprintk("Sending STARTUP #%d.\n",j);
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- Dprintk("After apic_write.\n");
-
- /*
- * STARTUP IPI
- */
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_eip >> 12));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(300);
-
- Dprintk("Startup point 1.\n");
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- if (send_status || accept_status)
- break;
- }
- Dprintk("After Startup.\n");
-
- if (send_status)
- printk("APIC never delivered???\n");
- if (accept_status)
- printk("APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
-}
-#endif /* WAKE_SECONDARY_VIA_INIT */
-
-extern cpumask_t cpu_initialized;
-static inline int alloc_cpu_id(void)
-{
- cpumask_t tmp_map;
- int cpu;
- cpus_complement(tmp_map, cpu_present_map);
- cpu = first_cpu(tmp_map);
- if (cpu >= NR_CPUS)
- return -ENODEV;
- return cpu;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
-static inline struct task_struct * alloc_idle_task(int cpu)
-{
- struct task_struct *idle;
-
- if ((idle = cpu_idle_tasks[cpu]) != NULL) {
- /* initialize thread_struct. we really want to avoid destroy
- * idle tread
- */
- idle->thread.esp = (unsigned long)(((struct pt_regs *)
- (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1);
- init_idle(idle, cpu);
- return idle;
- }
- idle = fork_idle(cpu);
-
- if (!IS_ERR(idle))
- cpu_idle_tasks[cpu] = idle;
- return idle;
-}
-#else
-#define alloc_idle_task(cpu) fork_idle(cpu)
-#endif
-
-static int __devinit do_boot_cpu(int apicid, int cpu)
-/*
- * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
- * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
- */
-{
- struct task_struct *idle;
- unsigned long boot_error;
- int timeout;
- unsigned long start_eip;
- unsigned short nmi_high = 0, nmi_low = 0;
-
- ++cpucount;
-
- /*
- * We can't use kernel_thread since we must avoid to
- * reschedule the child.
- */
- idle = alloc_idle_task(cpu);
- if (IS_ERR(idle))
- panic("failed fork for CPU %d", cpu);
- idle->thread.eip = (unsigned long) start_secondary;
- /* start_eip had better be page-aligned! */
- start_eip = setup_trampoline();
-
- /* So we see what's up */
- printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
- /* Stack for startup_32 can be just as for start_secondary onwards */
- stack_start.esp = (void *) idle->thread.esp;
-
- irq_ctx_init(cpu);
-
- /*
- * This grunge runs the startup process for
- * the targeted processor.
- */
-
- atomic_set(&init_deasserted, 0);
-
- Dprintk("Setting warm reset code and vector.\n");
-
- store_NMI_vector(&nmi_high, &nmi_low);
-
- smpboot_setup_warm_reset_vector(start_eip);
-
- /*
- * Starting actual IPI sequence...
- */
- boot_error = wakeup_secondary_cpu(apicid, start_eip);
-
- if (!boot_error) {
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- cpu_set(cpu, cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (cpu_isset(cpu, cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (cpu_isset(cpu, cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- Dprintk("OK.\n");
- printk("CPU%d: ", cpu);
- print_cpu_info(&cpu_data[cpu]);
- Dprintk("CPU has booted.\n");
- } else {
- boot_error= 1;
- if (*((volatile unsigned char *)trampoline_base)
- == 0xA5)
- /* trampoline started but...? */
- printk("Stuck ??\n");
- else
- /* trampoline code not run */
- printk("Not responding.\n");
- inquire_remote_apic(apicid);
- }
- }
-
- if (boot_error) {
- /* Try to put things back the way they were before ... */
- unmap_cpu_to_logical_apicid(cpu);
- cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
- cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
- cpucount--;
- } else {
- x86_cpu_to_apicid[cpu] = apicid;
- cpu_set(cpu, cpu_present_map);
- }
-
- /* mark "stuck" area as not stuck */
- *((volatile unsigned long *)trampoline_base) = 0;
-
- return boot_error;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-void cpu_exit_clear(void)
-{
- int cpu = raw_smp_processor_id();
-
- idle_task_exit();
-
- cpucount --;
- cpu_uninit();
- irq_ctx_exit(cpu);
-
- cpu_clear(cpu, cpu_callout_map);
- cpu_clear(cpu, cpu_callin_map);
- cpu_clear(cpu, cpu_present_map);
-
- cpu_clear(cpu, smp_commenced_mask);
- unmap_cpu_to_logical_apicid(cpu);
-}
-
-struct warm_boot_cpu_info {
- struct completion *complete;
- int apicid;
- int cpu;
-};
-
-static void __devinit do_warm_boot_cpu(void *p)
-{
- struct warm_boot_cpu_info *info = p;
- do_boot_cpu(info->apicid, info->cpu);
- complete(info->complete);
-}
-
-int __devinit smp_prepare_cpu(int cpu)
-{
- DECLARE_COMPLETION(done);
- struct warm_boot_cpu_info info;
- struct work_struct task;
- int apicid, ret;
-
- lock_cpu_hotplug();
- apicid = x86_cpu_to_apicid[cpu];
- if (apicid == BAD_APICID) {
- ret = -ENODEV;
- goto exit;
- }
-
- info.complete = &done;
- info.apicid = apicid;
- info.cpu = cpu;
- INIT_WORK(&task, do_warm_boot_cpu, &info);
-
- tsc_sync_disabled = 1;
-
- /* init low mem mapping */
- clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
- KERNEL_PGD_PTRS);
- flush_tlb_all();
- schedule_work(&task);
- wait_for_completion(&done);
-
- tsc_sync_disabled = 0;
- zap_low_mappings();
- ret = 0;
-exit:
- unlock_cpu_hotplug();
- return ret;
-}
-#endif
-
-static void smp_tune_scheduling (void)
-{
- unsigned long cachesize; /* kB */
- unsigned long bandwidth = 350; /* MB/s */
- /*
- * Rough estimation for SMP scheduling, this is the number of
- * cycles it takes for a fully memory-limited process to flush
- * the SMP-local cache.
- *
- * (For a P5 this pretty much means we will choose another idle
- * CPU almost always at wakeup time (this is due to the small
- * L1 cache), on PIIs it's around 50-100 usecs, depending on
- * the cache size)
- */
-
- if (!cpu_khz) {
- /*
- * this basically disables processor-affinity
- * scheduling on SMP without a TSC.
- */
- return;
- } else {
- cachesize = boot_cpu_data.x86_cache_size;
- if (cachesize == -1) {
- cachesize = 16; /* Pentiums, 2x8kB cache */
- bandwidth = 100;
- }
- }
-}
-
-/*
- * Cycle through the processors sending APIC IPIs to boot each.
- */
-
-static int boot_cpu_logical_apicid;
-/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio;
-#ifdef CONFIG_X86_NUMAQ
-EXPORT_SYMBOL(xquad_portio);
-#endif
-
-static void __init smp_boot_cpus(unsigned int max_cpus)
-{
- int apicid, cpu, bit, kicked;
- unsigned long bogosum = 0;
-
- /*
- * Setup boot CPU information
- */
- smp_store_cpu_info(0); /* Final full version of the data */
- printk("CPU%d: ", 0);
- print_cpu_info(&cpu_data[0]);
-
- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
- boot_cpu_logical_apicid = logical_smp_processor_id();
- x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
-
- current_thread_info()->cpu = 0;
- smp_tune_scheduling();
- cpus_clear(cpu_sibling_map[0]);
- cpu_set(0, cpu_sibling_map[0]);
-
- cpus_clear(cpu_core_map[0]);
- cpu_set(0, cpu_core_map[0]);
-
- /*
- * If we couldn't find an SMP configuration at boot time,
- * get out of here now!
- */
- if (!smp_found_config && !acpi_lapic) {
- printk(KERN_NOTICE "SMP motherboard not detected.\n");
- smpboot_clear_io_apic_irqs();
- phys_cpu_present_map = physid_mask_of_physid(0);
- if (APIC_init_uniprocessor())
- printk(KERN_NOTICE "Local APIC not detected."
- " Using dummy APIC emulation.\n");
- map_cpu_to_logical_apicid();
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
- return;
- }
-
- /*
- * Should not be necessary because the MP table should list the boot
- * CPU too, but we do it for the sake of robustness anyway.
- * Makes no sense to do this check in clustered apic mode, so skip it
- */
- if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
- printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
- boot_cpu_physical_apicid);
- physid_set(hard_smp_processor_id(), phys_cpu_present_map);
- }
-
- /*
- * If we couldn't find a local APIC, then get out of here now!
- */
- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
- printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_physical_apicid);
- printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
- smpboot_clear_io_apic_irqs();
- phys_cpu_present_map = physid_mask_of_physid(0);
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
- return;
- }
-
- verify_local_APIC();
-
- /*
- * If SMP should be disabled, then really disable it!
- */
- if (!max_cpus) {
- smp_found_config = 0;
- printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
- smpboot_clear_io_apic_irqs();
- phys_cpu_present_map = physid_mask_of_physid(0);
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
- return;
- }
-
- connect_bsp_APIC();
- setup_local_APIC();
- map_cpu_to_logical_apicid();
-
-
- setup_portio_remap();
-
- /*
- * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
- *
- * In clustered apic mode, phys_cpu_present_map is a constructed thus:
- * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
- * clustered apic ID.
- */
- Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
-
- kicked = 1;
- for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
- apicid = cpu_present_to_apicid(bit);
- /*
- * Don't even attempt to start the boot CPU!
- */
- if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
- continue;
-
- if (!check_apicid_present(bit))
- continue;
- if (max_cpus <= cpucount+1)
- continue;
-
- if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
- printk("CPU #%d not responding - cannot use it.\n",
- apicid);
- else
- ++kicked;
- }
-
- /*
- * Cleanup possible dangling ends...
- */
- smpboot_restore_warm_reset_vector();
-
- /*
- * Allow the user to impress friends.
- */
- Dprintk("Before bogomips.\n");
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (cpu_isset(cpu, cpu_callout_map))
- bogosum += cpu_data[cpu].loops_per_jiffy;
- printk(KERN_INFO
- "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
- cpucount+1,
- bogosum/(500000/HZ),
- (bogosum/(5000/HZ))%100);
-
- Dprintk("Before bogocount - setting activated=1.\n");
-
- if (smp_b_stepping)
- printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
-
- /*
- * Don't taint if we are running SMP kernel on a single non-MP
- * approved Athlon
- */
- if (tainted & TAINT_UNSAFE_SMP) {
- if (cpucount)
- printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
- else
- tainted &= ~TAINT_UNSAFE_SMP;
- }
-
- Dprintk("Boot done.\n");
-
- /*
- * construct cpu_sibling_map[], so that we can tell sibling CPUs
- * efficiently.
- */
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
- }
-
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
-
- smpboot_setup_io_apic();
-
- setup_boot_APIC_clock();
-
- /*
- * Synchronize the TSC with the AP
- */
- if (cpu_has_tsc && cpucount && cpu_khz)
- synchronize_tsc_bp();
-}
-
-/* These are wrappers to interface to the new boot process. Someone
- who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
- smp_commenced_mask = cpumask_of_cpu(0);
- cpu_callin_map = cpumask_of_cpu(0);
- mb();
- smp_boot_cpus(max_cpus);
-}
-
-void __devinit smp_prepare_boot_cpu(void)
-{
- cpu_set(smp_processor_id(), cpu_online_map);
- cpu_set(smp_processor_id(), cpu_callout_map);
- cpu_set(smp_processor_id(), cpu_present_map);
- cpu_set(smp_processor_id(), cpu_possible_map);
- per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static void
-remove_siblinginfo(int cpu)
-{
- int sibling;
-
- for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
- cpu_clear(cpu, cpu_sibling_map[sibling]);
- for_each_cpu_mask(sibling, cpu_core_map[cpu])
- cpu_clear(cpu, cpu_core_map[sibling]);
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
- phys_proc_id[cpu] = BAD_APICID;
- cpu_core_id[cpu] = BAD_APICID;
-}
-
-int __cpu_disable(void)
-{
- cpumask_t map = cpu_online_map;
- int cpu = smp_processor_id();
-
- /*
- * Perhaps use cpufreq to drop frequency, but that could go
- * into generic code.
- *
- * We won't take down the boot processor on i386 due to some
- * interrupts only being able to be serviced by the BSP.
- * Especially so if we're not using an IOAPIC -zwane
- */
- if (cpu == 0)
- return -EBUSY;
-
- /* We enable the timer again on the exit path of the death loop */
- disable_APIC_timer();
- /* Allow any queued timer interrupts to get serviced */
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
-
- remove_siblinginfo(cpu);
-
- cpu_clear(cpu, map);
- fixup_irqs(map);
- /* It's now safe to remove this processor from the online map */
- cpu_clear(cpu, cpu_online_map);
- return 0;
-}
-
-void __cpu_die(unsigned int cpu)
-{
- /* We don't do anything here: idle task is faking death itself. */
- unsigned int i;
-
- for (i = 0; i < 10; i++) {
- /* They ack this in play_dead by setting CPU_DEAD */
- if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
- printk ("CPU %d is now offline\n", cpu);
- return;
- }
- msleep(100);
- }
- printk(KERN_ERR "CPU %u didn't die...\n", cpu);
-}
-#else /* ... !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
-{
- return -ENOSYS;
-}
-
-void __cpu_die(unsigned int cpu)
-{
- /* We said "no" in __cpu_disable */
- BUG();
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-int __devinit __cpu_up(unsigned int cpu)
-{
- /* In case one didn't come up */
- if (!cpu_isset(cpu, cpu_callin_map)) {
- printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
- local_irq_enable();
- return -EIO;
- }
-
- local_irq_enable();
- per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
- /* Unleash the CPU! */
- cpu_set(cpu, smp_commenced_mask);
- while (!cpu_isset(cpu, cpu_online_map))
- mb();
- return 0;
-}
-
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-#ifdef CONFIG_X86_IO_APIC
- setup_ioapic_dest();
-#endif
- zap_low_mappings();
-#ifndef CONFIG_HOTPLUG_CPU
- /*
- * Disable executability of the SMP trampoline:
- */
- set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
-#endif
-}
-
-void __init smp_intr_init(void)
-{
- /*
- * IRQ0 must be given a fixed assignment and initialized,
- * because it's used before the IO-APIC is set up.
- */
- set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
-
- /*
- * The reschedule interrupt is a CPU-to-CPU reschedule-helper
- * IPI, driven by wakeup.
- */
- set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
- /* IPI for invalidation */
- set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
-
- /* IPI for generic function call */
- set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-}
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c
deleted file mode 100644
index 8de658db814..00000000000
--- a/arch/i386/kernel/srat.c
+++ /dev/null
@@ -1,467 +0,0 @@
-/*
- * Some of the code in this file has been gleaned from the 64 bit
- * discontigmem support code base.
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to Pat Gaughen <gone@us.ibm.com>
- */
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/bootmem.h>
-#include <linux/mmzone.h>
-#include <linux/acpi.h>
-#include <linux/nodemask.h>
-#include <asm/srat.h>
-#include <asm/topology.h>
-
-/*
- * proximity macros and definitions
- */
-#define NODE_ARRAY_INDEX(x) ((x) / 8) /* 8 bits/char */
-#define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */
-#define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
-#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
-#define MAX_PXM_DOMAINS 256 /* 1 byte and no promises about values */
-/* bitmap length; _PXM is at most 255 */
-#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
-static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
-
-#define MAX_CHUNKS_PER_NODE 4
-#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
-struct node_memory_chunk_s {
- unsigned long start_pfn;
- unsigned long end_pfn;
- u8 pxm; // proximity domain of node
- u8 nid; // which cnode contains this chunk?
- u8 bank; // which mem bank on this node
-};
-static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
-
-static int num_memory_chunks; /* total number of memory chunks */
-static int zholes_size_init;
-static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
-
-extern void * boot_ioremap(unsigned long, unsigned long);
-
-/* Identify CPU proximity domains */
-static void __init parse_cpu_affinity_structure(char *p)
-{
- struct acpi_table_processor_affinity *cpu_affinity =
- (struct acpi_table_processor_affinity *) p;
-
- if (!cpu_affinity->flags.enabled)
- return; /* empty entry */
-
- /* mark this node as "seen" in node bitmap */
- BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain);
-
- printk("CPU 0x%02X in proximity domain 0x%02X\n",
- cpu_affinity->apic_id, cpu_affinity->proximity_domain);
-}
-
-/*
- * Identify memory proximity domains and hot-remove capabilities.
- * Fill node memory chunk list structure.
- */
-static void __init parse_memory_affinity_structure (char *sratp)
-{
- unsigned long long paddr, size;
- unsigned long start_pfn, end_pfn;
- u8 pxm;
- struct node_memory_chunk_s *p, *q, *pend;
- struct acpi_table_memory_affinity *memory_affinity =
- (struct acpi_table_memory_affinity *) sratp;
-
- if (!memory_affinity->flags.enabled)
- return; /* empty entry */
-
- /* mark this node as "seen" in node bitmap */
- BMAP_SET(pxm_bitmap, memory_affinity->proximity_domain);
-
- /* calculate info for memory chunk structure */
- paddr = memory_affinity->base_addr_hi;
- paddr = (paddr << 32) | memory_affinity->base_addr_lo;
- size = memory_affinity->length_hi;
- size = (size << 32) | memory_affinity->length_lo;
-
- start_pfn = paddr >> PAGE_SHIFT;
- end_pfn = (paddr + size) >> PAGE_SHIFT;
-
- pxm = memory_affinity->proximity_domain;
-
- if (num_memory_chunks >= MAXCHUNKS) {
- printk("Too many mem chunks in SRAT. Ignoring %lld MBytes at %llx\n",
- size/(1024*1024), paddr);
- return;
- }
-
- /* Insertion sort based on base address */
- pend = &node_memory_chunk[num_memory_chunks];
- for (p = &node_memory_chunk[0]; p < pend; p++) {
- if (start_pfn < p->start_pfn)
- break;
- }
- if (p < pend) {
- for (q = pend; q >= p; q--)
- *(q + 1) = *q;
- }
- p->start_pfn = start_pfn;
- p->end_pfn = end_pfn;
- p->pxm = pxm;
-
- num_memory_chunks++;
-
- printk("Memory range 0x%lX to 0x%lX (type 0x%X) in proximity domain 0x%02X %s\n",
- start_pfn, end_pfn,
- memory_affinity->memory_type,
- memory_affinity->proximity_domain,
- (memory_affinity->flags.hot_pluggable ?
- "enabled and removable" : "enabled" ) );
-}
-
-#if MAX_NR_ZONES != 3
-#error "MAX_NR_ZONES != 3, chunk_to_zone requires review"
-#endif
-/* Take a chunk of pages from page frame cstart to cend and count the number
- * of pages in each zone, returned via zones[].
- */
-static __init void chunk_to_zones(unsigned long cstart, unsigned long cend,
- unsigned long *zones)
-{
- unsigned long max_dma;
- extern unsigned long max_low_pfn;
-
- int z;
- unsigned long rend;
-
- /* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide
- * similarly scoped information and should be handled in a consistant
- * manner.
- */
- max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
- /* Split the hole into the zones in which it falls. Repeatedly
- * take the segment in which the remaining hole starts, round it
- * to the end of that zone.
- */
- memset(zones, 0, MAX_NR_ZONES * sizeof(long));
- while (cstart < cend) {
- if (cstart < max_dma) {
- z = ZONE_DMA;
- rend = (cend < max_dma)? cend : max_dma;
-
- } else if (cstart < max_low_pfn) {
- z = ZONE_NORMAL;
- rend = (cend < max_low_pfn)? cend : max_low_pfn;
-
- } else {
- z = ZONE_HIGHMEM;
- rend = cend;
- }
- zones[z] += rend - cstart;
- cstart = rend;
- }
-}
-
-/*
- * The SRAT table always lists ascending addresses, so can always
- * assume that the first "start" address that you see is the real
- * start of the node, and that the current "end" address is after
- * the previous one.
- */
-static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
-{
- /*
- * Only add present memory as told by the e820.
- * There is no guarantee from the SRAT that the memory it
- * enumerates is present at boot time because it represents
- * *possible* memory hotplug areas the same as normal RAM.
- */
- if (memory_chunk->start_pfn >= max_pfn) {
- printk (KERN_INFO "Ignoring SRAT pfns: 0x%08lx -> %08lx\n",
- memory_chunk->start_pfn, memory_chunk->end_pfn);
- return;
- }
- if (memory_chunk->nid != nid)
- return;
-
- if (!node_has_online_mem(nid))
- node_start_pfn[nid] = memory_chunk->start_pfn;
-
- if (node_start_pfn[nid] > memory_chunk->start_pfn)
- node_start_pfn[nid] = memory_chunk->start_pfn;
-
- if (node_end_pfn[nid] < memory_chunk->end_pfn)
- node_end_pfn[nid] = memory_chunk->end_pfn;
-}
-
-static u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */
-
-int pxm_to_node(int pxm)
-{
- return pxm_to_nid_map[pxm];
-}
-
-/* Parse the ACPI Static Resource Affinity Table */
-static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
-{
- u8 *start, *end, *p;
- int i, j, nid;
- u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */
-
- start = (u8 *)(&(sratp->reserved) + 1); /* skip header */
- p = start;
- end = (u8 *)sratp + sratp->header.length;
-
- memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */
- memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
- memset(zholes_size, 0, sizeof(zholes_size));
-
- /* -1 in these maps means not available */
- memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
- memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
-
- num_memory_chunks = 0;
- while (p < end) {
- switch (*p) {
- case ACPI_SRAT_PROCESSOR_AFFINITY:
- parse_cpu_affinity_structure(p);
- break;
- case ACPI_SRAT_MEMORY_AFFINITY:
- parse_memory_affinity_structure(p);
- break;
- default:
- printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]);
- break;
- }
- p += p[1];
- if (p[1] == 0) {
- printk("acpi20_parse_srat: Entry length value is zero;"
- " can't parse any further!\n");
- break;
- }
- }
-
- if (num_memory_chunks == 0) {
- printk("could not finy any ACPI SRAT memory areas.\n");
- goto out_fail;
- }
-
- /* Calculate total number of nodes in system from PXM bitmap and create
- * a set of sequential node IDs starting at zero. (ACPI doesn't seem
- * to specify the range of _PXM values.)
- */
- /*
- * MCD - we no longer HAVE to number nodes sequentially. PXM domain
- * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically
- * 32, so we will continue numbering them in this manner until MAX_NUMNODES
- * approaches MAX_PXM_DOMAINS for i386.
- */
- nodes_clear(node_online_map);
- for (i = 0; i < MAX_PXM_DOMAINS; i++) {
- if (BMAP_TEST(pxm_bitmap, i)) {
- nid = num_online_nodes();
- pxm_to_nid_map[i] = nid;
- nid_to_pxm_map[nid] = i;
- node_set_online(nid);
- }
- }
- BUG_ON(num_online_nodes() == 0);
-
- /* set cnode id in memory chunk structure */
- for (i = 0; i < num_memory_chunks; i++)
- node_memory_chunk[i].nid = pxm_to_nid_map[node_memory_chunk[i].pxm];
-
- printk("pxm bitmap: ");
- for (i = 0; i < sizeof(pxm_bitmap); i++) {
- printk("%02X ", pxm_bitmap[i]);
- }
- printk("\n");
- printk("Number of logical nodes in system = %d\n", num_online_nodes());
- printk("Number of memory chunks in system = %d\n", num_memory_chunks);
-
- for (j = 0; j < num_memory_chunks; j++){
- struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
- printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
- j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
- node_read_chunk(chunk->nid, chunk);
- }
-
- for_each_online_node(nid) {
- unsigned long start = node_start_pfn[nid];
- unsigned long end = node_end_pfn[nid];
-
- memory_present(nid, start, end);
- node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
- }
- return 1;
-out_fail:
- return 0;
-}
-
-int __init get_memcfg_from_srat(void)
-{
- struct acpi_table_header *header = NULL;
- struct acpi_table_rsdp *rsdp = NULL;
- struct acpi_table_rsdt *rsdt = NULL;
- struct acpi_pointer *rsdp_address = NULL;
- struct acpi_table_rsdt saved_rsdt;
- int tables = 0;
- int i = 0;
-
- if (ACPI_FAILURE(acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING,
- rsdp_address))) {
- printk("%s: System description tables not found\n",
- __FUNCTION__);
- goto out_err;
- }
-
- if (rsdp_address->pointer_type == ACPI_PHYSICAL_POINTER) {
- printk("%s: assigning address to rsdp\n", __FUNCTION__);
- rsdp = (struct acpi_table_rsdp *)
- (u32)rsdp_address->pointer.physical;
- } else {
- printk("%s: rsdp_address is not a physical pointer\n", __FUNCTION__);
- goto out_err;
- }
- if (!rsdp) {
- printk("%s: Didn't find ACPI root!\n", __FUNCTION__);
- goto out_err;
- }
-
- printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
- rsdp->oem_id);
-
- if (strncmp(rsdp->signature, RSDP_SIG,strlen(RSDP_SIG))) {
- printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __FUNCTION__);
- goto out_err;
- }
-
- rsdt = (struct acpi_table_rsdt *)
- boot_ioremap(rsdp->rsdt_address, sizeof(struct acpi_table_rsdt));
-
- if (!rsdt) {
- printk(KERN_WARNING
- "%s: ACPI: Invalid root system description tables (RSDT)\n",
- __FUNCTION__);
- goto out_err;
- }
-
- header = & rsdt->header;
-
- if (strncmp(header->signature, RSDT_SIG, strlen(RSDT_SIG))) {
- printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
- goto out_err;
- }
-
- /*
- * The number of tables is computed by taking the
- * size of all entries (header size minus total
- * size of RSDT) divided by the size of each entry
- * (4-byte table pointers).
- */
- tables = (header->length - sizeof(struct acpi_table_header)) / 4;
-
- if (!tables)
- goto out_err;
-
- memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
-
- if (saved_rsdt.header.length > sizeof(saved_rsdt)) {
- printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n",
- saved_rsdt.header.length);
- goto out_err;
- }
-
- printk("Begin SRAT table scan....\n");
-
- for (i = 0; i < tables; i++) {
- /* Map in header, then map in full table length. */
- header = (struct acpi_table_header *)
- boot_ioremap(saved_rsdt.entry[i], sizeof(struct acpi_table_header));
- if (!header)
- break;
- header = (struct acpi_table_header *)
- boot_ioremap(saved_rsdt.entry[i], header->length);
- if (!header)
- break;
-
- if (strncmp((char *) &header->signature, "SRAT", 4))
- continue;
-
- /* we've found the srat table. don't need to look at any more tables */
- return acpi20_parse_srat((struct acpi_table_srat *)header);
- }
-out_err:
- printk("failed to get NUMA memory information from SRAT table\n");
- return 0;
-}
-
-/* For each node run the memory list to determine whether there are
- * any memory holes. For each hole determine which ZONE they fall
- * into.
- *
- * NOTE#1: this requires knowledge of the zone boundries and so
- * _cannot_ be performed before those are calculated in setup_memory.
- *
- * NOTE#2: we rely on the fact that the memory chunks are ordered by
- * start pfn number during setup.
- */
-static void __init get_zholes_init(void)
-{
- int nid;
- int c;
- int first;
- unsigned long end = 0;
-
- for_each_online_node(nid) {
- first = 1;
- for (c = 0; c < num_memory_chunks; c++){
- if (node_memory_chunk[c].nid == nid) {
- if (first) {
- end = node_memory_chunk[c].end_pfn;
- first = 0;
-
- } else {
- /* Record any gap between this chunk
- * and the previous chunk on this node
- * against the zones it spans.
- */
- chunk_to_zones(end,
- node_memory_chunk[c].start_pfn,
- &zholes_size[nid * MAX_NR_ZONES]);
- }
- }
- }
- }
-}
-
-unsigned long * __init get_zholes_size(int nid)
-{
- if (!zholes_size_init) {
- zholes_size_init++;
- get_zholes_init();
- }
- if (nid >= MAX_NUMNODES || !node_online(nid))
- printk("%s: nid = %d is invalid/offline. num_online_nodes = %d",
- __FUNCTION__, nid, num_online_nodes());
- return &zholes_size[nid * MAX_NR_ZONES];
-}
diff --git a/arch/i386/kernel/summit.c b/arch/i386/kernel/summit.c
deleted file mode 100644
index d0e01a3acf3..00000000000
--- a/arch/i386/kernel/summit.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * arch/i386/kernel/summit.c - IBM Summit-Specific Code
- *
- * Written By: Matthew Dobson, IBM Corporation
- *
- * Copyright (c) 2003 IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <colpatch@us.ibm.com>
- *
- */
-
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <asm/io.h>
-#include <asm/mach-summit/mach_mpparse.h>
-
-static struct rio_table_hdr *rio_table_hdr __initdata;
-static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
-static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
-
-static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
-{
- int twister = 0, node = 0;
- int i, bus, num_buses;
-
- for(i = 0; i < rio_table_hdr->num_rio_dev; i++){
- if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id){
- twister = rio_devs[i]->owner_id;
- break;
- }
- }
- if (i == rio_table_hdr->num_rio_dev){
- printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __FUNCTION__);
- return last_bus;
- }
-
- for(i = 0; i < rio_table_hdr->num_scal_dev; i++){
- if (scal_devs[i]->node_id == twister){
- node = scal_devs[i]->node_id;
- break;
- }
- }
- if (i == rio_table_hdr->num_scal_dev){
- printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __FUNCTION__);
- return last_bus;
- }
-
- switch (rio_devs[wpeg_num]->type){
- case CompatWPEG:
- /* The Compatability Winnipeg controls the 2 legacy buses,
- * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
- * a PCI-PCI bridge card is used in either slot: total 5 buses.
- */
- num_buses = 5;
- break;
- case AltWPEG:
- /* The Alternate Winnipeg controls the 2 133MHz buses [1 slot
- * each], their 2 "extra" buses, the 100MHz bus [2 slots] and
- * the "extra" buses for each of those slots: total 7 buses.
- */
- num_buses = 7;
- break;
- case LookOutAWPEG:
- case LookOutBWPEG:
- /* A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
- * & the "extra" buses for each of those slots: total 9 buses.
- */
- num_buses = 9;
- break;
- default:
- printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __FUNCTION__);
- return last_bus;
- }
-
- for(bus = last_bus; bus < last_bus + num_buses; bus++)
- mp_bus_id_to_node[bus] = node;
- return bus;
-}
-
-static int __init build_detail_arrays(void)
-{
- unsigned long ptr;
- int i, scal_detail_size, rio_detail_size;
-
- if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
- printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __FUNCTION__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
- return 0;
- }
-
- switch (rio_table_hdr->version){
- default:
- printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __FUNCTION__, rio_table_hdr->version);
- return 0;
- case 2:
- scal_detail_size = 11;
- rio_detail_size = 13;
- break;
- case 3:
- scal_detail_size = 12;
- rio_detail_size = 15;
- break;
- }
-
- ptr = (unsigned long)rio_table_hdr + 3;
- for(i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
- scal_devs[i] = (struct scal_detail *)ptr;
-
- for(i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
- rio_devs[i] = (struct rio_detail *)ptr;
-
- return 1;
-}
-
-void __init setup_summit(void)
-{
- unsigned long ptr;
- unsigned short offset;
- int i, next_wpeg, next_bus = 0;
-
- /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
- ptr = *(unsigned short *)phys_to_virt(0x40Eul);
- ptr = (unsigned long)phys_to_virt(ptr << 4);
-
- rio_table_hdr = NULL;
- offset = 0x180;
- while (offset){
- /* The block id is stored in the 2nd word */
- if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
- /* set the pointer past the offset & block id */
- rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
- break;
- }
- /* The next offset is stored in the 1st word. 0 means no more */
- offset = *((unsigned short *)(ptr + offset));
- }
- if (!rio_table_hdr){
- printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __FUNCTION__);
- return;
- }
-
- if (!build_detail_arrays())
- return;
-
- /* The first Winnipeg we're looking for has an index of 0 */
- next_wpeg = 0;
- do {
- for(i = 0; i < rio_table_hdr->num_rio_dev; i++){
- if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg){
- /* It's the Winnipeg we're looking for! */
- next_bus = setup_pci_node_map_for_wpeg(i, next_bus);
- next_wpeg++;
- break;
- }
- }
- /*
- * If we go through all Rio devices and don't find one with
- * the next index, it means we've found all the Winnipegs,
- * and thus all the PCI buses.
- */
- if (i == rio_table_hdr->num_rio_dev)
- next_wpeg = 0;
- } while (next_wpeg != 0);
-}
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c
deleted file mode 100644
index a4a61976ecb..00000000000
--- a/arch/i386/kernel/sys_i386.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * linux/arch/i386/kernel/sys_i386.c
- *
- * This file contains various random system calls that
- * have a non-standard calling sequence on the Linux/i386
- * platform.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/utsname.h>
-
-#include <asm/uaccess.h>
-#include <asm/ipc.h>
-
-/*
- * sys_pipe() is the normal C calling standard for creating
- * a pipe. It's not the way Unix traditionally does this, though.
- */
-asmlinkage int sys_pipe(unsigned long __user * fildes)
-{
- int fd[2];
- int error;
-
- error = do_pipe(fd);
- if (!error) {
- if (copy_to_user(fildes, fd, 2*sizeof(int)))
- error = -EFAULT;
- }
- return error;
-}
-
-/* common code for old and new mmaps */
-static inline long do_mmap2(
- unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
-{
- int error = -EBADF;
- struct file * file = NULL;
-
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- if (!(flags & MAP_ANONYMOUS)) {
- file = fget(fd);
- if (!file)
- goto out;
- }
-
- down_write(&current->mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- up_write(&current->mm->mmap_sem);
-
- if (file)
- fput(file);
-out:
- return error;
-}
-
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
-{
- return do_mmap2(addr, len, prot, flags, fd, pgoff);
-}
-
-/*
- * Perform the select(nd, in, out, ex, tv) and mmap() system
- * calls. Linux/i386 didn't use to be able to handle more than
- * 4 system call parameters, so these system calls used a memory
- * block for parameter passing..
- */
-
-struct mmap_arg_struct {
- unsigned long addr;
- unsigned long len;
- unsigned long prot;
- unsigned long flags;
- unsigned long fd;
- unsigned long offset;
-};
-
-asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
-{
- struct mmap_arg_struct a;
- int err = -EFAULT;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- goto out;
-
- err = -EINVAL;
- if (a.offset & ~PAGE_MASK)
- goto out;
-
- err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
-out:
- return err;
-}
-
-
-struct sel_arg_struct {
- unsigned long n;
- fd_set __user *inp, *outp, *exp;
- struct timeval __user *tvp;
-};
-
-asmlinkage int old_select(struct sel_arg_struct __user *arg)
-{
- struct sel_arg_struct a;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- return -EFAULT;
- /* sys_select() does the appropriate kernel locking */
- return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
-}
-
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-asmlinkage int sys_ipc (uint call, int first, int second,
- int third, void __user *ptr, long fifth)
-{
- int version, ret;
-
- version = call >> 16; /* hack for backward compatibility */
- call &= 0xffff;
-
- switch (call) {
- case SEMOP:
- return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL);
- case SEMTIMEDOP:
- return sys_semtimedop(first, (struct sembuf __user *)ptr, second,
- (const struct timespec __user *)fifth);
-
- case SEMGET:
- return sys_semget (first, second, third);
- case SEMCTL: {
- union semun fourth;
- if (!ptr)
- return -EINVAL;
- if (get_user(fourth.__pad, (void __user * __user *) ptr))
- return -EFAULT;
- return sys_semctl (first, second, third, fourth);
- }
-
- case MSGSND:
- return sys_msgsnd (first, (struct msgbuf __user *) ptr,
- second, third);
- case MSGRCV:
- switch (version) {
- case 0: {
- struct ipc_kludge tmp;
- if (!ptr)
- return -EINVAL;
-
- if (copy_from_user(&tmp,
- (struct ipc_kludge __user *) ptr,
- sizeof (tmp)))
- return -EFAULT;
- return sys_msgrcv (first, tmp.msgp, second,
- tmp.msgtyp, third);
- }
- default:
- return sys_msgrcv (first,
- (struct msgbuf __user *) ptr,
- second, fifth, third);
- }
- case MSGGET:
- return sys_msgget ((key_t) first, second);
- case MSGCTL:
- return sys_msgctl (first, second, (struct msqid_ds __user *) ptr);
-
- case SHMAT:
- switch (version) {
- default: {
- ulong raddr;
- ret = do_shmat (first, (char __user *) ptr, second, &raddr);
- if (ret)
- return ret;
- return put_user (raddr, (ulong __user *) third);
- }
- case 1: /* iBCS2 emulator entry point */
- if (!segment_eq(get_fs(), get_ds()))
- return -EINVAL;
- /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */
- return do_shmat (first, (char __user *) ptr, second, (ulong *) third);
- }
- case SHMDT:
- return sys_shmdt ((char __user *)ptr);
- case SHMGET:
- return sys_shmget (first, second, third);
- case SHMCTL:
- return sys_shmctl (first, second,
- (struct shmid_ds __user *) ptr);
- default:
- return -ENOSYS;
- }
-}
-
-/*
- * Old cruft
- */
-asmlinkage int sys_uname(struct old_utsname __user * name)
-{
- int err;
- if (!name)
- return -EFAULT;
- down_read(&uts_sem);
- err=copy_to_user(name, &system_utsname, sizeof (*name));
- up_read(&uts_sem);
- return err?-EFAULT:0;
-}
-
-asmlinkage int sys_olduname(struct oldold_utsname __user * name)
-{
- int error;
-
- if (!name)
- return -EFAULT;
- if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
- return -EFAULT;
-
- down_read(&uts_sem);
-
- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
- error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
- error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
- error |= __put_user(0,name->release+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
- error |= __put_user(0,name->version+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
- error |= __put_user(0,name->machine+__OLD_UTS_LEN);
-
- up_read(&uts_sem);
-
- error = error ? -EFAULT : 0;
-
- return error;
-}
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
deleted file mode 100644
index 9b21a31d4f4..00000000000
--- a/arch/i386/kernel/syscall_table.S
+++ /dev/null
@@ -1,296 +0,0 @@
-.data
-ENTRY(sys_call_table)
- .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
- .long sys_exit
- .long sys_fork
- .long sys_read
- .long sys_write
- .long sys_open /* 5 */
- .long sys_close
- .long sys_waitpid
- .long sys_creat
- .long sys_link
- .long sys_unlink /* 10 */
- .long sys_execve
- .long sys_chdir
- .long sys_time
- .long sys_mknod
- .long sys_chmod /* 15 */
- .long sys_lchown16
- .long sys_ni_syscall /* old break syscall holder */
- .long sys_stat
- .long sys_lseek
- .long sys_getpid /* 20 */
- .long sys_mount
- .long sys_oldumount
- .long sys_setuid16
- .long sys_getuid16
- .long sys_stime /* 25 */
- .long sys_ptrace
- .long sys_alarm
- .long sys_fstat
- .long sys_pause
- .long sys_utime /* 30 */
- .long sys_ni_syscall /* old stty syscall holder */
- .long sys_ni_syscall /* old gtty syscall holder */
- .long sys_access
- .long sys_nice
- .long sys_ni_syscall /* 35 - old ftime syscall holder */
- .long sys_sync
- .long sys_kill
- .long sys_rename
- .long sys_mkdir
- .long sys_rmdir /* 40 */
- .long sys_dup
- .long sys_pipe
- .long sys_times
- .long sys_ni_syscall /* old prof syscall holder */
- .long sys_brk /* 45 */
- .long sys_setgid16
- .long sys_getgid16
- .long sys_signal
- .long sys_geteuid16
- .long sys_getegid16 /* 50 */
- .long sys_acct
- .long sys_umount /* recycled never used phys() */
- .long sys_ni_syscall /* old lock syscall holder */
- .long sys_ioctl
- .long sys_fcntl /* 55 */
- .long sys_ni_syscall /* old mpx syscall holder */
- .long sys_setpgid
- .long sys_ni_syscall /* old ulimit syscall holder */
- .long sys_olduname
- .long sys_umask /* 60 */
- .long sys_chroot
- .long sys_ustat
- .long sys_dup2
- .long sys_getppid
- .long sys_getpgrp /* 65 */
- .long sys_setsid
- .long sys_sigaction
- .long sys_sgetmask
- .long sys_ssetmask
- .long sys_setreuid16 /* 70 */
- .long sys_setregid16
- .long sys_sigsuspend
- .long sys_sigpending
- .long sys_sethostname
- .long sys_setrlimit /* 75 */
- .long sys_old_getrlimit
- .long sys_getrusage
- .long sys_gettimeofday
- .long sys_settimeofday
- .long sys_getgroups16 /* 80 */
- .long sys_setgroups16
- .long old_select
- .long sys_symlink
- .long sys_lstat
- .long sys_readlink /* 85 */
- .long sys_uselib
- .long sys_swapon
- .long sys_reboot
- .long old_readdir
- .long old_mmap /* 90 */
- .long sys_munmap
- .long sys_truncate
- .long sys_ftruncate
- .long sys_fchmod
- .long sys_fchown16 /* 95 */
- .long sys_getpriority
- .long sys_setpriority
- .long sys_ni_syscall /* old profil syscall holder */
- .long sys_statfs
- .long sys_fstatfs /* 100 */
- .long sys_ioperm
- .long sys_socketcall
- .long sys_syslog
- .long sys_setitimer
- .long sys_getitimer /* 105 */
- .long sys_newstat
- .long sys_newlstat
- .long sys_newfstat
- .long sys_uname
- .long sys_iopl /* 110 */
- .long sys_vhangup
- .long sys_ni_syscall /* old "idle" system call */
- .long sys_vm86old
- .long sys_wait4
- .long sys_swapoff /* 115 */
- .long sys_sysinfo
- .long sys_ipc
- .long sys_fsync
- .long sys_sigreturn
- .long sys_clone /* 120 */
- .long sys_setdomainname
- .long sys_newuname
- .long sys_modify_ldt
- .long sys_adjtimex
- .long sys_mprotect /* 125 */
- .long sys_sigprocmask
- .long sys_ni_syscall /* old "create_module" */
- .long sys_init_module
- .long sys_delete_module
- .long sys_ni_syscall /* 130: old "get_kernel_syms" */
- .long sys_quotactl
- .long sys_getpgid
- .long sys_fchdir
- .long sys_bdflush
- .long sys_sysfs /* 135 */
- .long sys_personality
- .long sys_ni_syscall /* reserved for afs_syscall */
- .long sys_setfsuid16
- .long sys_setfsgid16
- .long sys_llseek /* 140 */
- .long sys_getdents
- .long sys_select
- .long sys_flock
- .long sys_msync
- .long sys_readv /* 145 */
- .long sys_writev
- .long sys_getsid
- .long sys_fdatasync
- .long sys_sysctl
- .long sys_mlock /* 150 */
- .long sys_munlock
- .long sys_mlockall
- .long sys_munlockall
- .long sys_sched_setparam
- .long sys_sched_getparam /* 155 */
- .long sys_sched_setscheduler
- .long sys_sched_getscheduler
- .long sys_sched_yield
- .long sys_sched_get_priority_max
- .long sys_sched_get_priority_min /* 160 */
- .long sys_sched_rr_get_interval
- .long sys_nanosleep
- .long sys_mremap
- .long sys_setresuid16
- .long sys_getresuid16 /* 165 */
- .long sys_vm86
- .long sys_ni_syscall /* Old sys_query_module */
- .long sys_poll
- .long sys_nfsservctl
- .long sys_setresgid16 /* 170 */
- .long sys_getresgid16
- .long sys_prctl
- .long sys_rt_sigreturn
- .long sys_rt_sigaction
- .long sys_rt_sigprocmask /* 175 */
- .long sys_rt_sigpending
- .long sys_rt_sigtimedwait
- .long sys_rt_sigqueueinfo
- .long sys_rt_sigsuspend
- .long sys_pread64 /* 180 */
- .long sys_pwrite64
- .long sys_chown16
- .long sys_getcwd
- .long sys_capget
- .long sys_capset /* 185 */
- .long sys_sigaltstack
- .long sys_sendfile
- .long sys_ni_syscall /* reserved for streams1 */
- .long sys_ni_syscall /* reserved for streams2 */
- .long sys_vfork /* 190 */
- .long sys_getrlimit
- .long sys_mmap2
- .long sys_truncate64
- .long sys_ftruncate64
- .long sys_stat64 /* 195 */
- .long sys_lstat64
- .long sys_fstat64
- .long sys_lchown
- .long sys_getuid
- .long sys_getgid /* 200 */
- .long sys_geteuid
- .long sys_getegid
- .long sys_setreuid
- .long sys_setregid
- .long sys_getgroups /* 205 */
- .long sys_setgroups
- .long sys_fchown
- .long sys_setresuid
- .long sys_getresuid
- .long sys_setresgid /* 210 */
- .long sys_getresgid
- .long sys_chown
- .long sys_setuid
- .long sys_setgid
- .long sys_setfsuid /* 215 */
- .long sys_setfsgid
- .long sys_pivot_root
- .long sys_mincore
- .long sys_madvise
- .long sys_getdents64 /* 220 */
- .long sys_fcntl64
- .long sys_ni_syscall /* reserved for TUX */
- .long sys_ni_syscall
- .long sys_gettid
- .long sys_readahead /* 225 */
- .long sys_setxattr
- .long sys_lsetxattr
- .long sys_fsetxattr
- .long sys_getxattr
- .long sys_lgetxattr /* 230 */
- .long sys_fgetxattr
- .long sys_listxattr
- .long sys_llistxattr
- .long sys_flistxattr
- .long sys_removexattr /* 235 */
- .long sys_lremovexattr
- .long sys_fremovexattr
- .long sys_tkill
- .long sys_sendfile64
- .long sys_futex /* 240 */
- .long sys_sched_setaffinity
- .long sys_sched_getaffinity
- .long sys_set_thread_area
- .long sys_get_thread_area
- .long sys_io_setup /* 245 */
- .long sys_io_destroy
- .long sys_io_getevents
- .long sys_io_submit
- .long sys_io_cancel
- .long sys_fadvise64 /* 250 */
- .long sys_ni_syscall
- .long sys_exit_group
- .long sys_lookup_dcookie
- .long sys_epoll_create
- .long sys_epoll_ctl /* 255 */
- .long sys_epoll_wait
- .long sys_remap_file_pages
- .long sys_set_tid_address
- .long sys_timer_create
- .long sys_timer_settime /* 260 */
- .long sys_timer_gettime
- .long sys_timer_getoverrun
- .long sys_timer_delete
- .long sys_clock_settime
- .long sys_clock_gettime /* 265 */
- .long sys_clock_getres
- .long sys_clock_nanosleep
- .long sys_statfs64
- .long sys_fstatfs64
- .long sys_tgkill /* 270 */
- .long sys_utimes
- .long sys_fadvise64_64
- .long sys_ni_syscall /* sys_vserver */
- .long sys_mbind
- .long sys_get_mempolicy
- .long sys_set_mempolicy
- .long sys_mq_open
- .long sys_mq_unlink
- .long sys_mq_timedsend
- .long sys_mq_timedreceive /* 280 */
- .long sys_mq_notify
- .long sys_mq_getsetattr
- .long sys_kexec_load
- .long sys_waitid
- .long sys_ni_syscall /* 285 */ /* available */
- .long sys_add_key
- .long sys_request_key
- .long sys_keyctl
- .long sys_ioprio_set
- .long sys_ioprio_get /* 290 */
- .long sys_inotify_init
- .long sys_inotify_add_watch
- .long sys_inotify_rm_watch
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
deleted file mode 100644
index 0bada1870bd..00000000000
--- a/arch/i386/kernel/sysenter.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * linux/arch/i386/kernel/sysenter.c
- *
- * (C) Copyright 2002 Linus Torvalds
- *
- * This file contains the needed initializations to support sysenter.
- */
-
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/thread_info.h>
-#include <linux/sched.h>
-#include <linux/gfp.h>
-#include <linux/string.h>
-#include <linux/elf.h>
-
-#include <asm/cpufeature.h>
-#include <asm/msr.h>
-#include <asm/pgtable.h>
-#include <asm/unistd.h>
-
-extern asmlinkage void sysenter_entry(void);
-
-void enable_sep_cpu(void)
-{
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
-
- if (!boot_cpu_has(X86_FEATURE_SEP)) {
- put_cpu();
- return;
- }
-
- tss->ss1 = __KERNEL_CS;
- tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
- wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
- wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
- wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
- put_cpu();
-}
-
-/*
- * These symbols are defined by vsyscall.o to mark the bounds
- * of the ELF DSO images included therein.
- */
-extern const char vsyscall_int80_start, vsyscall_int80_end;
-extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
-
-int __init sysenter_setup(void)
-{
- void *page = (void *)get_zeroed_page(GFP_ATOMIC);
-
- __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);
-
- if (!boot_cpu_has(X86_FEATURE_SEP)) {
- memcpy(page,
- &vsyscall_int80_start,
- &vsyscall_int80_end - &vsyscall_int80_start);
- return 0;
- }
-
- memcpy(page,
- &vsyscall_sysenter_start,
- &vsyscall_sysenter_end - &vsyscall_sysenter_start);
-
- return 0;
-}
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
deleted file mode 100644
index 41c5b2dc620..00000000000
--- a/arch/i386/kernel/time.c
+++ /dev/null
@@ -1,485 +0,0 @@
-/*
- * linux/arch/i386/kernel/time.c
- *
- * Copyright (C) 1991, 1992, 1995 Linus Torvalds
- *
- * This file contains the PC-specific time handling details:
- * reading the RTC at bootup, etc..
- * 1994-07-02 Alan Modra
- * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
- * 1995-03-26 Markus Kuhn
- * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
- * precision CMOS clock update
- * 1996-05-03 Ingo Molnar
- * fixed time warps in do_[slow|fast]_gettimeoffset()
- * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
- * "A Kernel Model for Precision Timekeeping" by Dave Mills
- * 1998-09-05 (Various)
- * More robust do_fast_gettimeoffset() algorithm implemented
- * (works with APM, Cyrix 6x86MX and Centaur C6),
- * monotonic gettimeofday() with fast_get_timeoffset(),
- * drift-proof precision TSC calibration on boot
- * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
- * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
- * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
- * 1998-12-16 Andrea Arcangeli
- * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
- * because was not accounting lost_ticks.
- * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
- * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
- * serialize accesses to xtime/lost_ticks).
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/sysdev.h>
-#include <linux/bcd.h>
-#include <linux/efi.h>
-#include <linux/mca.h>
-
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/irq.h>
-#include <asm/msr.h>
-#include <asm/delay.h>
-#include <asm/mpspec.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-#include <asm/timer.h>
-
-#include "mach_time.h"
-
-#include <linux/timex.h>
-#include <linux/config.h>
-
-#include <asm/hpet.h>
-
-#include <asm/arch_hooks.h>
-
-#include "io_ports.h"
-
-#include <asm/i8259.h>
-
-int pit_latch_buggy; /* extern */
-
-#include "do_timer.h"
-
-unsigned int cpu_khz; /* Detected as we calibrate the TSC */
-EXPORT_SYMBOL(cpu_khz);
-
-extern unsigned long wall_jiffies;
-
-DEFINE_SPINLOCK(rtc_lock);
-EXPORT_SYMBOL(rtc_lock);
-
-#include <asm/i8253.h>
-
-DEFINE_SPINLOCK(i8253_lock);
-EXPORT_SYMBOL(i8253_lock);
-
-struct timer_opts *cur_timer __read_mostly = &timer_none;
-
-/*
- * This is a special lock that is owned by the CPU and holds the index
- * register we are working with. It is required for NMI access to the
- * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
- */
-volatile unsigned long cmos_lock = 0;
-EXPORT_SYMBOL(cmos_lock);
-
-/* Routines for accessing the CMOS RAM/RTC. */
-unsigned char rtc_cmos_read(unsigned char addr)
-{
- unsigned char val;
- lock_cmos_prefix(addr);
- outb_p(addr, RTC_PORT(0));
- val = inb_p(RTC_PORT(1));
- lock_cmos_suffix(addr);
- return val;
-}
-EXPORT_SYMBOL(rtc_cmos_read);
-
-void rtc_cmos_write(unsigned char val, unsigned char addr)
-{
- lock_cmos_prefix(addr);
- outb_p(addr, RTC_PORT(0));
- outb_p(val, RTC_PORT(1));
- lock_cmos_suffix(addr);
-}
-EXPORT_SYMBOL(rtc_cmos_write);
-
-/*
- * This version of gettimeofday has microsecond resolution
- * and better than microsecond precision on fast x86 machines with TSC.
- */
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long seq;
- unsigned long usec, sec;
- unsigned long max_ntp_tick;
-
- do {
- unsigned long lost;
-
- seq = read_seqbegin(&xtime_lock);
-
- usec = cur_timer->get_offset();
- lost = jiffies - wall_jiffies;
-
- /*
- * If time_adjust is negative then NTP is slowing the clock
- * so make sure not to go into next possible interval.
- * Better to lose some accuracy than have time go backwards..
- */
- if (unlikely(time_adjust < 0)) {
- max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
- usec = min(usec, max_ntp_tick);
-
- if (lost)
- usec += lost * max_ntp_tick;
- }
- else if (unlikely(lost))
- usec += lost * (USEC_PER_SEC / HZ);
-
- sec = xtime.tv_sec;
- usec += (xtime.tv_nsec / 1000);
- } while (read_seqretry(&xtime_lock, seq));
-
- while (usec >= 1000000) {
- usec -= 1000000;
- sec++;
- }
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
- time_t wtm_sec, sec = tv->tv_sec;
- long wtm_nsec, nsec = tv->tv_nsec;
-
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irq(&xtime_lock);
- /*
- * This is revolting. We need to set "xtime" correctly. However, the
- * value in this location is the value at the most recent update of
- * wall time. Discover what correction gettimeofday() would have
- * made, and then undo it!
- */
- nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
- nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- ntp_clear();
- write_sequnlock_irq(&xtime_lock);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
-static int set_rtc_mmss(unsigned long nowtime)
-{
- int retval;
-
- WARN_ON(irqs_disabled());
-
- /* gets recalled with irq locally disabled */
- spin_lock_irq(&rtc_lock);
- if (efi_enabled)
- retval = efi_set_rtc_mmss(nowtime);
- else
- retval = mach_set_rtc_mmss(nowtime);
- spin_unlock_irq(&rtc_lock);
-
- return retval;
-}
-
-
-int timer_ack;
-
-/* monotonic_clock(): returns # of nanoseconds passed since time_init()
- * Note: This function is required to return accurate
- * time even in the absence of multiple timer ticks.
- */
-unsigned long long monotonic_clock(void)
-{
- return cur_timer->monotonic_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
-unsigned long profile_pc(struct pt_regs *regs)
-{
- unsigned long pc = instruction_pointer(regs);
-
- if (in_lock_functions(pc))
- return *(unsigned long *)(regs->ebp + 4);
-
- return pc;
-}
-EXPORT_SYMBOL(profile_pc);
-#endif
-
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
-{
-#ifdef CONFIG_X86_IO_APIC
- if (timer_ack) {
- /*
- * Subtle, when I/O APICs are used we have to ack timer IRQ
- * manually to reset the IRR bit for do_slow_gettimeoffset().
- * This will also deassert NMI lines for the watchdog if run
- * on an 82489DX-based system.
- */
- spin_lock(&i8259A_lock);
- outb(0x0c, PIC_MASTER_OCW3);
- /* Ack the IRQ; AEOI will end it automatically. */
- inb(PIC_MASTER_POLL);
- spin_unlock(&i8259A_lock);
- }
-#endif
-
- do_timer_interrupt_hook(regs);
-
-
- if (MCA_bus) {
- /* The PS/2 uses level-triggered interrupts. You can't
- turn them off, nor would you want to (any attempt to
- enable edge-triggered interrupts usually gets intercepted by a
- special hardware circuit). Hence we have to acknowledge
- the timer interrupt. Through some incredibly stupid
- design idea, the reset for IRQ 0 is done by setting the
- high bit of the PPI port B (0x61). Note that some PS/2s,
- notably the 55SX, work fine if this is removed. */
-
- irq = inb_p( 0x61 ); /* read the current state */
- outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
- }
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
-
- cur_timer->mark_offset();
-
- do_timer_interrupt(irq, regs);
-
- write_sequnlock(&xtime_lock);
- return IRQ_HANDLED;
-}
-
-/* not static: needed by APM */
-unsigned long get_cmos_time(void)
-{
- unsigned long retval;
-
- spin_lock(&rtc_lock);
-
- if (efi_enabled)
- retval = efi_get_time();
- else
- retval = mach_get_cmos_time();
-
- spin_unlock(&rtc_lock);
-
- return retval;
-}
-EXPORT_SYMBOL(get_cmos_time);
-
-static void sync_cmos_clock(unsigned long dummy);
-
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
-
-static void sync_cmos_clock(unsigned long dummy)
-{
- struct timeval now, next;
- int fail = 1;
-
- /*
- * If we have an externally synchronized Linux clock, then update
- * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
- * called as close as possible to 500 ms before the new second starts.
- * This code is run on a timer. If the clock is set, that timer
- * may not expire at the correct time. Thus, we adjust...
- */
- if (!ntp_synced())
- /*
- * Not synced, exit, do not restart a timer (if one is
- * running, let it run out).
- */
- return;
-
- do_gettimeofday(&now);
- if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
- now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
- fail = set_rtc_mmss(now.tv_sec);
-
- next.tv_usec = USEC_AFTER - now.tv_usec;
- if (next.tv_usec <= 0)
- next.tv_usec += USEC_PER_SEC;
-
- if (!fail)
- next.tv_sec = 659;
- else
- next.tv_sec = 0;
-
- if (next.tv_usec >= USEC_PER_SEC) {
- next.tv_sec++;
- next.tv_usec -= USEC_PER_SEC;
- }
- mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
-}
-
-void notify_arch_cmos_timer(void)
-{
- mod_timer(&sync_cmos_timer, jiffies + 1);
-}
-
-static long clock_cmos_diff, sleep_start;
-
-static struct timer_opts *last_timer;
-static int timer_suspend(struct sys_device *dev, pm_message_t state)
-{
- /*
- * Estimate time zone so that set_time can update the clock
- */
- clock_cmos_diff = -get_cmos_time();
- clock_cmos_diff += get_seconds();
- sleep_start = get_cmos_time();
- last_timer = cur_timer;
- cur_timer = &timer_none;
- if (last_timer->suspend)
- last_timer->suspend(state);
- return 0;
-}
-
-static int timer_resume(struct sys_device *dev)
-{
- unsigned long flags;
- unsigned long sec;
- unsigned long sleep_length;
-
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled())
- hpet_reenable();
-#endif
- setup_pit_timer();
- sec = get_cmos_time() + clock_cmos_diff;
- sleep_length = (get_cmos_time() - sleep_start) * HZ;
- write_seqlock_irqsave(&xtime_lock, flags);
- xtime.tv_sec = sec;
- xtime.tv_nsec = 0;
- write_sequnlock_irqrestore(&xtime_lock, flags);
- jiffies += sleep_length;
- wall_jiffies += sleep_length;
- if (last_timer->resume)
- last_timer->resume();
- cur_timer = last_timer;
- last_timer = NULL;
- touch_softlockup_watchdog();
- return 0;
-}
-
-static struct sysdev_class timer_sysclass = {
- .resume = timer_resume,
- .suspend = timer_suspend,
- set_kset_name("timer"),
-};
-
-
-/* XXX this driverfs stuff should probably go elsewhere later -john */
-static struct sys_device device_timer = {
- .id = 0,
- .cls = &timer_sysclass,
-};
-
-static int time_init_device(void)
-{
- int error = sysdev_class_register(&timer_sysclass);
- if (!error)
- error = sysdev_register(&device_timer);
- return error;
-}
-
-device_initcall(time_init_device);
-
-#ifdef CONFIG_HPET_TIMER
-extern void (*late_time_init)(void);
-/* Duplicate of time_init() below, with hpet_enable part added */
-static void __init hpet_time_init(void)
-{
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
- if ((hpet_enable() >= 0) && hpet_use_timer) {
- printk("Using HPET for base-timer\n");
- }
-
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
-
- time_init_hook();
-}
-#endif
-
-void __init time_init(void)
-{
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_capable()) {
- /*
- * HPET initialization needs to do memory-mapped io. So, let
- * us do a late initialization after mem_init().
- */
- late_time_init = hpet_time_init;
- return;
- }
-#endif
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
-
- time_init_hook();
-}
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
deleted file mode 100644
index 9caeaa315cd..00000000000
--- a/arch/i386/kernel/time_hpet.c
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
- * linux/arch/i386/kernel/time_hpet.c
- * This code largely copied from arch/x86_64/kernel/time.c
- * See that file for credits.
- *
- * 2003-06-30 Venkatesh Pallipadi - Additional changes for HPET support
- */
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/timer.h>
-#include <asm/fixmap.h>
-#include <asm/apic.h>
-
-#include <linux/timex.h>
-#include <linux/config.h>
-
-#include <asm/hpet.h>
-#include <linux/hpet.h>
-
-static unsigned long hpet_period; /* fsecs / HPET clock */
-unsigned long hpet_tick; /* hpet clks count per tick */
-unsigned long hpet_address; /* hpet memory map physical address */
-int hpet_use_timer;
-
-static int use_hpet; /* can be used for runtime check of hpet */
-static int boot_hpet_disable; /* boottime override for HPET timer */
-static void __iomem * hpet_virt_address; /* hpet kernel virtual address */
-
-#define FSEC_TO_USEC (1000000000UL)
-
-int hpet_readl(unsigned long a)
-{
- return readl(hpet_virt_address + a);
-}
-
-static void hpet_writel(unsigned long d, unsigned long a)
-{
- writel(d, hpet_virt_address + a);
-}
-
-#ifdef CONFIG_X86_LOCAL_APIC
-/*
- * HPET counters dont wrap around on every tick. They just change the
- * comparator value and continue. Next tick can be caught by checking
- * for a change in the comparator value. Used in apic.c.
- */
-static void __devinit wait_hpet_tick(void)
-{
- unsigned int start_cmp_val, end_cmp_val;
-
- start_cmp_val = hpet_readl(HPET_T0_CMP);
- do {
- end_cmp_val = hpet_readl(HPET_T0_CMP);
- } while (start_cmp_val == end_cmp_val);
-}
-#endif
-
-static int hpet_timer_stop_set_go(unsigned long tick)
-{
- unsigned int cfg;
-
- /*
- * Stop the timers and reset the main counter.
- */
- cfg = hpet_readl(HPET_CFG);
- cfg &= ~HPET_CFG_ENABLE;
- hpet_writel(cfg, HPET_CFG);
- hpet_writel(0, HPET_COUNTER);
- hpet_writel(0, HPET_COUNTER + 4);
-
- if (hpet_use_timer) {
- /*
- * Set up timer 0, as periodic with first interrupt to happen at
- * hpet_tick, and period also hpet_tick.
- */
- cfg = hpet_readl(HPET_T0_CFG);
- cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
- HPET_TN_SETVAL | HPET_TN_32BIT;
- hpet_writel(cfg, HPET_T0_CFG);
-
- /*
- * The first write after writing TN_SETVAL to the config register sets
- * the counter value, the second write sets the threshold.
- */
- hpet_writel(tick, HPET_T0_CMP);
- hpet_writel(tick, HPET_T0_CMP);
- }
- /*
- * Go!
- */
- cfg = hpet_readl(HPET_CFG);
- if (hpet_use_timer)
- cfg |= HPET_CFG_LEGACY;
- cfg |= HPET_CFG_ENABLE;
- hpet_writel(cfg, HPET_CFG);
-
- return 0;
-}
-
-/*
- * Check whether HPET was found by ACPI boot parse. If yes setup HPET
- * counter 0 for kernel base timer.
- */
-int __init hpet_enable(void)
-{
- unsigned int id;
- unsigned long tick_fsec_low, tick_fsec_high; /* tick in femto sec */
- unsigned long hpet_tick_rem;
-
- if (boot_hpet_disable)
- return -1;
-
- if (!hpet_address) {
- return -1;
- }
- hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
- /*
- * Read the period, compute tick and quotient.
- */
- id = hpet_readl(HPET_ID);
-
- /*
- * We are checking for value '1' or more in number field if
- * CONFIG_HPET_EMULATE_RTC is set because we will need an
- * additional timer for RTC emulation.
- * However, we can do with one timer otherwise using the
- * the single HPET timer for system time.
- */
-#ifdef CONFIG_HPET_EMULATE_RTC
- if (!(id & HPET_ID_NUMBER))
- return -1;
-#endif
-
-
- hpet_period = hpet_readl(HPET_PERIOD);
- if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD))
- return -1;
-
- /*
- * 64 bit math
- * First changing tick into fsec
- * Then 64 bit div to find number of hpet clk per tick
- */
- ASM_MUL64_REG(tick_fsec_low, tick_fsec_high,
- KERNEL_TICK_USEC, FSEC_TO_USEC);
- ASM_DIV64_REG(hpet_tick, hpet_tick_rem,
- hpet_period, tick_fsec_low, tick_fsec_high);
-
- if (hpet_tick_rem > (hpet_period >> 1))
- hpet_tick++; /* rounding the result */
-
- hpet_use_timer = id & HPET_ID_LEGSUP;
-
- if (hpet_timer_stop_set_go(hpet_tick))
- return -1;
-
- use_hpet = 1;
-
-#ifdef CONFIG_HPET
- {
- struct hpet_data hd;
- unsigned int ntimer;
-
- memset(&hd, 0, sizeof (hd));
-
- ntimer = hpet_readl(HPET_ID);
- ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
- ntimer++;
-
- /*
- * Register with driver.
- * Timer0 and Timer1 is used by platform.
- */
- hd.hd_phys_address = hpet_address;
- hd.hd_address = hpet_virt_address;
- hd.hd_nirqs = ntimer;
- hd.hd_flags = HPET_DATA_PLATFORM;
- hpet_reserve_timer(&hd, 0);
-#ifdef CONFIG_HPET_EMULATE_RTC
- hpet_reserve_timer(&hd, 1);
-#endif
- hd.hd_irq[0] = HPET_LEGACY_8254;
- hd.hd_irq[1] = HPET_LEGACY_RTC;
- if (ntimer > 2) {
- struct hpet __iomem *hpet;
- struct hpet_timer __iomem *timer;
- int i;
-
- hpet = hpet_virt_address;
-
- for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer;
- timer++, i++)
- hd.hd_irq[i] = (timer->hpet_config &
- Tn_INT_ROUTE_CNF_MASK) >>
- Tn_INT_ROUTE_CNF_SHIFT;
-
- }
-
- hpet_alloc(&hd);
- }
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
- if (hpet_use_timer)
- wait_timer_tick = wait_hpet_tick;
-#endif
- return 0;
-}
-
-int hpet_reenable(void)
-{
- return hpet_timer_stop_set_go(hpet_tick);
-}
-
-int is_hpet_enabled(void)
-{
- return use_hpet;
-}
-
-int is_hpet_capable(void)
-{
- if (!boot_hpet_disable && hpet_address)
- return 1;
- return 0;
-}
-
-static int __init hpet_setup(char* str)
-{
- if (str) {
- if (!strncmp("disable", str, 7))
- boot_hpet_disable = 1;
- }
- return 1;
-}
-
-__setup("hpet=", hpet_setup);
-
-#ifdef CONFIG_HPET_EMULATE_RTC
-/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
- * is enabled, we support RTC interrupt functionality in software.
- * RTC has 3 kinds of interrupts:
- * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
- * is updated
- * 2) Alarm Interrupt - generate an interrupt at a specific time of day
- * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
- * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
- * (1) and (2) above are implemented using polling at a frequency of
- * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
- * overhead. (DEFAULT_RTC_INT_FREQ)
- * For (3), we use interrupts at 64Hz or user specified periodic
- * frequency, whichever is higher.
- */
-#include <linux/mc146818rtc.h>
-#include <linux/rtc.h>
-
-extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-
-#define DEFAULT_RTC_INT_FREQ 64
-#define RTC_NUM_INTS 1
-
-static unsigned long UIE_on;
-static unsigned long prev_update_sec;
-
-static unsigned long AIE_on;
-static struct rtc_time alarm_time;
-
-static unsigned long PIE_on;
-static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
-static unsigned long PIE_count;
-
-static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
-static unsigned int hpet_t1_cmp; /* cached comparator register */
-
-/*
- * Timer 1 for RTC, we do not use periodic interrupt feature,
- * even if HPET supports periodic interrupts on Timer 1.
- * The reason being, to set up a periodic interrupt in HPET, we need to
- * stop the main counter. And if we do that everytime someone diables/enables
- * RTC, we will have adverse effect on main kernel timer running on Timer 0.
- * So, for the time being, simulate the periodic interrupt in software.
- *
- * hpet_rtc_timer_init() is called for the first time and during subsequent
- * interuppts reinit happens through hpet_rtc_timer_reinit().
- */
-int hpet_rtc_timer_init(void)
-{
- unsigned int cfg, cnt;
- unsigned long flags;
-
- if (!is_hpet_enabled())
- return 0;
- /*
- * Set the counter 1 and enable the interrupts.
- */
- if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
- hpet_rtc_int_freq = PIE_freq;
- else
- hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
-
- local_irq_save(flags);
- cnt = hpet_readl(HPET_COUNTER);
- cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
- hpet_writel(cnt, HPET_T1_CMP);
- hpet_t1_cmp = cnt;
- local_irq_restore(flags);
-
- cfg = hpet_readl(HPET_T1_CFG);
- cfg &= ~HPET_TN_PERIODIC;
- cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
- hpet_writel(cfg, HPET_T1_CFG);
-
- return 1;
-}
-
-static void hpet_rtc_timer_reinit(void)
-{
- unsigned int cfg, cnt;
-
- if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
- cfg = hpet_readl(HPET_T1_CFG);
- cfg &= ~HPET_TN_ENABLE;
- hpet_writel(cfg, HPET_T1_CFG);
- return;
- }
-
- if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
- hpet_rtc_int_freq = PIE_freq;
- else
- hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
-
- /* It is more accurate to use the comparator value than current count.*/
- cnt = hpet_t1_cmp;
- cnt += hpet_tick*HZ/hpet_rtc_int_freq;
- hpet_writel(cnt, HPET_T1_CMP);
- hpet_t1_cmp = cnt;
-}
-
-/*
- * The functions below are called from rtc driver.
- * Return 0 if HPET is not being used.
- * Otherwise do the necessary changes and return 1.
- */
-int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
-{
- if (!is_hpet_enabled())
- return 0;
-
- if (bit_mask & RTC_UIE)
- UIE_on = 0;
- if (bit_mask & RTC_PIE)
- PIE_on = 0;
- if (bit_mask & RTC_AIE)
- AIE_on = 0;
-
- return 1;
-}
-
-int hpet_set_rtc_irq_bit(unsigned long bit_mask)
-{
- int timer_init_reqd = 0;
-
- if (!is_hpet_enabled())
- return 0;
-
- if (!(PIE_on | AIE_on | UIE_on))
- timer_init_reqd = 1;
-
- if (bit_mask & RTC_UIE) {
- UIE_on = 1;
- }
- if (bit_mask & RTC_PIE) {
- PIE_on = 1;
- PIE_count = 0;
- }
- if (bit_mask & RTC_AIE) {
- AIE_on = 1;
- }
-
- if (timer_init_reqd)
- hpet_rtc_timer_init();
-
- return 1;
-}
-
-int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
-{
- if (!is_hpet_enabled())
- return 0;
-
- alarm_time.tm_hour = hrs;
- alarm_time.tm_min = min;
- alarm_time.tm_sec = sec;
-
- return 1;
-}
-
-int hpet_set_periodic_freq(unsigned long freq)
-{
- if (!is_hpet_enabled())
- return 0;
-
- PIE_freq = freq;
- PIE_count = 0;
-
- return 1;
-}
-
-int hpet_rtc_dropped_irq(void)
-{
- if (!is_hpet_enabled())
- return 0;
-
- return 1;
-}
-
-irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- struct rtc_time curr_time;
- unsigned long rtc_int_flag = 0;
- int call_rtc_interrupt = 0;
-
- hpet_rtc_timer_reinit();
-
- if (UIE_on | AIE_on) {
- rtc_get_rtc_time(&curr_time);
- }
- if (UIE_on) {
- if (curr_time.tm_sec != prev_update_sec) {
- /* Set update int info, call real rtc int routine */
- call_rtc_interrupt = 1;
- rtc_int_flag = RTC_UF;
- prev_update_sec = curr_time.tm_sec;
- }
- }
- if (PIE_on) {
- PIE_count++;
- if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
- /* Set periodic int info, call real rtc int routine */
- call_rtc_interrupt = 1;
- rtc_int_flag |= RTC_PF;
- PIE_count = 0;
- }
- }
- if (AIE_on) {
- if ((curr_time.tm_sec == alarm_time.tm_sec) &&
- (curr_time.tm_min == alarm_time.tm_min) &&
- (curr_time.tm_hour == alarm_time.tm_hour)) {
- /* Set alarm int info, call real rtc int routine */
- call_rtc_interrupt = 1;
- rtc_int_flag |= RTC_AF;
- }
- }
- if (call_rtc_interrupt) {
- rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
- rtc_interrupt(rtc_int_flag, dev_id, regs);
- }
- return IRQ_HANDLED;
-}
-#endif
-
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile
deleted file mode 100644
index 8fa12be658d..00000000000
--- a/arch/i386/kernel/timers/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for x86 timers
-#
-
-obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
-
-obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
-obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
-obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
deleted file mode 100644
index 8163fe0cf1f..00000000000
--- a/arch/i386/kernel/timers/common.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Common functions used across the timers go here
- */
-
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/jiffies.h>
-#include <linux/module.h>
-
-#include <asm/io.h>
-#include <asm/timer.h>
-#include <asm/hpet.h>
-
-#include "mach_timer.h"
-
-/* ------ Calibrate the TSC -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
- * Too much 64-bit arithmetic here to do this cleanly in C, and for
- * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
- * output busy loop as low as possible. We avoid reading the CTC registers
- * directly because of the awkward 8-bit access mechanism of the 82C54
- * device.
- */
-
-#define CALIBRATE_TIME (5 * 1000020/HZ)
-
-unsigned long calibrate_tsc(void)
-{
- mach_prepare_counter();
-
- {
- unsigned long startlow, starthigh;
- unsigned long endlow, endhigh;
- unsigned long count;
-
- rdtsc(startlow,starthigh);
- mach_countup(&count);
- rdtsc(endlow,endhigh);
-
-
- /* Error: ECTCNEVERSET */
- if (count <= 1)
- goto bad_ctc;
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (endlow), "=d" (endhigh)
- :"g" (startlow), "g" (starthigh),
- "0" (endlow), "1" (endhigh));
-
- /* Error: ECPUTOOFAST */
- if (endhigh)
- goto bad_ctc;
-
- /* Error: ECPUTOOSLOW */
- if (endlow <= CALIBRATE_TIME)
- goto bad_ctc;
-
- __asm__("divl %2"
- :"=a" (endlow), "=d" (endhigh)
- :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
-
- return endlow;
- }
-
- /*
- * The CTC wasn't reliable: we got a hit on the very first read,
- * or the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
-bad_ctc:
- return 0;
-}
-
-#ifdef CONFIG_HPET_TIMER
-/* ------ Calibrate the TSC using HPET -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
- * Second output is parameter 1 (when non NULL)
- * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
- * calibrate_tsc() calibrates the processor TSC by comparing
- * it to the HPET timer of known frequency.
- * Too much 64-bit arithmetic here to do this cleanly in C
- */
-#define CALIBRATE_CNT_HPET (5 * hpet_tick)
-#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
-
-unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
-{
- unsigned long tsc_startlow, tsc_starthigh;
- unsigned long tsc_endlow, tsc_endhigh;
- unsigned long hpet_start, hpet_end;
- unsigned long result, remain;
-
- hpet_start = hpet_readl(HPET_COUNTER);
- rdtsc(tsc_startlow, tsc_starthigh);
- do {
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
- rdtsc(tsc_endlow, tsc_endhigh);
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (tsc_endlow), "=d" (tsc_endhigh)
- :"g" (tsc_startlow), "g" (tsc_starthigh),
- "0" (tsc_endlow), "1" (tsc_endhigh));
-
- /* Error: ECPUTOOFAST */
- if (tsc_endhigh)
- goto bad_calibration;
-
- /* Error: ECPUTOOSLOW */
- if (tsc_endlow <= CALIBRATE_TIME_HPET)
- goto bad_calibration;
-
- ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
- if (remain > (tsc_endlow >> 1))
- result++; /* rounding the result */
-
- if (tsc_hpet_quotient_ptr) {
- unsigned long tsc_hpet_quotient;
-
- ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
- CALIBRATE_CNT_HPET);
- if (remain > (tsc_endlow >> 1))
- tsc_hpet_quotient++; /* rounding the result */
- *tsc_hpet_quotient_ptr = tsc_hpet_quotient;
- }
-
- return result;
-bad_calibration:
- /*
- * the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
- return 0;
-}
-#endif
-
-
-unsigned long read_timer_tsc(void)
-{
- unsigned long retval;
- rdtscl(retval);
- return retval;
-}
-
-
-/* calculate cpu_khz */
-void init_cpu_khz(void)
-{
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc();
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- }
- }
-}
-
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
deleted file mode 100644
index 7e39ed8e33f..00000000000
--- a/arch/i386/kernel/timers/timer.c
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <asm/timer.h>
-
-#ifdef CONFIG_HPET_TIMER
-/*
- * HPET memory read is slower than tsc reads, but is more dependable as it
- * always runs at constant frequency and reduces complexity due to
- * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
- * timer_pit when HPET is active. So, we default to timer_tsc.
- */
-#endif
-/* list of timers, ordered by preference, NULL terminated */
-static struct init_timer_opts* __initdata timers[] = {
-#ifdef CONFIG_X86_CYCLONE_TIMER
- &timer_cyclone_init,
-#endif
-#ifdef CONFIG_HPET_TIMER
- &timer_hpet_init,
-#endif
-#ifdef CONFIG_X86_PM_TIMER
- &timer_pmtmr_init,
-#endif
- &timer_tsc_init,
- &timer_pit_init,
- NULL,
-};
-
-static char clock_override[10] __initdata;
-
-static int __init clock_setup(char* str)
-{
- if (str)
- strlcpy(clock_override, str, sizeof(clock_override));
- return 1;
-}
-__setup("clock=", clock_setup);
-
-
-/* The chosen timesource has been found to be bad.
- * Fall back to a known good timesource (the PIT)
- */
-void clock_fallback(void)
-{
- cur_timer = &timer_pit;
-}
-
-/* iterates through the list of timers, returning the first
- * one that initializes successfully.
- */
-struct timer_opts* __init select_timer(void)
-{
- int i = 0;
-
- /* find most preferred working timer */
- while (timers[i]) {
- if (timers[i]->init)
- if (timers[i]->init(clock_override) == 0)
- return timers[i]->opts;
- ++i;
- }
-
- panic("select_timer: Cannot find a suitable timer\n");
- return NULL;
-}
-
-int read_current_timer(unsigned long *timer_val)
-{
- if (cur_timer->read_timer) {
- *timer_val = cur_timer->read_timer();
- return 0;
- }
- return -1;
-}
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
deleted file mode 100644
index 13892a65c94..00000000000
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/* Cyclone-timer:
- * This code implements timer_ops for the cyclone counter found
- * on IBM x440, x360, and other Summit based systems.
- *
- * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com)
- */
-
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/fixmap.h>
-#include <asm/i8253.h>
-
-#include "io_ports.h"
-
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-#define CYCLONE_CBAR_ADDR 0xFEB00CD0
-#define CYCLONE_PMCC_OFFSET 0x51A0
-#define CYCLONE_MPMC_OFFSET 0x51D0
-#define CYCLONE_MPCS_OFFSET 0x51A8
-#define CYCLONE_TIMER_FREQ 100000000
-#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
-int use_cyclone = 0;
-
-static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
-static u32 last_cyclone_low;
-static u32 last_cyclone_high;
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* helper macro to atomically read both cyclone counter registers */
-#define read_cyclone_counter(low,high) \
- do{ \
- high = cyclone_timer[1]; low = cyclone_timer[0]; \
- } while (high != cyclone_timer[1]);
-
-
-static void mark_offset_cyclone(void)
-{
- unsigned long lost, delay;
- unsigned long delta = last_cyclone_low;
- int count;
- unsigned long long this_offset, last_offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
-
- spin_lock(&i8253_lock);
- read_cyclone_counter(last_cyclone_low,last_cyclone_high);
-
- /* read values for delay_at_last_interrupt */
- outb_p(0x00, 0x43); /* latch the count ASAP */
-
- count = inb_p(0x40); /* read the latched count */
- count |= inb(0x40) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
- spin_unlock(&i8253_lock);
-
- /* lost tick compensation */
- delta = last_cyclone_low - delta;
- delta /= (CYCLONE_TIMER_FREQ/1000000);
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2)
- jiffies_64 += lost-1;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
-
- /* catch corner case where tick rollover occured
- * between cyclone and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static unsigned long get_offset_cyclone(void)
-{
- u32 offset;
-
- if(!cyclone_timer)
- return delay_at_last_interrupt;
-
- /* Read the cyclone timer */
- offset = cyclone_timer[0];
-
- /* .. relative to previous jiffy */
- offset = offset - last_cyclone_low;
-
- /* convert cyclone ticks to microseconds */
- /* XXX slow, can we speed this up? */
- offset = offset/(CYCLONE_TIMER_FREQ/1000000);
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + offset;
-}
-
-static unsigned long long monotonic_clock_cyclone(void)
-{
- u32 now_low, now_high;
- unsigned long long last_offset, this_offset, base;
- unsigned long long ret;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
-
- /* Read the cyclone counter */
- read_cyclone_counter(now_low,now_high);
- this_offset = ((unsigned long long)now_high<<32)|now_low;
-
- /* convert to nanoseconds */
- ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
- return ret * (1000000000 / CYCLONE_TIMER_FREQ);
-}
-
-static int __init init_cyclone(char* override)
-{
- u32* reg;
- u32 base; /* saved cyclone base address */
- u32 pageaddr; /* page that contains cyclone_timer register */
- u32 offset; /* offset from pageaddr to cyclone_timer register */
- int i;
-
- /* check clock override */
- if (override[0] && strncmp(override,"cyclone",7))
- return -ENODEV;
-
- /*make sure we're on a summit box*/
- if(!use_cyclone) return -ENODEV;
-
- printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
-
- /* find base address */
- pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
- offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
- return -ENODEV;
- }
- base = *reg;
- if(!base){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
- return -ENODEV;
- }
-
- /* setup PMCC */
- pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* setup MPCS */
- pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* map in cyclone_timer */
- pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!cyclone_timer){
- printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
- return -ENODEV;
- }
-
- /*quick test to make sure its ticking*/
- for(i=0; i<3; i++){
- u32 old = cyclone_timer[0];
- int stall = 100;
- while(stall--) barrier();
- if(cyclone_timer[0] == old){
- printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
- cyclone_timer = 0;
- return -ENODEV;
- }
- }
-
- init_cpu_khz();
-
- /* Everything looks good! */
- return 0;
-}
-
-
-static void delay_cyclone(unsigned long loops)
-{
- unsigned long bclock, now;
- if(!cyclone_timer)
- return;
- bclock = cyclone_timer[0];
- do {
- rep_nop();
- now = cyclone_timer[0];
- } while ((now-bclock) < loops);
-}
-/************************************************************/
-
-/* cyclone timer_opts struct */
-static struct timer_opts timer_cyclone = {
- .name = "cyclone",
- .mark_offset = mark_offset_cyclone,
- .get_offset = get_offset_cyclone,
- .monotonic_clock = monotonic_clock_cyclone,
- .delay = delay_cyclone,
-};
-
-struct init_timer_opts __initdata timer_cyclone_init = {
- .init = init_cyclone,
- .opts = &timer_cyclone,
-};
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
deleted file mode 100644
index be242723c33..00000000000
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-#include <asm/hpet.h>
-
-static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */
-static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */
-static unsigned long hpet_last; /* hpet counter value at last tick*/
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-static unsigned long long monotonic_clock_hpet(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-static unsigned long get_offset_hpet(void)
-{
- register unsigned long eax, edx;
-
- eax = hpet_readl(HPET_COUNTER);
- eax -= hpet_last; /* hpet delta */
- eax = min(hpet_tick, eax);
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- *
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- *
- * Using a mull instead of a divl saves some cycles in critical path.
- */
- ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
-
- /* our adjusted time offset in microseconds */
- return edx;
-}
-
-static void mark_offset_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- offset = hpet_readl(HPET_COUNTER);
- if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) {
- int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1;
- jiffies_64 += lost_ticks;
- }
- hpet_last = offset;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-}
-
-static void delay_hpet(unsigned long loops)
-{
- unsigned long hpet_start, hpet_end;
- unsigned long eax;
-
- /* loops is the number of cpu cycles. Convert it to hpet clocks */
- ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
-
- hpet_start = hpet_readl(HPET_COUNTER);
- do {
- rep_nop();
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < (loops));
-}
-
-static struct timer_opts timer_hpet;
-
-static int __init init_hpet(char* override)
-{
- unsigned long result, remain;
-
- /* check clock override */
- if (override[0] && strncmp(override,"hpet",4))
- return -ENODEV;
-
- if (!is_hpet_enabled())
- return -ENODEV;
-
- printk("Using HPET for gettimeofday\n");
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient);
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
- eax, edx);
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- set_cyc2ns_scale(cpu_khz);
- }
- /* set this only when cpu_has_tsc */
- timer_hpet.read_timer = read_timer_tsc;
- }
-
- /*
- * Math to calculate hpet to usec multiplier
- * Look for the comments at get_offset_hpet()
- */
- ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
- if (remain > (hpet_tick >> 1))
- result++; /* rounding the result */
- hpet_usec_quotient = result;
-
- return 0;
-}
-
-static int hpet_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- hpet_last = hpet_readl(HPET_COUNTER);
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-/************************************************************/
-
-/* tsc timer_opts struct */
-static struct timer_opts timer_hpet __read_mostly = {
- .name = "hpet",
- .mark_offset = mark_offset_hpet,
- .get_offset = get_offset_hpet,
- .monotonic_clock = monotonic_clock_hpet,
- .delay = delay_hpet,
- .resume = hpet_resume,
-};
-
-struct init_timer_opts __initdata timer_hpet_init = {
- .init = init_hpet,
- .opts = &timer_hpet,
-};
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
deleted file mode 100644
index 4ea2f414dbb..00000000000
--- a/arch/i386/kernel/timers/timer_none.c
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <linux/init.h>
-#include <asm/timer.h>
-
-static void mark_offset_none(void)
-{
- /* nothing needed */
-}
-
-static unsigned long get_offset_none(void)
-{
- return 0;
-}
-
-static unsigned long long monotonic_clock_none(void)
-{
- return 0;
-}
-
-static void delay_none(unsigned long loops)
-{
- int d0;
- __asm__ __volatile__(
- "\tjmp 1f\n"
- ".align 16\n"
- "1:\tjmp 2f\n"
- ".align 16\n"
- "2:\tdecl %0\n\tjns 2b"
- :"=&a" (d0)
- :"0" (loops));
-}
-
-/* none timer_opts struct */
-struct timer_opts timer_none = {
- .name = "none",
- .mark_offset = mark_offset_none,
- .get_offset = get_offset_none,
- .monotonic_clock = monotonic_clock_none,
- .delay = delay_none,
-};
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
deleted file mode 100644
index e42e46d3515..00000000000
--- a/arch/i386/kernel/timers/timer_pit.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/sysdev.h>
-#include <linux/timex.h>
-#include <asm/delay.h>
-#include <asm/mpspec.h>
-#include <asm/timer.h>
-#include <asm/smp.h>
-#include <asm/io.h>
-#include <asm/arch_hooks.h>
-#include <asm/i8253.h>
-
-#include "do_timer.h"
-#include "io_ports.h"
-
-static int count_p; /* counter in get_offset_pit() */
-
-static int __init init_pit(char* override)
-{
- /* check clock override */
- if (override[0] && strncmp(override,"pit",3))
- printk(KERN_ERR "Warning: clock= override failed. Defaulting to PIT\n");
-
- count_p = LATCH;
- return 0;
-}
-
-static void mark_offset_pit(void)
-{
- /* nothing needed */
-}
-
-static unsigned long long monotonic_clock_pit(void)
-{
- return 0;
-}
-
-static void delay_pit(unsigned long loops)
-{
- int d0;
- __asm__ __volatile__(
- "\tjmp 1f\n"
- ".align 16\n"
- "1:\tjmp 2f\n"
- ".align 16\n"
- "2:\tdecl %0\n\tjns 2b"
- :"=&a" (d0)
- :"0" (loops));
-}
-
-
-/* This function must be called with xtime_lock held.
- * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
- *
- * However, the pc-audio speaker driver changes the divisor so that
- * it gets interrupted rather more often - it loads 64 into the
- * counter rather than 11932! This has an adverse impact on
- * do_gettimeoffset() -- it stops working! What is also not
- * good is that the interval that our timer function gets called
- * is no longer 10.0002 ms, but 9.9767 ms. To get around this
- * would require using a different timing source. Maybe someone
- * could use the RTC - I know that this can interrupt at frequencies
- * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
- * it so that at startup, the timer code in sched.c would select
- * using either the RTC or the 8253 timer. The decision would be
- * based on whether there was any other device around that needed
- * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
- * and then do some jiggery to have a version of do_timer that
- * advanced the clock by 1/1024 s. Every time that reached over 1/100
- * of a second, then do all the old code. If the time was kept correct
- * then do_gettimeoffset could just return 0 - there is no low order
- * divider that can be accessed.
- *
- * Ideally, you would be able to use the RTC for the speaker driver,
- * but it appears that the speaker driver really needs interrupt more
- * often than every 120 us or so.
- *
- * Anyway, this needs more thought.... pjsg (1993-08-28)
- *
- * If you are really that interested, you should be reading
- * comp.protocols.time.ntp!
- */
-
-static unsigned long get_offset_pit(void)
-{
- int count;
- unsigned long flags;
- static unsigned long jiffies_p = 0;
-
- /*
- * cache volatile jiffies temporarily; we have xtime_lock.
- */
- unsigned long jiffies_t;
-
- spin_lock_irqsave(&i8253_lock, flags);
- /* timer count may underflow right here */
- outb_p(0x00, PIT_MODE); /* latch the count ASAP */
-
- count = inb_p(PIT_CH0); /* read the latched count */
-
- /*
- * We do this guaranteed double memory access instead of a _p
- * postfix in the previous port access. Wheee, hackady hack
- */
- jiffies_t = jiffies;
-
- count |= inb_p(PIT_CH0) << 8;
-
- /* VIA686a test code... reset the latch if count > max + 1 */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
-
- /*
- * avoiding timer inconsistencies (they are rare, but they happen)...
- * there are two kinds of problems that must be avoided here:
- * 1. the timer counter underflows
- * 2. hardware problem with the timer, not giving us continuous time,
- * the counter does small "jumps" upwards on some Pentium systems,
- * (see c't 95/10 page 335 for Neptun bug.)
- */
-
- if( jiffies_t == jiffies_p ) {
- if( count > count_p ) {
- /* the nutcase */
- count = do_timer_overflow(count);
- }
- } else
- jiffies_p = jiffies_t;
-
- count_p = count;
-
- spin_unlock_irqrestore(&i8253_lock, flags);
-
- count = ((LATCH-1) - count) * TICK_SIZE;
- count = (count + LATCH/2) / LATCH;
-
- return count;
-}
-
-
-/* tsc timer_opts struct */
-struct timer_opts timer_pit = {
- .name = "pit",
- .mark_offset = mark_offset_pit,
- .get_offset = get_offset_pit,
- .monotonic_clock = monotonic_clock_pit,
- .delay = delay_pit,
-};
-
-struct init_timer_opts __initdata timer_pit_init = {
- .init = init_pit,
- .opts = &timer_pit,
-};
-
-void setup_pit_timer(void)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
- outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
- udelay(10);
- outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
- udelay(10);
- outb(LATCH >> 8 , PIT_CH0); /* MSB */
- spin_unlock_irqrestore(&i8253_lock, flags);
-}
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
deleted file mode 100644
index 264edaaac31..00000000000
--- a/arch/i386/kernel/timers/timer_pm.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * (C) Dominik Brodowski <linux@brodo.de> 2003
- *
- * Driver to use the Power Management Timer (PMTMR) available in some
- * southbridges as primary timing source for the Linux kernel.
- *
- * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
- * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
- *
- * This file is licensed under the GPL v2.
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <asm/types.h>
-#include <asm/timer.h>
-#include <asm/smp.h>
-#include <asm/io.h>
-#include <asm/arch_hooks.h>
-
-#include <linux/timex.h>
-#include "mach_timer.h"
-
-/* Number of PMTMR ticks expected during calibration run */
-#define PMTMR_TICKS_PER_SEC 3579545
-#define PMTMR_EXPECTED_RATE \
- ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
-
-
-/* The I/O port the PMTMR resides at.
- * The location is detected during setup_arch(),
- * in arch/i386/acpi/boot.c */
-u32 pmtmr_ioport = 0;
-
-
-/* value of the Power timer at last timer interrupt */
-static u32 offset_tick;
-static u32 offset_delay;
-
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
-
-/*helper function to safely read acpi pm timesource*/
-static inline u32 read_pmtmr(void)
-{
- u32 v1=0,v2=0,v3=0;
- /* It has been reported that because of various broken
- * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
- * source is not latched, so you must read it multiple
- * times to insure a safe value is read.
- */
- do {
- v1 = inl(pmtmr_ioport);
- v2 = inl(pmtmr_ioport);
- v3 = inl(pmtmr_ioport);
- } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
- || (v3 > v1 && v3 < v2));
-
- /* mask the output to 24 bits */
- return v2 & ACPI_PM_MASK;
-}
-
-
-/*
- * Some boards have the PMTMR running way too fast. We check
- * the PMTMR rate against PIT channel 2 to catch these cases.
- */
-static int verify_pmtmr_rate(void)
-{
- u32 value1, value2;
- unsigned long count, delta;
-
- mach_prepare_counter();
- value1 = read_pmtmr();
- mach_countup(&count);
- value2 = read_pmtmr();
- delta = (value2 - value1) & ACPI_PM_MASK;
-
- /* Check that the PMTMR delta is within 5% of what we expect */
- if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
- delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
- printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
- return -1;
- }
-
- return 0;
-}
-
-
-static int init_pmtmr(char* override)
-{
- u32 value1, value2;
- unsigned int i;
-
- if (override[0] && strncmp(override,"pmtmr",5))
- return -ENODEV;
-
- if (!pmtmr_ioport)
- return -ENODEV;
-
- /* we use the TSC for delay_pmtmr, so make sure it exists */
- if (!cpu_has_tsc)
- return -ENODEV;
-
- /* "verify" this timing source */
- value1 = read_pmtmr();
- for (i = 0; i < 10000; i++) {
- value2 = read_pmtmr();
- if (value2 == value1)
- continue;
- if (value2 > value1)
- goto pm_good;
- if ((value2 < value1) && ((value2) < 0xFFF))
- goto pm_good;
- printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2);
- return -EINVAL;
- }
- printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1);
- return -ENODEV;
-
-pm_good:
- if (verify_pmtmr_rate() != 0)
- return -ENODEV;
-
- init_cpu_khz();
- return 0;
-}
-
-static inline u32 cyc2us(u32 cycles)
-{
- /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
- * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
- *
- * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
- * easily be multiplied with 286 (=0x11E) without having to fear
- * u32 overflows.
- */
- cycles *= 286;
- return (cycles >> 10);
-}
-
-/*
- * this gets called during each timer interrupt
- * - Called while holding the writer xtime_lock
- */
-static void mark_offset_pmtmr(void)
-{
- u32 lost, delta, last_offset;
- static int first_run = 1;
- last_offset = offset_tick;
-
- write_seqlock(&monotonic_lock);
-
- offset_tick = read_pmtmr();
-
- /* calculate tick interval */
- delta = (offset_tick - last_offset) & ACPI_PM_MASK;
-
- /* convert to usecs */
- delta = cyc2us(delta);
-
- /* update the monotonic base value */
- monotonic_base += delta * NSEC_PER_USEC;
- write_sequnlock(&monotonic_lock);
-
- /* convert to ticks */
- delta += offset_delay;
- lost = delta / (USEC_PER_SEC / HZ);
- offset_delay = delta % (USEC_PER_SEC / HZ);
-
-
- /* compensate for lost ticks */
- if (lost >= 2)
- jiffies_64 += lost - 1;
-
- /* don't calculate delay for first run,
- or if we've got less then a tick */
- if (first_run || (lost < 1)) {
- first_run = 0;
- offset_delay = 0;
- }
-}
-
-static int pmtmr_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- offset_tick = read_pmtmr();
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-
-static unsigned long long monotonic_clock_pmtmr(void)
-{
- u32 last_offset, this_offset;
- unsigned long long base, ret;
- unsigned seq;
-
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = offset_tick;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the pmtmr */
- this_offset = read_pmtmr();
-
- /* convert to nanoseconds */
- ret = (this_offset - last_offset) & ACPI_PM_MASK;
- ret = base + (cyc2us(ret) * NSEC_PER_USEC);
- return ret;
-}
-
-static void delay_pmtmr(unsigned long loops)
-{
- unsigned long bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- } while ((now-bclock) < loops);
-}
-
-
-/*
- * get the offset (in microseconds) from the last call to mark_offset()
- * - Called holding a reader xtime_lock
- */
-static unsigned long get_offset_pmtmr(void)
-{
- u32 now, offset, delta = 0;
-
- offset = offset_tick;
- now = read_pmtmr();
- delta = (now - offset)&ACPI_PM_MASK;
-
- return (unsigned long) offset_delay + cyc2us(delta);
-}
-
-
-/* acpi timer_opts struct */
-static struct timer_opts timer_pmtmr = {
- .name = "pmtmr",
- .mark_offset = mark_offset_pmtmr,
- .get_offset = get_offset_pmtmr,
- .monotonic_clock = monotonic_clock_pmtmr,
- .delay = delay_pmtmr,
- .read_timer = read_timer_tsc,
- .resume = pmtmr_resume,
-};
-
-struct init_timer_opts __initdata timer_pmtmr_init = {
- .init = init_pmtmr,
- .opts = &timer_pmtmr,
-};
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
deleted file mode 100644
index d395e3b4248..00000000000
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ /dev/null
@@ -1,600 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- *
- * 2004-06-25 Jesper Juhl
- * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
- * failing to inline.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/cpufreq.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-/* processor.h for distable_tsc flag */
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-
-#include <asm/hpet.h>
-#include <asm/i8253.h>
-
-#ifdef CONFIG_HPET_TIMER
-static unsigned long hpet_usec_quotient;
-static unsigned long hpet_last;
-static struct timer_opts timer_tsc;
-#endif
-
-static inline void cpufreq_delayed_get(void);
-
-int tsc_disable __devinitdata = 0;
-
-static int use_tsc;
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-static int count2; /* counter for mark_offset_tsc() */
-
-/* Cached *multiplier* to convert TSC counts to microseconds.
- * (see the equation below).
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
-static unsigned long fast_gettimeoffset_quotient;
-
-static unsigned long get_offset_tsc(void)
-{
- register unsigned long eax, edx;
-
- /* Read the Time Stamp Counter */
-
- rdtsc(eax,edx);
-
- /* .. relative to previous jiffy (32 bits is enough) */
- eax -= last_tsc_low; /* tsc_low delta */
-
- /*
- * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
- * = (tsc_low delta) * (usecs_per_clock)
- * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
- *
- * Using a mull instead of a divl saves up to 31 clock cycles
- * in the critical path.
- */
-
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + edx;
-}
-
-static unsigned long long monotonic_clock_tsc(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
- unsigned long long this_offset;
-
- /*
- * In the NUMA case we dont use the TSC as they are not
- * synchronized across all CPUs.
- */
-#ifndef CONFIG_NUMA
- if (!use_tsc)
-#endif
- /* no locking but a rare wrong value is not a big deal */
- return jiffies_64 * (1000000000 / HZ);
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return cycles_2_ns(this_offset);
-}
-
-static void delay_tsc(unsigned long loops)
-{
- unsigned long bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- } while ((now-bclock) < loops);
-}
-
-#ifdef CONFIG_HPET_TIMER
-static void mark_offset_tsc_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset, temp, hpet_current;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- /*
- * It is important that these two operations happen almost at
- * the same time. We do the RDTSC stuff first, since it's
- * faster. To avoid any inconsistencies, we need interrupts
- * disabled locally.
- */
- /*
- * Interrupts are just disabled locally since the timer irq
- * has the SA_INTERRUPT flag set. -arca
- */
- /* read Pentium cycle counter */
-
- hpet_current = hpet_readl(HPET_COUNTER);
- rdtsc(last_tsc_low, last_tsc_high);
-
- /* lost tick compensation */
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
- int lost_ticks = (offset - hpet_last) / hpet_tick;
- jiffies_64 += lost_ticks;
- }
- hpet_last = hpet_current;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- */
- delay_at_last_interrupt = hpet_current - offset;
- ASM_MUL64_REG(temp, delay_at_last_interrupt,
- hpet_usec_quotient, delay_at_last_interrupt);
-}
-#endif
-
-
-#ifdef CONFIG_CPU_FREQ
-#include <linux/workqueue.h>
-
-static unsigned int cpufreq_delayed_issched = 0;
-static unsigned int cpufreq_init = 0;
-static struct work_struct cpufreq_delayed_get_work;
-
-static void handle_cpufreq_delayed_get(void *v)
-{
- unsigned int cpu;
- for_each_online_cpu(cpu) {
- cpufreq_get(cpu);
- }
- cpufreq_delayed_issched = 0;
-}
-
-/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
- * to verify the CPU frequency the timing core thinks the CPU is running
- * at is still correct.
- */
-static inline void cpufreq_delayed_get(void)
-{
- if (cpufreq_init && !cpufreq_delayed_issched) {
- cpufreq_delayed_issched = 1;
- printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
- schedule_work(&cpufreq_delayed_get_work);
- }
-}
-
-/* If the CPU frequency is scaled, TSC-based delays will need a different
- * loops_per_jiffy value to function properly.
- */
-
-static unsigned int ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
-
-#ifndef CONFIG_SMP
-static unsigned long fast_gettimeoffset_ref = 0;
-static unsigned int cpu_khz_ref = 0;
-#endif
-
-static int
-time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct cpufreq_freqs *freq = data;
-
- if (val != CPUFREQ_RESUMECHANGE)
- write_seqlock_irq(&xtime_lock);
- if (!ref_freq) {
- ref_freq = freq->old;
- loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
-#ifndef CONFIG_SMP
- fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
- cpu_khz_ref = cpu_khz;
-#endif
- }
-
- if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
- (val == CPUFREQ_RESUMECHANGE)) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-#ifndef CONFIG_SMP
- if (cpu_khz)
- cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
- if (use_tsc) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
- fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
- set_cyc2ns_scale(cpu_khz);
- }
- }
-#endif
- }
-
- if (val != CPUFREQ_RESUMECHANGE)
- write_sequnlock_irq(&xtime_lock);
-
- return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
- .notifier_call = time_cpufreq_notifier
-};
-
-
-static int __init cpufreq_tsc(void)
-{
- int ret;
- INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
- ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- if (!ret)
- cpufreq_init = 1;
- return ret;
-}
-core_initcall(cpufreq_tsc);
-
-#else /* CONFIG_CPU_FREQ */
-static inline void cpufreq_delayed_get(void) { return; }
-#endif
-
-int recalibrate_cpu_khz(void)
-{
-#ifndef CONFIG_SMP
- unsigned int cpu_khz_old = cpu_khz;
-
- if (cpu_has_tsc) {
- init_cpu_khz();
- cpu_data[0].loops_per_jiffy =
- cpufreq_scale(cpu_data[0].loops_per_jiffy,
- cpu_khz_old,
- cpu_khz);
- return 0;
- } else
- return -ENODEV;
-#else
- return -ENODEV;
-#endif
-}
-EXPORT_SYMBOL(recalibrate_cpu_khz);
-
-static void mark_offset_tsc(void)
-{
- unsigned long lost,delay;
- unsigned long delta = last_tsc_low;
- int count;
- int countmp;
- static int count1 = 0;
- unsigned long long this_offset, last_offset;
- static int lost_count = 0;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- /*
- * It is important that these two operations happen almost at
- * the same time. We do the RDTSC stuff first, since it's
- * faster. To avoid any inconsistencies, we need interrupts
- * disabled locally.
- */
-
- /*
- * Interrupts are just disabled locally since the timer irq
- * has the SA_INTERRUPT flag set. -arca
- */
-
- /* read Pentium cycle counter */
-
- rdtsc(last_tsc_low, last_tsc_high);
-
- spin_lock(&i8253_lock);
- outb_p(0x00, PIT_MODE); /* latch the count ASAP */
-
- count = inb_p(PIT_CH0); /* read the latched count */
- count |= inb(PIT_CH0) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
-
- spin_unlock(&i8253_lock);
-
- if (pit_latch_buggy) {
- /* get center value of last 3 time lutch */
- if ((count2 >= count && count >= count1)
- || (count1 >= count && count >= count2)) {
- count2 = count1; count1 = count;
- } else if ((count1 >= count2 && count2 >= count)
- || (count >= count2 && count2 >= count1)) {
- countmp = count;count = count2;
- count2 = count1;count1 = countmp;
- } else {
- count2 = count1; count1 = count; count = count1;
- }
- }
-
- /* lost tick compensation */
- delta = last_tsc_low - delta;
- {
- register unsigned long eax, edx;
- eax = delta;
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
- delta = edx;
- }
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2) {
- jiffies_64 += lost-1;
-
- /* sanity check to ensure we're not always losing ticks */
- if (lost_count++ > 100) {
- printk(KERN_WARNING "Losing too many ticks!\n");
- printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
- printk(KERN_WARNING "Possible reasons for this are:\n");
- printk(KERN_WARNING " You're running with Speedstep,\n");
- printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
- printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
- printk(KERN_WARNING "Falling back to a sane timesource now.\n");
-
- clock_fallback();
- }
- /* ... but give the TSC a fair chance */
- if (lost_count > 25)
- cpufreq_delayed_get();
- } else
- lost_count = 0;
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
- /* catch corner case where tick rollover occured
- * between tsc and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static int __init init_tsc(char* override)
-{
-
- /* check clock override */
- if (override[0] && strncmp(override,"tsc",3)) {
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled()) {
- printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
- } else
-#endif
- {
- return -ENODEV;
- }
- }
-
- /*
- * If we have APM enabled or the CPU clock speed is variable
- * (CPU stops clock on HLT or slows clock to save power)
- * then the TSC timestamps may diverge by up to 1 jiffy from
- * 'real time' but nothing will break.
- * The most frequent case is that the CPU is "woken" from a halt
- * state by the timer interrupt itself, so we get 0 error. In the
- * rare cases where a driver would "wake" the CPU and request a
- * timestamp, the maximum error is < 1 jiffy. But timestamps are
- * still perfectly ordered.
- * Note that the TSC counter will be reset if APM suspends
- * to disk; this won't break the kernel, though, 'cuz we're
- * smart. See arch/i386/kernel/apm.c.
- */
- /*
- * Firstly we have to do a CPU check for chips with
- * a potentially buggy TSC. At this point we haven't run
- * the ident/bugs checks so we must run this hook as it
- * may turn off the TSC flag.
- *
- * NOTE: this doesn't yet handle SMP 486 machines where only
- * some CPU's have a TSC. Thats never worked and nobody has
- * moaned if you have the only one in the world - you fix it!
- */
-
- count2 = LATCH; /* initialize counter for mark_offset_tsc() */
-
- if (cpu_has_tsc) {
- unsigned long tsc_quotient;
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled() && hpet_use_timer) {
- unsigned long result, remain;
- printk("Using TSC for gettimeofday\n");
- tsc_quotient = calibrate_tsc_hpet(NULL);
- timer_tsc.mark_offset = &mark_offset_tsc_hpet;
- /*
- * Math to calculate hpet to usec multiplier
- * Look for the comments at get_offset_tsc_hpet()
- */
- ASM_DIV64_REG(result, remain, hpet_tick,
- 0, KERNEL_TICK_USEC);
- if (remain > (hpet_tick >> 1))
- result++; /* rounding the result */
-
- hpet_usec_quotient = result;
- } else
-#endif
- {
- tsc_quotient = calibrate_tsc();
- }
-
- if (tsc_quotient) {
- fast_gettimeoffset_quotient = tsc_quotient;
- use_tsc = 1;
- /*
- * We could be more selective here I suspect
- * and just enable this for the next intel chips ?
- */
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- set_cyc2ns_scale(cpu_khz);
- return 0;
- }
- }
- return -ENODEV;
-}
-
-static int tsc_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- rdtsc(last_tsc_low, last_tsc_high);
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled() && hpet_use_timer)
- hpet_last = hpet_readl(HPET_COUNTER);
-#endif
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-
-#ifndef CONFIG_X86_TSC
-/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
- * in cpu/common.c */
-static int __init tsc_setup(char *str)
-{
- tsc_disable = 1;
- return 1;
-}
-#else
-static int __init tsc_setup(char *str)
-{
- printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
- "cannot disable TSC.\n");
- return 1;
-}
-#endif
-__setup("notsc", tsc_setup);
-
-
-
-/************************************************************/
-
-/* tsc timer_opts struct */
-static struct timer_opts timer_tsc = {
- .name = "tsc",
- .mark_offset = mark_offset_tsc,
- .get_offset = get_offset_tsc,
- .monotonic_clock = monotonic_clock_tsc,
- .delay = delay_tsc,
- .read_timer = read_timer_tsc,
- .resume = tsc_resume,
-};
-
-struct init_timer_opts __initdata timer_tsc_init = {
- .init = init_tsc,
- .opts = &timer_tsc,
-};
diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S
deleted file mode 100644
index fcce0e61b0e..00000000000
--- a/arch/i386/kernel/trampoline.S
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- *
- * Trampoline.S Derived from Setup.S by Linus Torvalds
- *
- * 4 Jan 1997 Michael Chastain: changed to gnu as.
- *
- * This is only used for booting secondary CPUs in SMP machine
- *
- * Entry: CS:IP point to the start of our code, we are
- * in real mode with no stack, but the rest of the
- * trampoline page to make our stack and everything else
- * is a mystery.
- *
- * In fact we don't actually need a stack so we don't
- * set one up.
- *
- * We jump into the boot/compressed/head.S code. So you'd
- * better be running a compressed kernel image or you
- * won't get very far.
- *
- * On entry to trampoline_data, the processor is in real mode
- * with 16-bit addressing and 16-bit data. CS has some value
- * and IP is zero. Thus, data addresses need to be absolute
- * (no relocation) and are taken with regard to r_base.
- *
- * If you work on this file, check the object module with
- * objdump --reloc to make sure there are no relocation
- * entries except for:
- *
- * TYPE VALUE
- * R_386_32 startup_32_smp
- * R_386_32 boot_gdt_table
- */
-
-#include <linux/linkage.h>
-#include <asm/segment.h>
-#include <asm/page.h>
-
-.data
-
-.code16
-
-ENTRY(trampoline_data)
-r_base = .
- wbinvd # Needed for NUMA-Q should be harmless for others
- mov %cs, %ax # Code and data in the same place
- mov %ax, %ds
-
- cli # We should be safe anyway
-
- movl $0xA5A5A5A5, trampoline_data - r_base
- # write marker for master knows we're running
-
- /* GDT tables in non default location kernel can be beyond 16MB and
- * lgdt will not be able to load the address as in real mode default
- * operand size is 16bit. Use lgdtl instead to force operand size
- * to 32 bit.
- */
-
- lidtl boot_idt - r_base # load idt with 0, 0
- lgdtl boot_gdt - r_base # load gdt with whatever is appropriate
-
- xor %ax, %ax
- inc %ax # protected mode (PE) bit
- lmsw %ax # into protected mode
- # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S
- ljmpl $__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET)
-
- # These need to be in the same 64K segment as the above;
- # hence we don't use the boot_gdt_descr defined in head.S
-boot_gdt:
- .word __BOOT_DS + 7 # gdt limit
- .long boot_gdt_table-__PAGE_OFFSET # gdt base
-
-boot_idt:
- .word 0 # idt limit = 0
- .long 0 # idt base = 0L
-
-.globl trampoline_end
-trampoline_end:
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
deleted file mode 100644
index c34d1bfc516..00000000000
--- a/arch/i386/kernel/traps.c
+++ /dev/null
@@ -1,1120 +0,0 @@
-/*
- * linux/arch/i386/traps.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-/*
- * 'Traps.c' handles hardware traps and faults after we have saved some
- * state in 'asm.s'.
- */
-#include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/highmem.h>
-#include <linux/kallsyms.h>
-#include <linux/ptrace.h>
-#include <linux/utsname.h>
-#include <linux/kprobes.h>
-#include <linux/kexec.h>
-
-#ifdef CONFIG_EISA
-#include <linux/ioport.h>
-#include <linux/eisa.h>
-#endif
-
-#ifdef CONFIG_MCA
-#include <linux/mca.h>
-#endif
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
-#include <asm/debugreg.h>
-#include <asm/desc.h>
-#include <asm/i387.h>
-#include <asm/nmi.h>
-
-#include <asm/smp.h>
-#include <asm/arch_hooks.h>
-#include <asm/kdebug.h>
-
-#include <linux/module.h>
-
-#include "mach_traps.h"
-
-asmlinkage int system_call(void);
-
-struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
- { 0, 0 }, { 0, 0 } };
-
-/* Do we ignore FPU interrupts ? */
-char ignore_fpu_irq = 0;
-
-/*
- * The IDT has to be page-aligned to simplify the Pentium
- * F0 0F bug workaround.. We have a special link segment
- * for this.
- */
-struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };
-
-asmlinkage void divide_error(void);
-asmlinkage void debug(void);
-asmlinkage void nmi(void);
-asmlinkage void int3(void);
-asmlinkage void overflow(void);
-asmlinkage void bounds(void);
-asmlinkage void invalid_op(void);
-asmlinkage void device_not_available(void);
-asmlinkage void coprocessor_segment_overrun(void);
-asmlinkage void invalid_TSS(void);
-asmlinkage void segment_not_present(void);
-asmlinkage void stack_segment(void);
-asmlinkage void general_protection(void);
-asmlinkage void page_fault(void);
-asmlinkage void coprocessor_error(void);
-asmlinkage void simd_coprocessor_error(void);
-asmlinkage void alignment_check(void);
-asmlinkage void spurious_interrupt_bug(void);
-asmlinkage void machine_check(void);
-
-static int kstack_depth_to_print = 24;
-struct notifier_block *i386die_chain;
-static DEFINE_SPINLOCK(die_notifier_lock);
-
-int register_die_notifier(struct notifier_block *nb)
-{
- int err = 0;
- unsigned long flags;
- spin_lock_irqsave(&die_notifier_lock, flags);
- err = notifier_chain_register(&i386die_chain, nb);
- spin_unlock_irqrestore(&die_notifier_lock, flags);
- return err;
-}
-EXPORT_SYMBOL(register_die_notifier);
-
-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
-{
- return p > (void *)tinfo &&
- p < (void *)tinfo + THREAD_SIZE - 3;
-}
-
-static inline unsigned long print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long ebp)
-{
- unsigned long addr;
-
-#ifdef CONFIG_FRAME_POINTER
- while (valid_stack_ptr(tinfo, (void *)ebp)) {
- addr = *(unsigned long *)(ebp + 4);
- printk(" [<%08lx>] ", addr);
- print_symbol("%s", addr);
- printk("\n");
- ebp = *(unsigned long *)ebp;
- }
-#else
- while (valid_stack_ptr(tinfo, stack)) {
- addr = *stack++;
- if (__kernel_text_address(addr)) {
- printk(" [<%08lx>]", addr);
- print_symbol(" %s", addr);
- printk("\n");
- }
- }
-#endif
- return ebp;
-}
-
-void show_trace(struct task_struct *task, unsigned long * stack)
-{
- unsigned long ebp;
-
- if (!task)
- task = current;
-
- if (task == current) {
- /* Grab ebp right from our regs */
- asm ("movl %%ebp, %0" : "=r" (ebp) : );
- } else {
- /* ebp is the last reg pushed by switch_to */
- ebp = *(unsigned long *) task->thread.esp;
- }
-
- while (1) {
- struct thread_info *context;
- context = (struct thread_info *)
- ((unsigned long)stack & (~(THREAD_SIZE - 1)));
- ebp = print_context_stack(context, stack, ebp);
- stack = (unsigned long*)context->previous_esp;
- if (!stack)
- break;
- printk(" =======================\n");
- }
-}
-
-void show_stack(struct task_struct *task, unsigned long *esp)
-{
- unsigned long *stack;
- int i;
-
- if (esp == NULL) {
- if (task)
- esp = (unsigned long*)task->thread.esp;
- else
- esp = (unsigned long *)&esp;
- }
-
- stack = esp;
- for(i = 0; i < kstack_depth_to_print; i++) {
- if (kstack_end(stack))
- break;
- if (i && ((i % 8) == 0))
- printk("\n ");
- printk("%08lx ", *stack++);
- }
- printk("\nCall Trace:\n");
- show_trace(task, esp);
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- unsigned long stack;
-
- show_trace(current, &stack);
-}
-
-EXPORT_SYMBOL(dump_stack);
-
-void show_registers(struct pt_regs *regs)
-{
- int i;
- int in_kernel = 1;
- unsigned long esp;
- unsigned short ss;
-
- esp = (unsigned long) (&regs->esp);
- savesegment(ss, ss);
- if (user_mode(regs)) {
- in_kernel = 0;
- esp = regs->esp;
- ss = regs->xss & 0xffff;
- }
- print_modules();
- printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx"
- " (%s) \n",
- smp_processor_id(), 0xffff & regs->xcs, regs->eip,
- print_tainted(), regs->eflags, system_utsname.release);
- print_symbol("EIP is at %s\n", regs->eip);
- printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
- regs->eax, regs->ebx, regs->ecx, regs->edx);
- printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
- regs->esi, regs->edi, regs->ebp, esp);
- printk("ds: %04x es: %04x ss: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff, ss);
- printk("Process %s (pid: %d, threadinfo=%p task=%p)",
- current->comm, current->pid, current_thread_info(), current);
- /*
- * When in-kernel, we also print out the stack and code at the
- * time of the fault..
- */
- if (in_kernel) {
- u8 __user *eip;
-
- printk("\nStack: ");
- show_stack(NULL, (unsigned long*)esp);
-
- printk("Code: ");
-
- eip = (u8 __user *)regs->eip - 43;
- for (i = 0; i < 64; i++, eip++) {
- unsigned char c;
-
- if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
- printk(" Bad EIP value.");
- break;
- }
- if (eip == (u8 __user *)regs->eip)
- printk("<%02x> ", c);
- else
- printk("%02x ", c);
- }
- }
- printk("\n");
-}
-
-static void handle_BUG(struct pt_regs *regs)
-{
- unsigned short ud2;
- unsigned short line;
- char *file;
- char c;
- unsigned long eip;
-
- eip = regs->eip;
-
- if (eip < PAGE_OFFSET)
- goto no_bug;
- if (__get_user(ud2, (unsigned short __user *)eip))
- goto no_bug;
- if (ud2 != 0x0b0f)
- goto no_bug;
- if (__get_user(line, (unsigned short __user *)(eip + 2)))
- goto bug;
- if (__get_user(file, (char * __user *)(eip + 4)) ||
- (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
- file = "<bad filename>";
-
- printk("------------[ cut here ]------------\n");
- printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line);
-
-no_bug:
- return;
-
- /* Here we know it was a BUG but file-n-line is unavailable */
-bug:
- printk("Kernel BUG\n");
-}
-
-/* This is gone through when something in the kernel
- * has done something bad and is about to be terminated.
-*/
-void die(const char * str, struct pt_regs * regs, long err)
-{
- static struct {
- spinlock_t lock;
- u32 lock_owner;
- int lock_owner_depth;
- } die = {
- .lock = SPIN_LOCK_UNLOCKED,
- .lock_owner = -1,
- .lock_owner_depth = 0
- };
- static int die_counter;
-
- if (die.lock_owner != raw_smp_processor_id()) {
- console_verbose();
- spin_lock_irq(&die.lock);
- die.lock_owner = smp_processor_id();
- die.lock_owner_depth = 0;
- bust_spinlocks(1);
- }
-
- if (++die.lock_owner_depth < 3) {
- int nl = 0;
- handle_BUG(regs);
- printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
- nl = 1;
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
- nl = 1;
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
- nl = 1;
-#endif
- if (nl)
- printk("\n");
- notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
- show_registers(regs);
- } else
- printk(KERN_ERR "Recursive die() failure, output suppressed\n");
-
- bust_spinlocks(0);
- die.lock_owner = -1;
- spin_unlock_irq(&die.lock);
-
- if (kexec_should_crash(current))
- crash_kexec(regs);
-
- if (in_interrupt())
- panic("Fatal exception in interrupt");
-
- if (panic_on_oops) {
- printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
- ssleep(5);
- panic("Fatal exception");
- }
- do_exit(SIGSEGV);
-}
-
-static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
-{
- if (!user_mode_vm(regs))
- die(str, regs, err);
-}
-
-static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
- struct pt_regs * regs, long error_code,
- siginfo_t *info)
-{
- struct task_struct *tsk = current;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
-
- if (regs->eflags & VM_MASK) {
- if (vm86)
- goto vm86_trap;
- goto trap_signal;
- }
-
- if (!user_mode(regs))
- goto kernel_trap;
-
- trap_signal: {
- if (info)
- force_sig_info(signr, info, tsk);
- else
- force_sig(signr, tsk);
- return;
- }
-
- kernel_trap: {
- if (!fixup_exception(regs))
- die(str, regs, error_code);
- return;
- }
-
- vm86_trap: {
- int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
- if (ret) goto trap_signal;
- return;
- }
-}
-
-#define DO_ERROR(trapnr, signr, str, name) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
-}
-
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- siginfo_t info; \
- info.si_signo = signr; \
- info.si_errno = 0; \
- info.si_code = sicode; \
- info.si_addr = (void __user *)siaddr; \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
-}
-
-#define DO_VM86_ERROR(trapnr, signr, str, name) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
-}
-
-#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-fastcall void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- siginfo_t info; \
- info.si_signo = signr; \
- info.si_errno = 0; \
- info.si_code = sicode; \
- info.si_addr = (void __user *)siaddr; \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
-}
-
-DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip)
-#ifndef CONFIG_KPROBES
-DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
-#endif
-DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
-DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
-DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
-DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
-DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
-DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
-
-fastcall void __kprobes do_general_protection(struct pt_regs * regs,
- long error_code)
-{
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
- struct thread_struct *thread = &current->thread;
-
- /*
- * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
- * invalid offset set (the LAZY one) and the faulting thread has
- * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS
- * and we set the offset field correctly. Then we let the CPU to
- * restart the faulting instruction.
- */
- if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
- thread->io_bitmap_ptr) {
- memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
- thread->io_bitmap_max);
- /*
- * If the previously set map was extending to higher ports
- * than the current one, pad extra space with 0xff (no access).
- */
- if (thread->io_bitmap_max < tss->io_bitmap_max)
- memset((char *) tss->io_bitmap +
- thread->io_bitmap_max, 0xff,
- tss->io_bitmap_max - thread->io_bitmap_max);
- tss->io_bitmap_max = thread->io_bitmap_max;
- tss->io_bitmap_base = IO_BITMAP_OFFSET;
- tss->io_bitmap_owner = thread;
- put_cpu();
- return;
- }
- put_cpu();
-
- current->thread.error_code = error_code;
- current->thread.trap_no = 13;
-
- if (regs->eflags & VM_MASK)
- goto gp_in_vm86;
-
- if (!user_mode(regs))
- goto gp_in_kernel;
-
- current->thread.error_code = error_code;
- current->thread.trap_no = 13;
- force_sig(SIGSEGV, current);
- return;
-
-gp_in_vm86:
- local_irq_enable();
- handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
- return;
-
-gp_in_kernel:
- if (!fixup_exception(regs)) {
- if (notify_die(DIE_GPF, "general protection fault", regs,
- error_code, 13, SIGSEGV) == NOTIFY_STOP)
- return;
- die("general protection fault", regs, error_code);
- }
-}
-
-static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
-{
- printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
- printk("You probably have a hardware problem with your RAM chips\n");
-
- /* Clear and disable the memory parity error line. */
- clear_mem_error(reason);
-}
-
-static void io_check_error(unsigned char reason, struct pt_regs * regs)
-{
- unsigned long i;
-
- printk("NMI: IOCK error (debug interrupt?)\n");
- show_registers(regs);
-
- /* Re-enable the IOCK line, wait for a few seconds */
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
- i = 2000;
- while (--i) udelay(1000);
- reason &= ~8;
- outb(reason, 0x61);
-}
-
-static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
-{
-#ifdef CONFIG_MCA
- /* Might actually be able to figure out what the guilty party
- * is. */
- if( MCA_bus ) {
- mca_handle_nmi();
- return;
- }
-#endif
- printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
- printk("Dazed and confused, but trying to continue\n");
- printk("Do you have a strange power saving mode enabled?\n");
-}
-
-static DEFINE_SPINLOCK(nmi_print_lock);
-
-void die_nmi (struct pt_regs *regs, const char *msg)
-{
- if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
- NOTIFY_STOP)
- return;
-
- spin_lock(&nmi_print_lock);
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out.
- */
- bust_spinlocks(1);
- printk(msg);
- printk(" on CPU%d, eip %08lx, registers:\n",
- smp_processor_id(), regs->eip);
- show_registers(regs);
- printk("console shuts up ...\n");
- console_silent();
- spin_unlock(&nmi_print_lock);
- bust_spinlocks(0);
-
- /* If we are in kernel we are probably nested up pretty bad
- * and might aswell get out now while we still can.
- */
- if (!user_mode(regs)) {
- current->thread.trap_no = 2;
- crash_kexec(regs);
- }
-
- do_exit(SIGSEGV);
-}
-
-static void default_do_nmi(struct pt_regs * regs)
-{
- unsigned char reason = 0;
-
- /* Only the BSP gets external NMIs from the system. */
- if (!smp_processor_id())
- reason = get_nmi_reason();
-
- if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
- == NOTIFY_STOP)
- return;
-#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * Ok, so this is none of the documented NMI sources,
- * so it must be the NMI watchdog.
- */
- if (nmi_watchdog) {
- nmi_watchdog_tick(regs);
- return;
- }
-#endif
- unknown_nmi_error(reason, regs);
- return;
- }
- if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
- return;
- if (reason & 0x80)
- mem_parity_error(reason, regs);
- if (reason & 0x40)
- io_check_error(reason, regs);
- /*
- * Reassert NMI in case it became active meanwhile
- * as it's edge-triggered.
- */
- reassert_nmi();
-}
-
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
-{
- return 0;
-}
-
-static nmi_callback_t nmi_callback = dummy_nmi_callback;
-
-fastcall void do_nmi(struct pt_regs * regs, long error_code)
-{
- int cpu;
-
- nmi_enter();
-
- cpu = smp_processor_id();
-
-#ifdef CONFIG_HOTPLUG_CPU
- if (!cpu_online(cpu)) {
- nmi_exit();
- return;
- }
-#endif
-
- ++nmi_count(cpu);
-
- if (!rcu_dereference(nmi_callback)(regs, cpu))
- default_do_nmi(regs);
-
- nmi_exit();
-}
-
-void set_nmi_callback(nmi_callback_t callback)
-{
- rcu_assign_pointer(nmi_callback, callback);
-}
-EXPORT_SYMBOL_GPL(set_nmi_callback);
-
-void unset_nmi_callback(void)
-{
- nmi_callback = dummy_nmi_callback;
-}
-EXPORT_SYMBOL_GPL(unset_nmi_callback);
-
-#ifdef CONFIG_KPROBES
-fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
-{
- if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
- == NOTIFY_STOP)
- return;
- /* This is an interrupt gate, because kprobes wants interrupts
- disabled. Normal trap handlers don't. */
- restore_interrupts(regs);
- do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
-}
-#endif
-
-/*
- * Our handling of the processor debug registers is non-trivial.
- * We do not clear them on entry and exit from the kernel. Therefore
- * it is possible to get a watchpoint trap here from inside the kernel.
- * However, the code in ./ptrace.c has ensured that the user can
- * only set watchpoints on userspace addresses. Therefore the in-kernel
- * watchpoint trap can only occur in code which is reading/writing
- * from user space. Such code must not hold kernel locks (since it
- * can equally take a page fault), therefore it is safe to call
- * force_sig_info even though that claims and releases locks.
- *
- * Code in ./signal.c ensures that the debug control register
- * is restored before we deliver any signal, and therefore that
- * user code runs with the correct debug control register even though
- * we clear it here.
- *
- * Being careful here means that we don't have to be as careful in a
- * lot of more complicated places (task switching can be a bit lazy
- * about restoring all the debug state, and ptrace doesn't have to
- * find every occurrence of the TF bit that could be saved away even
- * by user code)
- */
-fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
-{
- unsigned int condition;
- struct task_struct *tsk = current;
-
- get_debugreg(condition, 6);
-
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
- SIGTRAP) == NOTIFY_STOP)
- return;
- /* It's safe to allow irq's after DR6 has been saved */
- if (regs->eflags & X86_EFLAGS_IF)
- local_irq_enable();
-
- /* Mask out spurious debug traps due to lazy DR7 setting */
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg[7])
- goto clear_dr7;
- }
-
- if (regs->eflags & VM_MASK)
- goto debug_vm86;
-
- /* Save debug status register where ptrace can see it */
- tsk->thread.debugreg[6] = condition;
-
- /*
- * Single-stepping through TF: make sure we ignore any events in
- * kernel space (but re-enable TF when returning to user mode).
- */
- if (condition & DR_STEP) {
- /*
- * We already checked v86 mode above, so we can
- * check for kernel mode by just checking the CPL
- * of CS.
- */
- if (!user_mode(regs))
- goto clear_TF_reenable;
- }
-
- /* Ok, finally something we can handle */
- send_sigtrap(tsk, regs, error_code);
-
- /* Disable additional traps. They'll be re-enabled when
- * the signal is delivered.
- */
-clear_dr7:
- set_debugreg(0, 7);
- return;
-
-debug_vm86:
- handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
- return;
-
-clear_TF_reenable:
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->eflags &= ~TF_MASK;
- return;
-}
-
-/*
- * Note that we play around with the 'TS' bit in an attempt to get
- * the correct behaviour even in the presence of the asynchronous
- * IRQ13 behaviour
- */
-void math_error(void __user *eip)
-{
- struct task_struct * task;
- siginfo_t info;
- unsigned short cwd, swd;
-
- /*
- * Save the info for the exception handler and clear the error.
- */
- task = current;
- save_init_fpu(task);
- task->thread.trap_no = 16;
- task->thread.error_code = 0;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = __SI_FAULT;
- info.si_addr = eip;
- /*
- * (~cwd & swd) will mask out exceptions that are not set to unmasked
- * status. 0x3f is the exception bits in these regs, 0x200 is the
- * C1 reg you need in case of a stack fault, 0x040 is the stack
- * fault bit. We should only be taking one exception at a time,
- * so if this combination doesn't produce any single exception,
- * then we have a bad program that isn't syncronizing its FPU usage
- * and it will suffer the consequences since we won't be able to
- * fully reproduce the context of the exception
- */
- cwd = get_fpu_cwd(task);
- swd = get_fpu_swd(task);
- switch (swd & ~cwd & 0x3f) {
- case 0x000: /* No unmasked exception */
- return;
- default: /* Multiple exceptions */
- break;
- case 0x001: /* Invalid Op */
- /*
- * swd & 0x240 == 0x040: Stack Underflow
- * swd & 0x240 == 0x240: Stack Overflow
- * User must clear the SF bit (0x40) if set
- */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
- }
- force_sig_info(SIGFPE, &info, task);
-}
-
-fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code)
-{
- ignore_fpu_irq = 1;
- math_error((void __user *)regs->eip);
-}
-
-static void simd_math_error(void __user *eip)
-{
- struct task_struct * task;
- siginfo_t info;
- unsigned short mxcsr;
-
- /*
- * Save the info for the exception handler and clear the error.
- */
- task = current;
- save_init_fpu(task);
- task->thread.trap_no = 19;
- task->thread.error_code = 0;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_code = __SI_FAULT;
- info.si_addr = eip;
- /*
- * The SIMD FPU exceptions are handled a little differently, as there
- * is only a single status/control register. Thus, to determine which
- * unmasked exception was caught we must mask the exception mask bits
- * at 0x1f80, and then use these to mask the exception bits at 0x3f.
- */
- mxcsr = get_fpu_mxcsr(task);
- switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
- }
- force_sig_info(SIGFPE, &info, task);
-}
-
-fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
- long error_code)
-{
- if (cpu_has_xmm) {
- /* Handle SIMD FPU exceptions on PIII+ processors. */
- ignore_fpu_irq = 1;
- simd_math_error((void __user *)regs->eip);
- } else {
- /*
- * Handle strange cache flush from user space exception
- * in all other cases. This is undocumented behaviour.
- */
- if (regs->eflags & VM_MASK) {
- handle_vm86_fault((struct kernel_vm86_regs *)regs,
- error_code);
- return;
- }
- current->thread.trap_no = 19;
- current->thread.error_code = error_code;
- die_if_kernel("cache flush denied", regs, error_code);
- force_sig(SIGSEGV, current);
- }
-}
-
-fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
- long error_code)
-{
-#if 0
- /* No need to warn about this any longer. */
- printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
-#endif
-}
-
-fastcall void setup_x86_bogus_stack(unsigned char * stk)
-{
- unsigned long *switch16_ptr, *switch32_ptr;
- struct pt_regs *regs;
- unsigned long stack_top, stack_bot;
- unsigned short iret_frame16_off;
- int cpu = smp_processor_id();
- /* reserve the space on 32bit stack for the magic switch16 pointer */
- memmove(stk, stk + 8, sizeof(struct pt_regs));
- switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
- regs = (struct pt_regs *)stk;
- /* now the switch32 on 16bit stack */
- stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
- stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
- switch32_ptr = (unsigned long *)(stack_top - 8);
- iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
- /* copy iret frame on 16bit stack */
- memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
- /* fill in the switch pointers */
- switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
- switch16_ptr[1] = __ESPFIX_SS;
- switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
- 8 - CPU_16BIT_STACK_SIZE;
- switch32_ptr[1] = __KERNEL_DS;
-}
-
-fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
-{
- unsigned long *switch32_ptr;
- unsigned char *stack16, *stack32;
- unsigned long stack_top, stack_bot;
- int len;
- int cpu = smp_processor_id();
- stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
- stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
- switch32_ptr = (unsigned long *)(stack_top - 8);
- /* copy the data from 16bit stack to 32bit stack */
- len = CPU_16BIT_STACK_SIZE - 8 - sp;
- stack16 = (unsigned char *)(stack_bot + sp);
- stack32 = (unsigned char *)
- (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
- memcpy(stack32, stack16, len);
- return stack32;
-}
-
-/*
- * 'math_state_restore()' saves the current math information in the
- * old math state array, and gets the new ones from the current task
- *
- * Careful.. There are problems with IBM-designed IRQ13 behaviour.
- * Don't touch unless you *really* know how it works.
- *
- * Must be called with kernel preemption disabled (in this case,
- * local interrupts are disabled at the call-site in entry.S).
- */
-asmlinkage void math_state_restore(struct pt_regs regs)
-{
- struct thread_info *thread = current_thread_info();
- struct task_struct *tsk = thread->task;
-
- clts(); /* Allow maths ops (or we recurse) */
- if (!tsk_used_math(tsk))
- init_fpu(tsk);
- restore_fpu(tsk);
- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
-}
-
-#ifndef CONFIG_MATH_EMULATION
-
-asmlinkage void math_emulate(long arg)
-{
- printk("math-emulation not enabled and no coprocessor found.\n");
- printk("killing %s.\n",current->comm);
- force_sig(SIGFPE,current);
- schedule();
-}
-
-#endif /* CONFIG_MATH_EMULATION */
-
-#ifdef CONFIG_X86_F00F_BUG
-void __init trap_init_f00f_bug(void)
-{
- __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
-
- /*
- * Update the IDT descriptor and reload the IDT so that
- * it uses the read-only mapped virtual address.
- */
- idt_descr.address = fix_to_virt(FIX_F00F_IDT);
- load_idt(&idt_descr);
-}
-#endif
-
-#define _set_gate(gate_addr,type,dpl,addr,seg) \
-do { \
- int __d0, __d1; \
- __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
- "movw %4,%%dx\n\t" \
- "movl %%eax,%0\n\t" \
- "movl %%edx,%1" \
- :"=m" (*((long *) (gate_addr))), \
- "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
- :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
- "3" ((char *) (addr)),"2" ((seg) << 16)); \
-} while (0)
-
-
-/*
- * This needs to use 'idt_table' rather than 'idt', and
- * thus use the _nonmapped_ version of the IDT, as the
- * Pentium F0 0F bugfix can have resulted in the mapped
- * IDT being write-protected.
- */
-void set_intr_gate(unsigned int n, void *addr)
-{
- _set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
-}
-
-/*
- * This routine sets up an interrupt gate at directory privilege level 3.
- */
-static inline void set_system_intr_gate(unsigned int n, void *addr)
-{
- _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS);
-}
-
-static void __init set_trap_gate(unsigned int n, void *addr)
-{
- _set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
-}
-
-static void __init set_system_gate(unsigned int n, void *addr)
-{
- _set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
-}
-
-static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
-{
- _set_gate(idt_table+n,5,0,0,(gdt_entry<<3));
-}
-
-
-void __init trap_init(void)
-{
-#ifdef CONFIG_EISA
- void __iomem *p = ioremap(0x0FFFD9, 4);
- if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
- EISA_bus = 1;
- }
- iounmap(p);
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
- init_apic_mappings();
-#endif
-
- set_trap_gate(0,&divide_error);
- set_intr_gate(1,&debug);
- set_intr_gate(2,&nmi);
- set_system_intr_gate(3, &int3); /* int3-5 can be called from all */
- set_system_gate(4,&overflow);
- set_system_gate(5,&bounds);
- set_trap_gate(6,&invalid_op);
- set_trap_gate(7,&device_not_available);
- set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS);
- set_trap_gate(9,&coprocessor_segment_overrun);
- set_trap_gate(10,&invalid_TSS);
- set_trap_gate(11,&segment_not_present);
- set_trap_gate(12,&stack_segment);
- set_trap_gate(13,&general_protection);
- set_intr_gate(14,&page_fault);
- set_trap_gate(15,&spurious_interrupt_bug);
- set_trap_gate(16,&coprocessor_error);
- set_trap_gate(17,&alignment_check);
-#ifdef CONFIG_X86_MCE
- set_trap_gate(18,&machine_check);
-#endif
- set_trap_gate(19,&simd_coprocessor_error);
-
- set_system_gate(SYSCALL_VECTOR,&system_call);
-
- /*
- * Should be a barrier for any external CPU state.
- */
- cpu_init();
-
- trap_init_hook();
-}
-
-static int __init kstack_setup(char *s)
-{
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
- return 0;
-}
-__setup("kstack=", kstack_setup);
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
deleted file mode 100644
index fc1993564f9..00000000000
--- a/arch/i386/kernel/vm86.c
+++ /dev/null
@@ -1,808 +0,0 @@
-/*
- * linux/kernel/vm86.c
- *
- * Copyright (C) 1994 Linus Torvalds
- *
- * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86
- * stack - Manfred Spraul <manfreds@colorfullife.com>
- *
- * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle
- * them correctly. Now the emulation will be in a
- * consistent state after stackfaults - Kasper Dupont
- * <kasperd@daimi.au.dk>
- *
- * 22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont
- * <kasperd@daimi.au.dk>
- *
- * ?? ??? 2002 - Fixed premature returns from handle_vm86_fault
- * caused by Kasper Dupont's changes - Stas Sergeev
- *
- * 4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes.
- * Kasper Dupont <kasperd@daimi.au.dk>
- *
- * 9 apr 2002 - Changed syntax of macros in handle_vm86_fault.
- * Kasper Dupont <kasperd@daimi.au.dk>
- *
- * 9 apr 2002 - Changed stack access macros to jump to a label
- * instead of returning to userspace. This simplifies
- * do_int, and is needed by handle_vm6_fault. Kasper
- * Dupont <kasperd@daimi.au.dk>
- *
- */
-
-#include <linux/config.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/highmem.h>
-#include <linux/ptrace.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/tlbflush.h>
-#include <asm/irq.h>
-
-/*
- * Known problems:
- *
- * Interrupt handling is not guaranteed:
- * - a real x86 will disable all interrupts for one instruction
- * after a "mov ss,xx" to make stack handling atomic even without
- * the 'lss' instruction. We can't guarantee this in v86 mode,
- * as the next instruction might result in a page fault or similar.
- * - a real x86 will have interrupts disabled for one instruction
- * past the 'sti' that enables them. We don't bother with all the
- * details yet.
- *
- * Let's hope these problems do not actually matter for anything.
- */
-
-
-#define KVM86 ((struct kernel_vm86_struct *)regs)
-#define VMPI KVM86->vm86plus
-
-
-/*
- * 8- and 16-bit register defines..
- */
-#define AL(regs) (((unsigned char *)&((regs)->eax))[0])
-#define AH(regs) (((unsigned char *)&((regs)->eax))[1])
-#define IP(regs) (*(unsigned short *)&((regs)->eip))
-#define SP(regs) (*(unsigned short *)&((regs)->esp))
-
-/*
- * virtual flags (16 and 32-bit versions)
- */
-#define VFLAGS (*(unsigned short *)&(current->thread.v86flags))
-#define VEFLAGS (current->thread.v86flags)
-
-#define set_flags(X,new,mask) \
-((X) = ((X) & ~(mask)) | ((new) & (mask)))
-
-#define SAFE_MASK (0xDD5)
-#define RETURN_MASK (0xDFF)
-
-#define VM86_REGS_PART2 orig_eax
-#define VM86_REGS_SIZE1 \
- ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) )
-#define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1)
-
-struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
-struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
-{
- struct tss_struct *tss;
- struct pt_regs *ret;
- unsigned long tmp;
-
- /*
- * This gets called from entry.S with interrupts disabled, but
- * from process context. Enable interrupts here, before trying
- * to access user space.
- */
- local_irq_enable();
-
- if (!current->thread.vm86_info) {
- printk("no vm86_info: BAD\n");
- do_exit(SIGSEGV);
- }
- set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
- tmp = copy_to_user(&current->thread.vm86_info->regs,regs, VM86_REGS_SIZE1);
- tmp += copy_to_user(&current->thread.vm86_info->regs.VM86_REGS_PART2,
- &regs->VM86_REGS_PART2, VM86_REGS_SIZE2);
- tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap);
- if (tmp) {
- printk("vm86: could not access userspace vm86_info\n");
- do_exit(SIGSEGV);
- }
-
- tss = &per_cpu(init_tss, get_cpu());
- current->thread.esp0 = current->thread.saved_esp0;
- current->thread.sysenter_cs = __KERNEL_CS;
- load_esp0(tss, &current->thread);
- current->thread.saved_esp0 = 0;
- put_cpu();
-
- loadsegment(fs, current->thread.saved_fs);
- loadsegment(gs, current->thread.saved_gs);
- ret = KVM86->regs32;
- return ret;
-}
-
-static void mark_screen_rdonly(struct mm_struct *mm)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- spinlock_t *ptl;
- int i;
-
- pgd = pgd_offset(mm, 0xA0000);
- if (pgd_none_or_clear_bad(pgd))
- goto out;
- pud = pud_offset(pgd, 0xA0000);
- if (pud_none_or_clear_bad(pud))
- goto out;
- pmd = pmd_offset(pud, 0xA0000);
- if (pmd_none_or_clear_bad(pmd))
- goto out;
- pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
- for (i = 0; i < 32; i++) {
- if (pte_present(*pte))
- set_pte(pte, pte_wrprotect(*pte));
- pte++;
- }
- pte_unmap_unlock(pte, ptl);
-out:
- flush_tlb();
-}
-
-
-
-static int do_vm86_irq_handling(int subfunction, int irqnumber);
-static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
-
-asmlinkage int sys_vm86old(struct pt_regs regs)
-{
- struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx;
- struct kernel_vm86_struct info; /* declare this _on top_,
- * this avoids wasting of stack space.
- * This remains on the stack until we
- * return to 32 bit user space.
- */
- struct task_struct *tsk;
- int tmp, ret = -EPERM;
-
- tsk = current;
- if (tsk->thread.saved_esp0)
- goto out;
- tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
- tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
- (long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2);
- ret = -EFAULT;
- if (tmp)
- goto out;
- memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
- info.regs32 = &regs;
- tsk->thread.vm86_info = v86;
- do_sys_vm86(&info, tsk);
- ret = 0; /* we never return here */
-out:
- return ret;
-}
-
-
-asmlinkage int sys_vm86(struct pt_regs regs)
-{
- struct kernel_vm86_struct info; /* declare this _on top_,
- * this avoids wasting of stack space.
- * This remains on the stack until we
- * return to 32 bit user space.
- */
- struct task_struct *tsk;
- int tmp, ret;
- struct vm86plus_struct __user *v86;
-
- tsk = current;
- switch (regs.ebx) {
- case VM86_REQUEST_IRQ:
- case VM86_FREE_IRQ:
- case VM86_GET_IRQ_BITS:
- case VM86_GET_AND_RESET_IRQ:
- ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx);
- goto out;
- case VM86_PLUS_INSTALL_CHECK:
- /* NOTE: on old vm86 stuff this will return the error
- from access_ok(), because the subfunction is
- interpreted as (invalid) address to vm86_struct.
- So the installation check works.
- */
- ret = 0;
- goto out;
- }
-
- /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
- ret = -EPERM;
- if (tsk->thread.saved_esp0)
- goto out;
- v86 = (struct vm86plus_struct __user *)regs.ecx;
- tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
- tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
- (long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2);
- ret = -EFAULT;
- if (tmp)
- goto out;
- info.regs32 = &regs;
- info.vm86plus.is_vm86pus = 1;
- tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
- do_sys_vm86(&info, tsk);
- ret = 0; /* we never return here */
-out:
- return ret;
-}
-
-
-static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
-{
- struct tss_struct *tss;
-/*
- * make sure the vm86() system call doesn't try to do anything silly
- */
- info->regs.__null_ds = 0;
- info->regs.__null_es = 0;
-
-/* we are clearing fs,gs later just before "jmp resume_userspace",
- * because starting with Linux 2.1.x they aren't no longer saved/restored
- */
-
-/*
- * The eflags register is also special: we cannot trust that the user
- * has set it up safely, so this makes sure interrupt etc flags are
- * inherited from protected mode.
- */
- VEFLAGS = info->regs.eflags;
- info->regs.eflags &= SAFE_MASK;
- info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK;
- info->regs.eflags |= VM_MASK;
-
- switch (info->cpu_type) {
- case CPU_286:
- tsk->thread.v86mask = 0;
- break;
- case CPU_386:
- tsk->thread.v86mask = NT_MASK | IOPL_MASK;
- break;
- case CPU_486:
- tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK;
- break;
- default:
- tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK;
- break;
- }
-
-/*
- * Save old state, set default return value (%eax) to 0
- */
- info->regs32->eax = 0;
- tsk->thread.saved_esp0 = tsk->thread.esp0;
- savesegment(fs, tsk->thread.saved_fs);
- savesegment(gs, tsk->thread.saved_gs);
-
- tss = &per_cpu(init_tss, get_cpu());
- tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
- if (cpu_has_sep)
- tsk->thread.sysenter_cs = 0;
- load_esp0(tss, &tsk->thread);
- put_cpu();
-
- tsk->thread.screen_bitmap = info->screen_bitmap;
- if (info->flags & VM86_SCREEN_BITMAP)
- mark_screen_rdonly(tsk->mm);
- __asm__ __volatile__(
- "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
- "movl %0,%%esp\n\t"
- "movl %1,%%ebp\n\t"
- "jmp resume_userspace"
- : /* no outputs */
- :"r" (&info->regs), "r" (tsk->thread_info) : "ax");
- /* we never return here */
-}
-
-static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval)
-{
- struct pt_regs * regs32;
-
- regs32 = save_v86_state(regs16);
- regs32->eax = retval;
- __asm__ __volatile__("movl %0,%%esp\n\t"
- "movl %1,%%ebp\n\t"
- "jmp resume_userspace"
- : : "r" (regs32), "r" (current_thread_info()));
-}
-
-static inline void set_IF(struct kernel_vm86_regs * regs)
-{
- VEFLAGS |= VIF_MASK;
- if (VEFLAGS & VIP_MASK)
- return_to_32bit(regs, VM86_STI);
-}
-
-static inline void clear_IF(struct kernel_vm86_regs * regs)
-{
- VEFLAGS &= ~VIF_MASK;
-}
-
-static inline void clear_TF(struct kernel_vm86_regs * regs)
-{
- regs->eflags &= ~TF_MASK;
-}
-
-static inline void clear_AC(struct kernel_vm86_regs * regs)
-{
- regs->eflags &= ~AC_MASK;
-}
-
-/* It is correct to call set_IF(regs) from the set_vflags_*
- * functions. However someone forgot to call clear_IF(regs)
- * in the opposite case.
- * After the command sequence CLI PUSHF STI POPF you should
- * end up with interrups disabled, but you ended up with
- * interrupts enabled.
- * ( I was testing my own changes, but the only bug I
- * could find was in a function I had not changed. )
- * [KD]
- */
-
-static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs)
-{
- set_flags(VEFLAGS, eflags, current->thread.v86mask);
- set_flags(regs->eflags, eflags, SAFE_MASK);
- if (eflags & IF_MASK)
- set_IF(regs);
- else
- clear_IF(regs);
-}
-
-static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs)
-{
- set_flags(VFLAGS, flags, current->thread.v86mask);
- set_flags(regs->eflags, flags, SAFE_MASK);
- if (flags & IF_MASK)
- set_IF(regs);
- else
- clear_IF(regs);
-}
-
-static inline unsigned long get_vflags(struct kernel_vm86_regs * regs)
-{
- unsigned long flags = regs->eflags & RETURN_MASK;
-
- if (VEFLAGS & VIF_MASK)
- flags |= IF_MASK;
- flags |= IOPL_MASK;
- return flags | (VEFLAGS & current->thread.v86mask);
-}
-
-static inline int is_revectored(int nr, struct revectored_struct * bitmap)
-{
- __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
- :"=r" (nr)
- :"m" (*bitmap),"r" (nr));
- return nr;
-}
-
-#define val_byte(val, n) (((__u8 *)&val)[n])
-
-#define pushb(base, ptr, val, err_label) \
- do { \
- __u8 __val = val; \
- ptr--; \
- if (put_user(__val, base + ptr) < 0) \
- goto err_label; \
- } while(0)
-
-#define pushw(base, ptr, val, err_label) \
- do { \
- __u16 __val = val; \
- ptr--; \
- if (put_user(val_byte(__val, 1), base + ptr) < 0) \
- goto err_label; \
- ptr--; \
- if (put_user(val_byte(__val, 0), base + ptr) < 0) \
- goto err_label; \
- } while(0)
-
-#define pushl(base, ptr, val, err_label) \
- do { \
- __u32 __val = val; \
- ptr--; \
- if (put_user(val_byte(__val, 3), base + ptr) < 0) \
- goto err_label; \
- ptr--; \
- if (put_user(val_byte(__val, 2), base + ptr) < 0) \
- goto err_label; \
- ptr--; \
- if (put_user(val_byte(__val, 1), base + ptr) < 0) \
- goto err_label; \
- ptr--; \
- if (put_user(val_byte(__val, 0), base + ptr) < 0) \
- goto err_label; \
- } while(0)
-
-#define popb(base, ptr, err_label) \
- ({ \
- __u8 __res; \
- if (get_user(__res, base + ptr) < 0) \
- goto err_label; \
- ptr++; \
- __res; \
- })
-
-#define popw(base, ptr, err_label) \
- ({ \
- __u16 __res; \
- if (get_user(val_byte(__res, 0), base + ptr) < 0) \
- goto err_label; \
- ptr++; \
- if (get_user(val_byte(__res, 1), base + ptr) < 0) \
- goto err_label; \
- ptr++; \
- __res; \
- })
-
-#define popl(base, ptr, err_label) \
- ({ \
- __u32 __res; \
- if (get_user(val_byte(__res, 0), base + ptr) < 0) \
- goto err_label; \
- ptr++; \
- if (get_user(val_byte(__res, 1), base + ptr) < 0) \
- goto err_label; \
- ptr++; \
- if (get_user(val_byte(__res, 2), base + ptr) < 0) \
- goto err_label; \
- ptr++; \
- if (get_user(val_byte(__res, 3), base + ptr) < 0) \
- goto err_label; \
- ptr++; \
- __res; \
- })
-
-/* There are so many possible reasons for this function to return
- * VM86_INTx, so adding another doesn't bother me. We can expect
- * userspace programs to be able to handle it. (Getting a problem
- * in userspace is always better than an Oops anyway.) [KD]
- */
-static void do_int(struct kernel_vm86_regs *regs, int i,
- unsigned char __user * ssp, unsigned short sp)
-{
- unsigned long __user *intr_ptr;
- unsigned long segoffs;
-
- if (regs->cs == BIOSSEG)
- goto cannot_handle;
- if (is_revectored(i, &KVM86->int_revectored))
- goto cannot_handle;
- if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored))
- goto cannot_handle;
- intr_ptr = (unsigned long __user *) (i << 2);
- if (get_user(segoffs, intr_ptr))
- goto cannot_handle;
- if ((segoffs >> 16) == BIOSSEG)
- goto cannot_handle;
- pushw(ssp, sp, get_vflags(regs), cannot_handle);
- pushw(ssp, sp, regs->cs, cannot_handle);
- pushw(ssp, sp, IP(regs), cannot_handle);
- regs->cs = segoffs >> 16;
- SP(regs) -= 6;
- IP(regs) = segoffs & 0xffff;
- clear_TF(regs);
- clear_IF(regs);
- clear_AC(regs);
- return;
-
-cannot_handle:
- return_to_32bit(regs, VM86_INTx + (i << 8));
-}
-
-int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno)
-{
- if (VMPI.is_vm86pus) {
- if ( (trapno==3) || (trapno==1) )
- return_to_32bit(regs, VM86_TRAP + (trapno << 8));
- do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs));
- return 0;
- }
- if (trapno !=1)
- return 1; /* we let this handle by the calling routine */
- if (current->ptrace & PT_PTRACED) {
- unsigned long flags;
- spin_lock_irqsave(&current->sighand->siglock, flags);
- sigdelset(&current->blocked, SIGTRAP);
- recalc_sigpending();
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
- }
- send_sig(SIGTRAP, current, 1);
- current->thread.trap_no = trapno;
- current->thread.error_code = error_code;
- return 0;
-}
-
-void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
-{
- unsigned char opcode;
- unsigned char __user *csp;
- unsigned char __user *ssp;
- unsigned short ip, sp, orig_flags;
- int data32, pref_done;
-
-#define CHECK_IF_IN_TRAP \
- if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \
- newflags |= TF_MASK
-#define VM86_FAULT_RETURN do { \
- if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \
- return_to_32bit(regs, VM86_PICRETURN); \
- if (orig_flags & TF_MASK) \
- handle_vm86_trap(regs, 0, 1); \
- return; } while (0)
-
- orig_flags = *(unsigned short *)&regs->eflags;
-
- csp = (unsigned char __user *) (regs->cs << 4);
- ssp = (unsigned char __user *) (regs->ss << 4);
- sp = SP(regs);
- ip = IP(regs);
-
- data32 = 0;
- pref_done = 0;
- do {
- switch (opcode = popb(csp, ip, simulate_sigsegv)) {
- case 0x66: /* 32-bit data */ data32=1; break;
- case 0x67: /* 32-bit address */ break;
- case 0x2e: /* CS */ break;
- case 0x3e: /* DS */ break;
- case 0x26: /* ES */ break;
- case 0x36: /* SS */ break;
- case 0x65: /* GS */ break;
- case 0x64: /* FS */ break;
- case 0xf2: /* repnz */ break;
- case 0xf3: /* rep */ break;
- default: pref_done = 1;
- }
- } while (!pref_done);
-
- switch (opcode) {
-
- /* pushf */
- case 0x9c:
- if (data32) {
- pushl(ssp, sp, get_vflags(regs), simulate_sigsegv);
- SP(regs) -= 4;
- } else {
- pushw(ssp, sp, get_vflags(regs), simulate_sigsegv);
- SP(regs) -= 2;
- }
- IP(regs) = ip;
- VM86_FAULT_RETURN;
-
- /* popf */
- case 0x9d:
- {
- unsigned long newflags;
- if (data32) {
- newflags=popl(ssp, sp, simulate_sigsegv);
- SP(regs) += 4;
- } else {
- newflags = popw(ssp, sp, simulate_sigsegv);
- SP(regs) += 2;
- }
- IP(regs) = ip;
- CHECK_IF_IN_TRAP;
- if (data32) {
- set_vflags_long(newflags, regs);
- } else {
- set_vflags_short(newflags, regs);
- }
- VM86_FAULT_RETURN;
- }
-
- /* int xx */
- case 0xcd: {
- int intno=popb(csp, ip, simulate_sigsegv);
- IP(regs) = ip;
- if (VMPI.vm86dbg_active) {
- if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] )
- return_to_32bit(regs, VM86_INTx + (intno << 8));
- }
- do_int(regs, intno, ssp, sp);
- return;
- }
-
- /* iret */
- case 0xcf:
- {
- unsigned long newip;
- unsigned long newcs;
- unsigned long newflags;
- if (data32) {
- newip=popl(ssp, sp, simulate_sigsegv);
- newcs=popl(ssp, sp, simulate_sigsegv);
- newflags=popl(ssp, sp, simulate_sigsegv);
- SP(regs) += 12;
- } else {
- newip = popw(ssp, sp, simulate_sigsegv);
- newcs = popw(ssp, sp, simulate_sigsegv);
- newflags = popw(ssp, sp, simulate_sigsegv);
- SP(regs) += 6;
- }
- IP(regs) = newip;
- regs->cs = newcs;
- CHECK_IF_IN_TRAP;
- if (data32) {
- set_vflags_long(newflags, regs);
- } else {
- set_vflags_short(newflags, regs);
- }
- VM86_FAULT_RETURN;
- }
-
- /* cli */
- case 0xfa:
- IP(regs) = ip;
- clear_IF(regs);
- VM86_FAULT_RETURN;
-
- /* sti */
- /*
- * Damn. This is incorrect: the 'sti' instruction should actually
- * enable interrupts after the /next/ instruction. Not good.
- *
- * Probably needs some horsing around with the TF flag. Aiee..
- */
- case 0xfb:
- IP(regs) = ip;
- set_IF(regs);
- VM86_FAULT_RETURN;
-
- default:
- return_to_32bit(regs, VM86_UNKNOWN);
- }
-
- return;
-
-simulate_sigsegv:
- /* FIXME: After a long discussion with Stas we finally
- * agreed, that this is wrong. Here we should
- * really send a SIGSEGV to the user program.
- * But how do we create the correct context? We
- * are inside a general protection fault handler
- * and has just returned from a page fault handler.
- * The correct context for the signal handler
- * should be a mixture of the two, but how do we
- * get the information? [KD]
- */
- return_to_32bit(regs, VM86_UNKNOWN);
-}
-
-/* ---------------- vm86 special IRQ passing stuff ----------------- */
-
-#define VM86_IRQNAME "vm86irq"
-
-static struct vm86_irqs {
- struct task_struct *tsk;
- int sig;
-} vm86_irqs[16];
-
-static DEFINE_SPINLOCK(irqbits_lock);
-static int irqbits;
-
-#define ALLOWED_SIGS ( 1 /* 0 = don't send a signal */ \
- | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \
- | (1 << SIGUNUSED) )
-
-static irqreturn_t irq_handler(int intno, void *dev_id, struct pt_regs * regs)
-{
- int irq_bit;
- unsigned long flags;
-
- spin_lock_irqsave(&irqbits_lock, flags);
- irq_bit = 1 << intno;
- if ((irqbits & irq_bit) || ! vm86_irqs[intno].tsk)
- goto out;
- irqbits |= irq_bit;
- if (vm86_irqs[intno].sig)
- send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1);
- /*
- * IRQ will be re-enabled when user asks for the irq (whether
- * polling or as a result of the signal)
- */
- disable_irq_nosync(intno);
- spin_unlock_irqrestore(&irqbits_lock, flags);
- return IRQ_HANDLED;
-
-out:
- spin_unlock_irqrestore(&irqbits_lock, flags);
- return IRQ_NONE;
-}
-
-static inline void free_vm86_irq(int irqnumber)
-{
- unsigned long flags;
-
- free_irq(irqnumber, NULL);
- vm86_irqs[irqnumber].tsk = NULL;
-
- spin_lock_irqsave(&irqbits_lock, flags);
- irqbits &= ~(1 << irqnumber);
- spin_unlock_irqrestore(&irqbits_lock, flags);
-}
-
-void release_vm86_irqs(struct task_struct *task)
-{
- int i;
- for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++)
- if (vm86_irqs[i].tsk == task)
- free_vm86_irq(i);
-}
-
-static inline int get_and_reset_irq(int irqnumber)
-{
- int bit;
- unsigned long flags;
- int ret = 0;
-
- if (invalid_vm86_irq(irqnumber)) return 0;
- if (vm86_irqs[irqnumber].tsk != current) return 0;
- spin_lock_irqsave(&irqbits_lock, flags);
- bit = irqbits & (1 << irqnumber);
- irqbits &= ~bit;
- if (bit) {
- enable_irq(irqnumber);
- ret = 1;
- }
-
- spin_unlock_irqrestore(&irqbits_lock, flags);
- return ret;
-}
-
-
-static int do_vm86_irq_handling(int subfunction, int irqnumber)
-{
- int ret;
- switch (subfunction) {
- case VM86_GET_AND_RESET_IRQ: {
- return get_and_reset_irq(irqnumber);
- }
- case VM86_GET_IRQ_BITS: {
- return irqbits;
- }
- case VM86_REQUEST_IRQ: {
- int sig = irqnumber >> 8;
- int irq = irqnumber & 255;
- if (!capable(CAP_SYS_ADMIN)) return -EPERM;
- if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM;
- if (invalid_vm86_irq(irq)) return -EPERM;
- if (vm86_irqs[irq].tsk) return -EPERM;
- ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL);
- if (ret) return ret;
- vm86_irqs[irq].sig = sig;
- vm86_irqs[irq].tsk = current;
- return irq;
- }
- case VM86_FREE_IRQ: {
- if (invalid_vm86_irq(irqnumber)) return -EPERM;
- if (!vm86_irqs[irqnumber].tsk) return 0;
- if (vm86_irqs[irqnumber].tsk != current) return -EPERM;
- free_vm86_irq(irqnumber);
- return 0;
- }
- }
- return -EINVAL;
-}
-
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
deleted file mode 100644
index 4710195b6b7..00000000000
--- a/arch/i386/kernel/vmlinux.lds.S
+++ /dev/null
@@ -1,150 +0,0 @@
-/* ld script to make i386 Linux kernel
- * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
- */
-
-#define LOAD_OFFSET __PAGE_OFFSET
-
-#include <asm-generic/vmlinux.lds.h>
-#include <asm/thread_info.h>
-#include <asm/page.h>
-
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(phys_startup_32)
-jiffies = jiffies_64;
-SECTIONS
-{
- . = __KERNEL_START;
- phys_startup_32 = startup_32 - LOAD_OFFSET;
- /* read-only */
- _text = .; /* Text and read-only data */
- .text : AT(ADDR(.text) - LOAD_OFFSET) {
- *(.text)
- SCHED_TEXT
- LOCK_TEXT
- KPROBES_TEXT
- *(.fixup)
- *(.gnu.warning)
- } = 0x9090
-
- _etext = .; /* End of text section */
-
- . = ALIGN(16); /* Exception table */
- __start___ex_table = .;
- __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
- __stop___ex_table = .;
-
- RODATA
-
- /* writeable */
- .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
- *(.data)
- CONSTRUCTORS
- }
-
- . = ALIGN(4096);
- __nosave_begin = .;
- .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
- . = ALIGN(4096);
- __nosave_end = .;
-
- . = ALIGN(4096);
- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
- *(.data.idt)
- }
-
- . = ALIGN(32);
- .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
- *(.data.cacheline_aligned)
- }
-
- /* rarely changed data like cpu maps */
- . = ALIGN(32);
- .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
- _edata = .; /* End of data section */
-
- . = ALIGN(THREAD_SIZE); /* init_task */
- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
- *(.data.init_task)
- }
-
- /* will be freed after init */
- . = ALIGN(4096); /* Init code and data */
- __init_begin = .;
- .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
- _sinittext = .;
- *(.init.text)
- _einittext = .;
- }
- .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
- . = ALIGN(16);
- __setup_start = .;
- .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
- __setup_end = .;
- __initcall_start = .;
- .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
- *(.initcall1.init)
- *(.initcall2.init)
- *(.initcall3.init)
- *(.initcall4.init)
- *(.initcall5.init)
- *(.initcall6.init)
- *(.initcall7.init)
- }
- __initcall_end = .;
- __con_initcall_start = .;
- .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
- *(.con_initcall.init)
- }
- __con_initcall_end = .;
- SECURITY_INIT
- . = ALIGN(4);
- __alt_instructions = .;
- .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
- *(.altinstructions)
- }
- __alt_instructions_end = .;
- .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
- *(.altinstr_replacement)
- }
- /* .exit.text is discard at runtime, not link time, to deal with references
- from .altinstructions and .eh_frame */
- .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
- .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
- . = ALIGN(4096);
- __initramfs_start = .;
- .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
- __initramfs_end = .;
- . = ALIGN(32);
- __per_cpu_start = .;
- .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
- __per_cpu_end = .;
- . = ALIGN(4096);
- __init_end = .;
- /* freed after init ends here */
-
- __bss_start = .; /* BSS */
- .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) {
- *(.bss.page_aligned)
- }
- .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
- *(.bss)
- }
- . = ALIGN(4);
- __bss_stop = .;
-
- _end = . ;
-
- /* This is where the kernel creates the early boot page tables */
- . = ALIGN(4096);
- pg0 = .;
-
- /* Sections to be discarded */
- /DISCARD/ : {
- *(.exitcall.exit)
- }
-
- STABS_DEBUG
-
- DWARF_DEBUG
-}
diff --git a/arch/i386/kernel/vsyscall-int80.S b/arch/i386/kernel/vsyscall-int80.S
deleted file mode 100644
index 530d0525e5e..00000000000
--- a/arch/i386/kernel/vsyscall-int80.S
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Code for the vsyscall page. This version uses the old int $0x80 method.
- *
- * NOTE:
- * 1) __kernel_vsyscall _must_ be first in this page.
- * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
- * for details.
- */
-
- .text
- .globl __kernel_vsyscall
- .type __kernel_vsyscall,@function
-__kernel_vsyscall:
-.LSTART_vsyscall:
- int $0x80
- ret
-.LEND_vsyscall:
- .size __kernel_vsyscall,.-.LSTART_vsyscall
- .previous
-
- .section .eh_frame,"a",@progbits
-.LSTARTFRAMEDLSI:
- .long .LENDCIEDLSI-.LSTARTCIEDLSI
-.LSTARTCIEDLSI:
- .long 0 /* CIE ID */
- .byte 1 /* Version number */
- .string "zR" /* NUL-terminated augmentation string */
- .uleb128 1 /* Code alignment factor */
- .sleb128 -4 /* Data alignment factor */
- .byte 8 /* Return address register column */
- .uleb128 1 /* Augmentation value length */
- .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
- .byte 0x0c /* DW_CFA_def_cfa */
- .uleb128 4
- .uleb128 4
- .byte 0x88 /* DW_CFA_offset, column 0x8 */
- .uleb128 1
- .align 4
-.LENDCIEDLSI:
- .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
-.LSTARTFDEDLSI:
- .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
- .long .LSTART_vsyscall-. /* PC-relative start address */
- .long .LEND_vsyscall-.LSTART_vsyscall
- .uleb128 0
- .align 4
-.LENDFDEDLSI:
- .previous
-
-/*
- * Get the common code for the sigreturn entry points.
- */
-#include "vsyscall-sigreturn.S"
diff --git a/arch/i386/kernel/vsyscall-note.S b/arch/i386/kernel/vsyscall-note.S
deleted file mode 100644
index d4b5be4f3d5..00000000000
--- a/arch/i386/kernel/vsyscall-note.S
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-
-#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \
- .section name, flags; \
- .balign 4; \
- .long 1f - 0f; /* name length */ \
- .long 3f - 2f; /* data length */ \
- .long type; /* note type */ \
-0: .asciz vendor; /* vendor name */ \
-1: .balign 4; \
-2:
-
-#define ASM_ELF_NOTE_END \
-3: .balign 4; /* pad out section */ \
- .previous
-
- ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
- .long LINUX_VERSION_CODE
- ASM_ELF_NOTE_END
diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S
deleted file mode 100644
index fadb5bc3c37..00000000000
--- a/arch/i386/kernel/vsyscall-sigreturn.S
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Common code for the sigreturn entry points on the vsyscall page.
- * So far this code is the same for both int80 and sysenter versions.
- * This file is #include'd by vsyscall-*.S to define them after the
- * vsyscall entry point. The kernel assumes that the addresses of these
- * routines are constant for all vsyscall implementations.
- */
-
-#include <asm/unistd.h>
-#include <asm/asm-offsets.h>
-
-
-/* XXX
- Should these be named "_sigtramp" or something?
-*/
-
- .text
- .org __kernel_vsyscall+32,0x90
- .globl __kernel_sigreturn
- .type __kernel_sigreturn,@function
-__kernel_sigreturn:
-.LSTART_sigreturn:
- popl %eax /* XXX does this mean it needs unwind info? */
- movl $__NR_sigreturn, %eax
- int $0x80
-.LEND_sigreturn:
- .size __kernel_sigreturn,.-.LSTART_sigreturn
-
- .balign 32
- .globl __kernel_rt_sigreturn
- .type __kernel_rt_sigreturn,@function
-__kernel_rt_sigreturn:
-.LSTART_rt_sigreturn:
- movl $__NR_rt_sigreturn, %eax
- int $0x80
-.LEND_rt_sigreturn:
- .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
- .balign 32
- .previous
-
- .section .eh_frame,"a",@progbits
-.LSTARTFRAMEDLSI1:
- .long .LENDCIEDLSI1-.LSTARTCIEDLSI1
-.LSTARTCIEDLSI1:
- .long 0 /* CIE ID */
- .byte 1 /* Version number */
- .string "zR" /* NUL-terminated augmentation string */
- .uleb128 1 /* Code alignment factor */
- .sleb128 -4 /* Data alignment factor */
- .byte 8 /* Return address register column */
- .uleb128 1 /* Augmentation value length */
- .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
- .byte 0 /* DW_CFA_nop */
- .align 4
-.LENDCIEDLSI1:
- .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */
-.LSTARTFDEDLSI1:
- .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */
- /* HACK: The dwarf2 unwind routines will subtract 1 from the
- return address to get an address in the middle of the
- presumed call instruction. Since we didn't get here via
- a call, we need to include the nop before the real start
- to make up for it. */
- .long .LSTART_sigreturn-1-. /* PC-relative start address */
- .long .LEND_sigreturn-.LSTART_sigreturn+1
- .uleb128 0 /* Augmentation */
- /* What follows are the instructions for the table generation.
- We record the locations of each register saved. This is
- complicated by the fact that the "CFA" is always assumed to
- be the value of the stack pointer in the caller. This means
- that we must define the CFA of this body of code to be the
- saved value of the stack pointer in the sigcontext. Which
- also means that there is no fixed relation to the other
- saved registers, which means that we must use DW_CFA_expression
- to compute their addresses. It also means that when we
- adjust the stack with the popl, we have to do it all over again. */
-
-#define do_cfa_expr(offset) \
- .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
- .uleb128 1f-0f; /* length */ \
-0: .byte 0x74; /* DW_OP_breg4 */ \
- .sleb128 offset; /* offset */ \
- .byte 0x06; /* DW_OP_deref */ \
-1:
-
-#define do_expr(regno, offset) \
- .byte 0x10; /* DW_CFA_expression */ \
- .uleb128 regno; /* regno */ \
- .uleb128 1f-0f; /* length */ \
-0: .byte 0x74; /* DW_OP_breg4 */ \
- .sleb128 offset; /* offset */ \
-1:
-
- do_cfa_expr(SIGCONTEXT_esp+4)
- do_expr(0, SIGCONTEXT_eax+4)
- do_expr(1, SIGCONTEXT_ecx+4)
- do_expr(2, SIGCONTEXT_edx+4)
- do_expr(3, SIGCONTEXT_ebx+4)
- do_expr(5, SIGCONTEXT_ebp+4)
- do_expr(6, SIGCONTEXT_esi+4)
- do_expr(7, SIGCONTEXT_edi+4)
- do_expr(8, SIGCONTEXT_eip+4)
-
- .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */
-
- do_cfa_expr(SIGCONTEXT_esp)
- do_expr(0, SIGCONTEXT_eax)
- do_expr(1, SIGCONTEXT_ecx)
- do_expr(2, SIGCONTEXT_edx)
- do_expr(3, SIGCONTEXT_ebx)
- do_expr(5, SIGCONTEXT_ebp)
- do_expr(6, SIGCONTEXT_esi)
- do_expr(7, SIGCONTEXT_edi)
- do_expr(8, SIGCONTEXT_eip)
-
- .align 4
-.LENDFDEDLSI1:
-
- .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */
-.LSTARTFDEDLSI2:
- .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */
- /* HACK: See above wrt unwind library assumptions. */
- .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
- .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
- .uleb128 0 /* Augmentation */
- /* What follows are the instructions for the table generation.
- We record the locations of each register saved. This is
- slightly less complicated than the above, since we don't
- modify the stack pointer in the process. */
-
- do_cfa_expr(RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esp)
- do_expr(0, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eax)
- do_expr(1, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ecx)
- do_expr(2, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edx)
- do_expr(3, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebx)
- do_expr(5, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebp)
- do_expr(6, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esi)
- do_expr(7, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edi)
- do_expr(8, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eip)
-
- .align 4
-.LENDFDEDLSI2:
- .previous
diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S
deleted file mode 100644
index 4daefb2ec1b..00000000000
--- a/arch/i386/kernel/vsyscall-sysenter.S
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Code for the vsyscall page. This version uses the sysenter instruction.
- *
- * NOTE:
- * 1) __kernel_vsyscall _must_ be first in this page.
- * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S
- * for details.
- */
-
- .text
- .globl __kernel_vsyscall
- .type __kernel_vsyscall,@function
-__kernel_vsyscall:
-.LSTART_vsyscall:
- push %ecx
-.Lpush_ecx:
- push %edx
-.Lpush_edx:
- push %ebp
-.Lenter_kernel:
- movl %esp,%ebp
- sysenter
-
- /* 7: align return point with nop's to make disassembly easier */
- .space 7,0x90
-
- /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */
- jmp .Lenter_kernel
- /* 16: System call normal return point is here! */
- .globl SYSENTER_RETURN /* Symbol used by entry.S. */
-SYSENTER_RETURN:
- pop %ebp
-.Lpop_ebp:
- pop %edx
-.Lpop_edx:
- pop %ecx
-.Lpop_ecx:
- ret
-.LEND_vsyscall:
- .size __kernel_vsyscall,.-.LSTART_vsyscall
- .previous
-
- .section .eh_frame,"a",@progbits
-.LSTARTFRAMEDLSI:
- .long .LENDCIEDLSI-.LSTARTCIEDLSI
-.LSTARTCIEDLSI:
- .long 0 /* CIE ID */
- .byte 1 /* Version number */
- .string "zR" /* NUL-terminated augmentation string */
- .uleb128 1 /* Code alignment factor */
- .sleb128 -4 /* Data alignment factor */
- .byte 8 /* Return address register column */
- .uleb128 1 /* Augmentation value length */
- .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
- .byte 0x0c /* DW_CFA_def_cfa */
- .uleb128 4
- .uleb128 4
- .byte 0x88 /* DW_CFA_offset, column 0x8 */
- .uleb128 1
- .align 4
-.LENDCIEDLSI:
- .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
-.LSTARTFDEDLSI:
- .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
- .long .LSTART_vsyscall-. /* PC-relative start address */
- .long .LEND_vsyscall-.LSTART_vsyscall
- .uleb128 0
- /* What follows are the instructions for the table generation.
- We have to record all changes of the stack pointer. */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpush_ecx-.LSTART_vsyscall
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x08 /* RA at offset 8 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpush_edx-.Lpush_ecx
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x0c /* RA at offset 12 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lenter_kernel-.Lpush_edx
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x10 /* RA at offset 16 now */
- .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
- /* Finally the epilogue. */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_ebp-.Lenter_kernel
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x0c /* RA at offset 12 now */
- .byte 0xc5 /* DW_CFA_restore %ebp */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_edx-.Lpop_ebp
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x08 /* RA at offset 8 now */
- .byte 0x04 /* DW_CFA_advance_loc4 */
- .long .Lpop_ecx-.Lpop_edx
- .byte 0x0e /* DW_CFA_def_cfa_offset */
- .byte 0x04 /* RA at offset 4 now */
- .align 4
-.LENDFDEDLSI:
- .previous
-
-/*
- * Get the common code for the sigreturn entry points.
- */
-#include "vsyscall-sigreturn.S"
diff --git a/arch/i386/kernel/vsyscall.S b/arch/i386/kernel/vsyscall.S
deleted file mode 100644
index b403890fe39..00000000000
--- a/arch/i386/kernel/vsyscall.S
+++ /dev/null
@@ -1,15 +0,0 @@
-#include <linux/init.h>
-
-__INITDATA
-
- .globl vsyscall_int80_start, vsyscall_int80_end
-vsyscall_int80_start:
- .incbin "arch/i386/kernel/vsyscall-int80.so"
-vsyscall_int80_end:
-
- .globl vsyscall_sysenter_start, vsyscall_sysenter_end
-vsyscall_sysenter_start:
- .incbin "arch/i386/kernel/vsyscall-sysenter.so"
-vsyscall_sysenter_end:
-
-__FINIT
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S
deleted file mode 100644
index 98699ca6e52..00000000000
--- a/arch/i386/kernel/vsyscall.lds.S
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Linker script for vsyscall DSO. The vsyscall page is an ELF shared
- * object prelinked to its virtual address, and with only one read-only
- * segment (that fits in one page). This script controls its layout.
- */
-#include <asm/asm-offsets.h>
-
-SECTIONS
-{
- . = VSYSCALL_BASE + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :text
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
-
- /* This linker script is used both with -r and with -shared.
- For the layouts to match, we need to skip more than enough
- space for the dynamic symbol table et al. If this amount
- is insufficient, ld -shared will barf. Just increase it here. */
- . = VSYSCALL_BASE + 0x400;
-
- .text : { *(.text) } :text =0x90909090
- .note : { *(.note.*) } :text :note
- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
- .eh_frame : { KEEP (*(.eh_frame)) } :text
- .dynamic : { *(.dynamic) } :text :dynamic
- .useless : {
- *(.got.plt) *(.got)
- *(.data .data.* .gnu.linkonce.d.*)
- *(.dynbss)
- *(.bss .bss.* .gnu.linkonce.b.*)
- } :text
-}
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
- text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- note PT_NOTE FLAGS(4); /* PF_R */
- eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
- LINUX_2.5 {
- global:
- __kernel_vsyscall;
- __kernel_sigreturn;
- __kernel_rt_sigreturn;
-
- local: *;
- };
-}
-
-/* The ELF entry point can be used to set the AT_SYSINFO value. */
-ENTRY(__kernel_vsyscall);