Diffstat (limited to 'drivers/edac')
-rw-r--r--  drivers/edac/Kconfig                 122
-rw-r--r--  drivers/edac/Makefile                 20
-rw-r--r--  drivers/edac/amd64_edac.c           3143
-rw-r--r--  drivers/edac/amd64_edac.h            569
-rw-r--r--  drivers/edac/amd64_edac_dbg.c         89
-rw-r--r--  drivers/edac/amd64_edac_inj.c        273
-rw-r--r--  drivers/edac/amd76x_edac.c            74
-rw-r--r--  drivers/edac/amd8111_edac.c           46
-rw-r--r--  drivers/edac/amd8131_edac.c            2
-rw-r--r--  drivers/edac/amd8131_edac.h            2
-rw-r--r--  drivers/edac/cell_edac.c              74
-rw-r--r--  drivers/edac/cpc925_edac.c           177
-rw-r--r--  drivers/edac/e752x_edac.c            189
-rw-r--r--  drivers/edac/e7xxx_edac.c            120
-rw-r--r--  drivers/edac/edac_core.h             461
-rw-r--r--  drivers/edac/edac_device.c           121
-rw-r--r--  drivers/edac/edac_device_sysfs.c      93
-rw-r--r--  drivers/edac/edac_mc.c              1093
-rw-r--r--  drivers/edac/edac_mc_sysfs.c        1458
-rw-r--r--  drivers/edac/edac_mce.c               61
-rw-r--r--  drivers/edac/edac_module.c            49
-rw-r--r--  drivers/edac/edac_module.h            32
-rw-r--r--  drivers/edac/edac_pci.c               65
-rw-r--r--  drivers/edac/edac_pci_sysfs.c         81
-rw-r--r--  drivers/edac/edac_stub.c              51
-rw-r--r--  drivers/edac/ghes_edac.c             547
-rw-r--r--  drivers/edac/highbank_l2_edac.c      154
-rw-r--r--  drivers/edac/highbank_mc_edac.c      281
-rw-r--r--  drivers/edac/i3000_edac.c             91
-rw-r--r--  drivers/edac/i3200_edac.c            135
-rw-r--r--  drivers/edac/i5000_edac.c            428
-rw-r--r--  drivers/edac/i5100_edac.c            333
-rw-r--r--  drivers/edac/i5400_edac.c            511
-rw-r--r--  drivers/edac/i7300_edac.c            411
-rw-r--r--  drivers/edac/i7core_edac.c          1216
-rw-r--r--  drivers/edac/i82443bxgx_edac.c        94
-rw-r--r--  drivers/edac/i82860_edac.c            96
-rw-r--r--  drivers/edac/i82875p_edac.c          112
-rw-r--r--  drivers/edac/i82975x_edac.c          171
-rw-r--r--  drivers/edac/mce_amd.c               772
-rw-r--r--  drivers/edac/mce_amd.h                59
-rw-r--r--  drivers/edac/mce_amd_inj.c            34
-rw-r--r--  drivers/edac/mpc85xx_edac.c          390
-rw-r--r--  drivers/edac/mpc85xx_edac.h            9
-rw-r--r--  drivers/edac/mv64x60_edac.c           87
-rw-r--r--  drivers/edac/mv64x60_edac.h            2
-rw-r--r--  drivers/edac/octeon_edac-l2c.c       208
-rw-r--r--  drivers/edac/octeon_edac-lmc.c       353
-rw-r--r--  drivers/edac/octeon_edac-pc.c        143
-rw-r--r--  drivers/edac/octeon_edac-pci.c       111
-rw-r--r--  drivers/edac/pasemi_edac.c            65
-rw-r--r--  drivers/edac/ppc4xx_edac.c           120
-rw-r--r--  drivers/edac/r82600_edac.c            96
-rw-r--r--  drivers/edac/sb_edac.c              2189
-rw-r--r--  drivers/edac/tile_edac.c             266
-rw-r--r--  drivers/edac/x38_edac.c               98
56 files changed, 11772 insertions, 6275 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index f436a2fa9f3..878f09005fa 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -4,10 +4,13 @@
# Licensed and distributed under the GPL
#
+config EDAC_SUPPORT
+ bool
+
menuconfig EDAC
bool "EDAC (Error Detection And Correction) reporting"
depends on HAS_IOMEM
- depends on X86 || PPC
+ depends on X86 || PPC || TILE || ARM || EDAC_SUPPORT
help
EDAC is designed to report errors in the core system.
These are low-level errors that are reported in the CPU or
@@ -29,23 +32,29 @@ menuconfig EDAC
if EDAC
-comment "Reporting subsystems"
+config EDAC_LEGACY_SYSFS
+ bool "EDAC legacy sysfs"
+ default y
+ help
+ Enable the compatibility sysfs nodes.
+ Use 'Y' if your edac utilities aren't ported to work with the newer
+ structures.
config EDAC_DEBUG
bool "Debugging"
help
- This turns on debugging information for the entire EDAC
- sub-system. You can insert module with "debug_level=x", current
- there're four debug levels (x=0,1,2,3 from low to high).
- Usually you should select 'N'.
+ This turns on debugging information for the entire EDAC subsystem.
+ You do so by inserting edac_module with "edac_debug_level=x". Valid
+ levels are 0-4 (from low to high) and by default it is set to 2.
+ Usually you should select 'N' here.
config EDAC_DECODE_MCE
tristate "Decode MCEs in human-readable form (only on AMD for now)"
- depends on CPU_SUP_AMD && X86_MCE
+ depends on CPU_SUP_AMD && X86_MCE_AMD
default y
---help---
Enable this option if you want to decode Machine Check Exceptions
- occuring on your machine in human-readable form.
+ occurring on your machine in human-readable form.
You should definitely say Y here in case you want to decode MCEs
which occur really early upon boot, before the module infrastructure
@@ -71,15 +80,35 @@ config EDAC_MM_EDAC
occurred so that a particular failing memory module can be
replaced. If unsure, select 'Y'.
-config EDAC_MCE
- bool
+config EDAC_GHES
+ bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
+ depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y)
+ default y
+ help
+ Not all machines support hardware-driven error report. Some of those
+ provide a BIOS-driven error report mechanism via ACPI, using the
+ APEI/GHES driver. By enabling this option, the error reports provided
+ by GHES are sent to userspace via the EDAC API.
+
+ When this option is enabled, it will disable the hardware-driven
+ mechanisms, if a GHES BIOS is detected, entering into the
+ "Firmware First" mode.
+
+ Note that keeping both GHES and a hardware-driven error reporting
+ mechanism enabled won't work well, as the BIOS will race with the OS
+ when reading the error registers. So, if you do not want to use the
+ "Firmware First" GHES error mechanism, you should disable GHES either
+ at compile time or by passing the "ghes.disable=1" kernel parameter
+ at boot time.
+
+ If unsure, say 'Y'.
config EDAC_AMD64
- tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
- depends on EDAC_MM_EDAC && AMD_NB && X86_64 && PCI && EDAC_DECODE_MCE
+ tristate "AMD64 (Opteron, Athlon64) K8, F10h"
+ depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
help
- Support for error detection and correction on the AMD 64
- Families of Memory Controllers (K8, F10h and F11h)
+ Support for error detection and correction of DRAM ECC errors on
+ the AMD64 families of memory controllers (K8 and F10h)
config EDAC_AMD64_ERROR_INJECTION
bool "Sysfs HW Error injection facilities"
@@ -116,7 +145,7 @@ config EDAC_E7XXX
config EDAC_E752X
tristate "Intel e752x (e7520, e7525, e7320) and 3100"
- depends on EDAC_MM_EDAC && PCI && X86 && HOTPLUG
+ depends on EDAC_MM_EDAC && PCI && X86
help
Support for error detection and correction on the Intel
E7520, E7525, E7320 server chipsets.
@@ -152,7 +181,7 @@ config EDAC_I3000
config EDAC_I3200
tristate "Intel 3200"
- depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL
+ depends on EDAC_MM_EDAC && PCI && X86
help
Support for error detection and correction on the Intel
3200 and 3210 server chipsets.
@@ -173,8 +202,7 @@ config EDAC_I5400
config EDAC_I7CORE
tristate "Intel i7 Core (Nehalem) processors"
- depends on EDAC_MM_EDAC && PCI && X86
- select EDAC_MCE
+ depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
help
Support for error detection and correction the Intel
i7 Core (Nehalem) Integrated Memory Controller that exists on
@@ -216,6 +244,14 @@ config EDAC_I7300
Support for error detection and correction the Intel
Clarksboro MCH (Intel 7300 chipset).
+config EDAC_SBRIDGE
+ tristate "Intel Sandy-Bridge Integrated MC"
+ depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
+ depends on PCI_MMCONFIG
+ help
+ Support for error detection and correction on the Intel
+ Sandy Bridge Integrated Memory Controller.
+
config EDAC_MPC85XX
tristate "Freescale MPC83xx / MPC85xx"
depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx)
@@ -282,4 +318,54 @@ config EDAC_CPC925
a companion chip to the PowerPC 970 family of
processors.
+config EDAC_TILE
+ tristate "Tilera Memory Controller"
+ depends on EDAC_MM_EDAC && TILE
+ default y
+ help
+ Support for error detection and correction on the
+ Tilera memory controller.
+
+config EDAC_HIGHBANK_MC
+ tristate "Highbank Memory Controller"
+ depends on EDAC_MM_EDAC && ARCH_HIGHBANK
+ help
+ Support for error detection and correction on the
+ Calxeda Highbank memory controller.
+
+config EDAC_HIGHBANK_L2
+ tristate "Highbank L2 Cache"
+ depends on EDAC_MM_EDAC && ARCH_HIGHBANK
+ help
+ Support for error detection and correction on the
+ Calxeda Highbank L2 cache.
+
+config EDAC_OCTEON_PC
+ tristate "Cavium Octeon Primary Caches"
+ depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON
+ help
+ Support for error detection and correction on the primary caches of
+ the cnMIPS cores of Cavium Octeon family SOCs.
+
+config EDAC_OCTEON_L2C
+ tristate "Cavium Octeon Secondary Caches (L2C)"
+ depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC
+ help
+ Support for error detection and correction on the
+ Cavium Octeon family of SOCs.
+
+config EDAC_OCTEON_LMC
+ tristate "Cavium Octeon DRAM Memory Controller (LMC)"
+ depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC
+ help
+ Support for error detection and correction on the
+ Cavium Octeon family of SOCs.
+
+config EDAC_OCTEON_PCI
+ tristate "Cavium Octeon PCI Controller"
+ depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC
+ help
+ Support for error detection and correction on the
+ Cavium Octeon family of SOCs.
+
endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index b3781399b38..4154ed6a02c 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -8,18 +8,18 @@
obj-$(CONFIG_EDAC) := edac_stub.o
obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o
-obj-$(CONFIG_EDAC_MCE) += edac_mce.o
-edac_core-objs := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
-edac_core-objs += edac_module.o edac_device_sysfs.o
+edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
+edac_core-y += edac_module.o edac_device_sysfs.o
ifdef CONFIG_PCI
-edac_core-objs += edac_pci.o edac_pci_sysfs.o
+edac_core-y += edac_pci.o edac_pci_sysfs.o
endif
+obj-$(CONFIG_EDAC_GHES) += ghes_edac.o
obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o
-edac_mce_amd-objs := mce_amd.o
+edac_mce_amd-y := mce_amd.o
obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o
obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
@@ -29,6 +29,7 @@ obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
obj-$(CONFIG_EDAC_I5400) += i5400_edac.o
obj-$(CONFIG_EDAC_I7300) += i7300_edac.o
obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o
+obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o
obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
@@ -54,3 +55,12 @@ obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o
+obj-$(CONFIG_EDAC_TILE) += tile_edac.o
+
+obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o
+obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o
+
+obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o
+obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o
+obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o
+obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 8521401bbd7..f8bf00010d4 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1,7 +1,7 @@
#include "amd64_edac.h"
#include <asm/amd_nb.h>
-static struct edac_pci_ctl_info *amd64_ctl_pci;
+static struct edac_pci_ctl_info *pci_ctl;
static int report_gart_errors;
module_param(report_gart_errors, int, 0644);
@@ -15,55 +15,14 @@ module_param(ecc_enable_override, int, 0644);
static struct msr __percpu *msrs;
-/* Lookup table for all possible MC control instances */
-struct amd64_pvt;
-static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES];
-static struct amd64_pvt *pvt_lookup[EDAC_MAX_NUMNODES];
-
/*
- * Address to DRAM bank mapping: see F2x80 for K8 and F2x[1,0]80 for Fam10 and
- * later.
+ * count successfully initialized driver instances for setup_pci_device()
*/
-static int ddr2_dbam_revCG[] = {
- [0] = 32,
- [1] = 64,
- [2] = 128,
- [3] = 256,
- [4] = 512,
- [5] = 1024,
- [6] = 2048,
-};
-
-static int ddr2_dbam_revD[] = {
- [0] = 32,
- [1] = 64,
- [2 ... 3] = 128,
- [4] = 256,
- [5] = 512,
- [6] = 256,
- [7] = 512,
- [8 ... 9] = 1024,
- [10] = 2048,
-};
-
-static int ddr2_dbam[] = { [0] = 128,
- [1] = 256,
- [2 ... 4] = 512,
- [5 ... 6] = 1024,
- [7 ... 8] = 2048,
- [9 ... 10] = 4096,
- [11] = 8192,
-};
+static atomic_t drv_instances = ATOMIC_INIT(0);
-static int ddr3_dbam[] = { [0] = -1,
- [1] = 256,
- [2] = 512,
- [3 ... 4] = -1,
- [5 ... 6] = 1024,
- [7 ... 8] = 2048,
- [9 ... 10] = 4096,
- [11] = 8192,
-};
+/* Per-node driver instances */
+static struct mem_ctl_info **mcis;
+static struct ecc_settings **ecc_stngs;
/*
* Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
@@ -72,8 +31,10 @@ static int ddr3_dbam[] = { [0] = -1,
*
*FIXME: Produce a better mapping/linearisation.
*/
-
-struct scrubrate scrubrates[] = {
+static const struct scrubrate {
+ u32 scrubval; /* bit pattern for scrub rate */
+ u32 bandwidth; /* bandwidth consumed (bytes/sec) */
+} scrubrates[] = {
{ 0x01, 1600000000UL},
{ 0x02, 800000000UL},
{ 0x03, 400000000UL},
@@ -99,6 +60,90 @@ struct scrubrate scrubrates[] = {
{ 0x00, 0UL}, /* scrubbing off */
};
+int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
+ u32 *val, const char *func)
+{
+ int err = 0;
+
+ err = pci_read_config_dword(pdev, offset, val);
+ if (err)
+ amd64_warn("%s: error reading F%dx%03x.\n",
+ func, PCI_FUNC(pdev->devfn), offset);
+
+ return err;
+}
+
+int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
+ u32 val, const char *func)
+{
+ int err = 0;
+
+ err = pci_write_config_dword(pdev, offset, val);
+ if (err)
+ amd64_warn("%s: error writing to F%dx%03x.\n",
+ func, PCI_FUNC(pdev->devfn), offset);
+
+ return err;
+}
+
+/*
+ *
+ * Depending on the family, F2 DCT reads need special handling:
+ *
+ * K8: has a single DCT only
+ *
+ * F10h: each DCT has its own set of regs
+ * DCT0 -> F2x040..
+ * DCT1 -> F2x140..
+ *
+ * F15h: we select which DCT we access using F1x10C[DctCfgSel]
+ *
+ * F16h: has only 1 DCT
+ */
+static int k8_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
+ const char *func)
+{
+ if (addr >= 0x100)
+ return -EINVAL;
+
+ return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
+}
+
+static int f10_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
+ const char *func)
+{
+ return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
+}
+
+/*
+ * Select DCT to which PCI cfg accesses are routed
+ */
+static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct)
+{
+ u32 reg = 0;
+
+ amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
+ reg &= (pvt->model >= 0x30) ? ~3 : ~1;
+ reg |= dct;
+ amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
+}
+
+static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
+ const char *func)
+{
+ u8 dct = 0;
+
+ /* For F15 M30h, the second dct is DCT 3, refer to BKDG Section 2.10 */
+ if (addr >= 0x140 && addr <= 0x1a0) {
+ dct = (pvt->model >= 0x30) ? 3 : 1;
+ addr -= 0x100;
+ }
+
+ f15h_select_dct(pvt, dct);
+
+ return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
+}
+
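To make the per-family routing concrete, here is a stand-alone sketch of the F15h remapping described above (illustrative only, not part of the patch; the helper name is made up):

    #include <stdio.h>

    /* stand-alone model of f15_read_dct_pci_cfg()'s address fixup:
     * registers F2x140..F2x1A0 belong to the second DCT and are reached
     * through the DCT0 offsets after selecting that DCT */
    static void f15h_remap(unsigned int model, int addr,
                           unsigned int *dct, int *fixed)
    {
        *dct = 0;
        *fixed = addr;

        if (addr >= 0x140 && addr <= 0x1a0) {
            /* models 30h and later call the second DCT "DCT 3" */
            *dct = (model >= 0x30) ? 3 : 1;
            *fixed = addr - 0x100;
        }
    }

    int main(void)
    {
        unsigned int dct;
        int addr;

        f15h_remap(0x02, 0x158, &dct, &addr);
        printf("DCT %u, F2x%03x\n", dct, addr);  /* DCT 1, F2x058 */
        return 0;
    }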
/*
* Memory scrubber control interface. For K8, memory scrubbing is handled by
* hardware and can involve L2 cache, dcache as well as the main memory. With
@@ -117,8 +162,7 @@ struct scrubrate scrubrates[] = {
* scan the scrub rate mapping table for a close or matching bandwidth value to
* issue. If requested is too big, then use last maximum value found.
*/
-static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw,
- u32 min_scrubrate)
+static int __set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate)
{
u32 scrubval;
int i;
@@ -128,143 +172,77 @@ static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw,
* memory controller and apply to register. Search for the first
* bandwidth entry that is greater or equal than the setting requested
* and program that. If at last entry, turn off DRAM scrubbing.
+ *
+ * If no suitable bandwidth is found, turn off DRAM scrubbing entirely
+ * by falling back to the last element in scrubrates[].
*/
- for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
+ for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) {
/*
* skip scrub rates which aren't recommended
* (see F10 BKDG, F3x58)
*/
- if (scrubrates[i].scrubval < min_scrubrate)
+ if (scrubrates[i].scrubval < min_rate)
continue;
if (scrubrates[i].bandwidth <= new_bw)
break;
-
- /*
- * if no suitable bandwidth found, turn off DRAM scrubbing
- * entirely by falling back to the last element in the
- * scrubrates array.
- */
}
scrubval = scrubrates[i].scrubval;
- if (scrubval)
- edac_printk(KERN_DEBUG, EDAC_MC,
- "Setting scrub rate bandwidth: %u\n",
- scrubrates[i].bandwidth);
- else
- edac_printk(KERN_DEBUG, EDAC_MC, "Turning scrubbing off.\n");
- pci_write_bits32(ctl, K8_SCRCTRL, scrubval, 0x001F);
+ pci_write_bits32(ctl, SCRCTRL, scrubval, 0x001F);
+
+ if (scrubval)
+ return scrubrates[i].bandwidth;
return 0;
}
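The search above is a first-fit walk from the highest bandwidth down, with the terminating "scrubbing off" entry as the fallback. A minimal userspace sketch of the same logic, using a trimmed copy of scrubrates[] (illustrative only, not part of the patch; the helper name is made up):

    #include <stdio.h>

    /* trimmed copy of the scrubrates[] mapping above */
    static const struct {
        unsigned int scrubval;
        unsigned long bandwidth;
    } rates[] = {
        { 0x01, 1600000000UL },
        { 0x02,  800000000UL },
        { 0x03,  400000000UL },
        { 0x00,          0UL },  /* scrubbing off */
    };

    /* first entry with bandwidth <= the request wins; the final "off"
     * entry is the fallback, exactly as in __set_scrub_rate() */
    static unsigned int pick_scrubval(unsigned long new_bw,
                                      unsigned int min_rate)
    {
        unsigned int i;

        for (i = 0; i < sizeof(rates) / sizeof(rates[0]) - 1; i++) {
            if (rates[i].scrubval < min_rate)
                continue;  /* rate not recommended */
            if (rates[i].bandwidth <= new_bw)
                break;
        }
        return rates[i].scrubval;
    }

    int main(void)
    {
        printf("0x%02x\n", pick_scrubval(900000000UL, 0x0));  /* 0x02 */
        return 0;
    }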
-static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bandwidth)
+static int set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
{
struct amd64_pvt *pvt = mci->pvt_info;
- u32 min_scrubrate = 0x0;
+ u32 min_scrubrate = 0x5;
- switch (boot_cpu_data.x86) {
- case 0xf:
- min_scrubrate = K8_MIN_SCRUB_RATE_BITS;
- break;
- case 0x10:
- min_scrubrate = F10_MIN_SCRUB_RATE_BITS;
- break;
- case 0x11:
- min_scrubrate = F11_MIN_SCRUB_RATE_BITS;
- break;
+ if (pvt->fam == 0xf)
+ min_scrubrate = 0x0;
- default:
- amd64_printk(KERN_ERR, "Unsupported family!\n");
- return -EINVAL;
- }
- return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, bandwidth,
- min_scrubrate);
+ /* Erratum #505 */
+ if (pvt->fam == 0x15 && pvt->model < 0x10)
+ f15h_select_dct(pvt, 0);
+
+ return __set_scrub_rate(pvt->F3, bw, min_scrubrate);
}
-static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
+static int get_scrub_rate(struct mem_ctl_info *mci)
{
struct amd64_pvt *pvt = mci->pvt_info;
u32 scrubval = 0;
- int status = -1, i;
+ int i, retval = -EINVAL;
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_SCRCTRL, &scrubval);
+ /* Erratum #505 */
+ if (pvt->fam == 0x15 && pvt->model < 0x10)
+ f15h_select_dct(pvt, 0);
- scrubval = scrubval & 0x001F;
+ amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
- edac_printk(KERN_DEBUG, EDAC_MC,
- "pci-read, sdram scrub control value: %d \n", scrubval);
+ scrubval = scrubval & 0x001F;
for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
if (scrubrates[i].scrubval == scrubval) {
- *bw = scrubrates[i].bandwidth;
- status = 0;
+ retval = scrubrates[i].bandwidth;
break;
}
}
-
- return status;
-}
-
-/* Map from a CSROW entry to the mask entry that operates on it */
-static inline u32 amd64_map_to_dcs_mask(struct amd64_pvt *pvt, int csrow)
-{
- if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F)
- return csrow;
- else
- return csrow >> 1;
-}
-
-/* return the 'base' address the i'th CS entry of the 'dct' DRAM controller */
-static u32 amd64_get_dct_base(struct amd64_pvt *pvt, int dct, int csrow)
-{
- if (dct == 0)
- return pvt->dcsb0[csrow];
- else
- return pvt->dcsb1[csrow];
+ return retval;
}
/*
- * Return the 'mask' address the i'th CS entry. This function is needed because
- * there number of DCSM registers on Rev E and prior vs Rev F and later is
- * different.
+ * returns true if the SysAddr given by sys_addr matches the
+ * DRAM base/limit associated with node_id
*/
-static u32 amd64_get_dct_mask(struct amd64_pvt *pvt, int dct, int csrow)
+static bool base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, u8 nid)
{
- if (dct == 0)
- return pvt->dcsm0[amd64_map_to_dcs_mask(pvt, csrow)];
- else
- return pvt->dcsm1[amd64_map_to_dcs_mask(pvt, csrow)];
-}
-
-
-/*
- * In *base and *limit, pass back the full 40-bit base and limit physical
- * addresses for the node given by node_id. This information is obtained from
- * DRAM Base (section 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers. The
- * base and limit addresses are of type SysAddr, as defined at the start of
- * section 3.4.4 (p. 70). They are the lowest and highest physical addresses
- * in the address range they represent.
- */
-static void amd64_get_base_and_limit(struct amd64_pvt *pvt, int node_id,
- u64 *base, u64 *limit)
-{
- *base = pvt->dram_base[node_id];
- *limit = pvt->dram_limit[node_id];
-}
-
-/*
- * Return 1 if the SysAddr given by sys_addr matches the base/limit associated
- * with node_id
- */
-static int amd64_base_limit_match(struct amd64_pvt *pvt,
- u64 sys_addr, int node_id)
-{
- u64 base, limit, addr;
-
- amd64_get_base_and_limit(pvt, node_id, &base, &limit);
+ u64 addr;
/* The K8 treats this as a 40-bit value. However, bits 63-40 will be
* all ones if the most significant implemented address bit is 1.
@@ -274,7 +252,8 @@ static int amd64_base_limit_match(struct amd64_pvt *pvt,
*/
addr = sys_addr & 0x000000ffffffffffull;
- return (addr >= base) && (addr <= limit);
+ return ((addr >= get_dram_base(pvt, nid)) &&
+ (addr <= get_dram_limit(pvt, nid)));
}
/*
@@ -287,7 +266,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
u64 sys_addr)
{
struct amd64_pvt *pvt;
- int node_id;
+ u8 node_id;
u32 intlv_en, bits;
/*
@@ -301,11 +280,11 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
* registers. Therefore we arbitrarily choose to read it from the
* register for node 0.
*/
- intlv_en = pvt->dram_IntlvEn[0];
+ intlv_en = dram_intlv_en(pvt, 0);
if (intlv_en == 0) {
- for (node_id = 0; node_id < DRAM_REG_COUNT; node_id++) {
- if (amd64_base_limit_match(pvt, sys_addr, node_id))
+ for (node_id = 0; node_id < DRAM_RANGES; node_id++) {
+ if (base_limit_match(pvt, sys_addr, node_id))
goto found;
}
goto err_no_match;
@@ -314,74 +293,107 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
if (unlikely((intlv_en != 0x01) &&
(intlv_en != 0x03) &&
(intlv_en != 0x07))) {
- amd64_printk(KERN_WARNING, "junk value of 0x%x extracted from "
- "IntlvEn field of DRAM Base Register for node 0: "
- "this probably indicates a BIOS bug.\n", intlv_en);
+ amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
return NULL;
}
bits = (((u32) sys_addr) >> 12) & intlv_en;
for (node_id = 0; ; ) {
- if ((pvt->dram_IntlvSel[node_id] & intlv_en) == bits)
+ if ((dram_intlv_sel(pvt, node_id) & intlv_en) == bits)
break; /* intlv_sel field matches */
- if (++node_id >= DRAM_REG_COUNT)
+ if (++node_id >= DRAM_RANGES)
goto err_no_match;
}
/* sanity test for sys_addr */
- if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) {
- amd64_printk(KERN_WARNING,
- "%s(): sys_addr 0x%llx falls outside base/limit "
- "address range for node %d with node interleaving "
- "enabled.\n",
- __func__, sys_addr, node_id);
+ if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) {
+ amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address "
+ "range for node %d with node interleaving enabled.\n",
+ __func__, sys_addr, node_id);
return NULL;
}
found:
- return edac_mc_find(node_id);
+ return edac_mc_find((int)node_id);
err_no_match:
- debugf2("sys_addr 0x%lx doesn't match any node\n",
- (unsigned long)sys_addr);
+ edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n",
+ (unsigned long)sys_addr);
return NULL;
}
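With interleaving enabled, IntlvEn is a mask of one, two or three ones (0x1/0x3/0x7), and SysAddr bits starting at bit 12 must match a node's IntlvSel field. A condensed sketch of that selection loop (hypothetical helper, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    /* condensed selection loop from find_mc_by_sys_addr(): IntlvEn is a
     * mask of contiguous ones and SysAddr bits [12+] pick the node whose
     * IntlvSel field matches them */
    static int select_node(uint64_t sys_addr, uint32_t intlv_en,
                           const uint32_t *intlv_sel, int nr_nodes)
    {
        uint32_t bits = ((uint32_t)sys_addr >> 12) & intlv_en;
        int nid;

        for (nid = 0; nid < nr_nodes; nid++)
            if ((intlv_sel[nid] & intlv_en) == bits)
                return nid;

        return -1;  /* no match: likely BIOS misprogramming */
    }

    int main(void)
    {
        uint32_t sel[] = { 0x0, 0x1 };  /* two-node interleave */

        /* bit 12 set -> second node */
        printf("%d\n", select_node(0x1000, 0x1, sel, 2));  /* 1 */
        return 0;
    }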
/*
- * Extract the DRAM CS base address from selected csrow register.
- */
-static u64 base_from_dct_base(struct amd64_pvt *pvt, int csrow)
-{
- return ((u64) (amd64_get_dct_base(pvt, 0, csrow) & pvt->dcsb_base)) <<
- pvt->dcs_shift;
-}
-
-/*
- * Extract the mask from the dcsb0[csrow] entry in a CPU revision-specific way.
+ * compute the CS base address of the @csrow on the DRAM controller @dct.
+ * For details see F2x[5C:40] in the processor's BKDG
*/
-static u64 mask_from_dct_mask(struct amd64_pvt *pvt, int csrow)
+static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
+ u64 *base, u64 *mask)
{
- u64 dcsm_bits, other_bits;
- u64 mask;
+ u64 csbase, csmask, base_bits, mask_bits;
+ u8 addr_shift;
- /* Extract bits from DRAM CS Mask. */
- dcsm_bits = amd64_get_dct_mask(pvt, 0, csrow) & pvt->dcsm_mask;
-
- other_bits = pvt->dcsm_mask;
- other_bits = ~(other_bits << pvt->dcs_shift);
+ if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
+ csbase = pvt->csels[dct].csbases[csrow];
+ csmask = pvt->csels[dct].csmasks[csrow];
+ base_bits = GENMASK_ULL(31, 21) | GENMASK_ULL(15, 9);
+ mask_bits = GENMASK_ULL(29, 21) | GENMASK_ULL(15, 9);
+ addr_shift = 4;
/*
- * The extracted bits from DCSM belong in the spaces represented by
- * the cleared bits in other_bits.
+ * F16h and F15h, models 30h and later need two addr_shift values:
+ * 8 for high and 6 for low (cf. F16h BKDG).
*/
- mask = (dcsm_bits << pvt->dcs_shift) | other_bits;
+ } else if (pvt->fam == 0x16 ||
+ (pvt->fam == 0x15 && pvt->model >= 0x30)) {
+ csbase = pvt->csels[dct].csbases[csrow];
+ csmask = pvt->csels[dct].csmasks[csrow >> 1];
+
+ *base = (csbase & GENMASK_ULL(15, 5)) << 6;
+ *base |= (csbase & GENMASK_ULL(30, 19)) << 8;
+
+ *mask = ~0ULL;
+ /* poke holes for the csmask */
+ *mask &= ~((GENMASK_ULL(15, 5) << 6) |
+ (GENMASK_ULL(30, 19) << 8));
+
+ *mask |= (csmask & GENMASK_ULL(15, 5)) << 6;
+ *mask |= (csmask & GENMASK_ULL(30, 19)) << 8;
+
+ return;
+ } else {
+ csbase = pvt->csels[dct].csbases[csrow];
+ csmask = pvt->csels[dct].csmasks[csrow >> 1];
+ addr_shift = 8;
+
+ if (pvt->fam == 0x15)
+ base_bits = mask_bits =
+ GENMASK_ULL(30,19) | GENMASK_ULL(13,5);
+ else
+ base_bits = mask_bits =
+ GENMASK_ULL(28,19) | GENMASK_ULL(13,5);
+ }
+
+ *base = (csbase & base_bits) << addr_shift;
- return mask;
+ *mask = ~0ULL;
+ /* poke holes for the csmask */
+ *mask &= ~(mask_bits << addr_shift);
+ /* OR them in */
+ *mask |= (csmask & mask_bits) << addr_shift;
}
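The mask construction above follows a "poke holes" pattern: start from all ones (bits outside the DCSM field always compare equal), clear the field's bit positions, then OR the hardware mask back in. A stand-alone sketch with a userspace stand-in for GENMASK_ULL() (illustrative only; the helper name is made up):

    #include <stdio.h>
    #include <stdint.h>

    /* userspace stand-in for the kernel's GENMASK_ULL(h, l) */
    #define GENMASK_ULL(h, l) \
        ((~0ULL << (l)) & (~0ULL >> (63 - (h))))

    /* the "poke holes" pattern from get_cs_base_and_mask(): bits outside
     * the DCSM field always compare equal, bits inside it follow the
     * hardware mask */
    static uint64_t build_cs_mask(uint64_t csmask, uint64_t mask_bits,
                                  unsigned int addr_shift)
    {
        uint64_t mask = ~0ULL;

        mask &= ~(mask_bits << addr_shift);          /* poke holes   */
        mask |= (csmask & mask_bits) << addr_shift;  /* fill them in */
        return mask;
    }

    int main(void)
    {
        /* rev-F layout: field bits [28:19] and [13:5], shifted by 8 */
        uint64_t m = build_cs_mask(0, GENMASK_ULL(28, 19) |
                                      GENMASK_ULL(13, 5), 8);

        printf("0x%016llx\n", (unsigned long long)m);
        return 0;
    }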
+#define for_each_chip_select(i, dct, pvt) \
+ for (i = 0; i < pvt->csels[dct].b_cnt; i++)
+
+#define chip_select_base(i, dct, pvt) \
+ pvt->csels[dct].csbases[i]
+
+#define for_each_chip_select_mask(i, dct, pvt) \
+ for (i = 0; i < pvt->csels[dct].m_cnt; i++)
+
/*
* @input_addr is an InputAddr associated with the node given by mci. Return the
* csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
@@ -394,49 +406,29 @@ static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
pvt = mci->pvt_info;
- /*
- * Here we use the DRAM CS Base and DRAM CS Mask registers. For each CS
- * base/mask register pair, test the condition shown near the start of
- * section 3.5.4 (p. 84, BKDG #26094, K8, revA-E).
- */
- for (csrow = 0; csrow < pvt->cs_count; csrow++) {
-
- /* This DRAM chip select is disabled on this node */
- if ((pvt->dcsb0[csrow] & K8_DCSB_CS_ENABLE) == 0)
+ for_each_chip_select(csrow, 0, pvt) {
+ if (!csrow_enabled(csrow, 0, pvt))
continue;
- base = base_from_dct_base(pvt, csrow);
- mask = ~mask_from_dct_mask(pvt, csrow);
+ get_cs_base_and_mask(pvt, csrow, 0, &base, &mask);
+
+ mask = ~mask;
if ((input_addr & mask) == (base & mask)) {
- debugf2("InputAddr 0x%lx matches csrow %d (node %d)\n",
- (unsigned long)input_addr, csrow,
- pvt->mc_node_id);
+ edac_dbg(2, "InputAddr 0x%lx matches csrow %d (node %d)\n",
+ (unsigned long)input_addr, csrow,
+ pvt->mc_node_id);
return csrow;
}
}
-
- debugf2("no matching csrow for InputAddr 0x%lx (MC node %d)\n",
- (unsigned long)input_addr, pvt->mc_node_id);
+ edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n",
+ (unsigned long)input_addr, pvt->mc_node_id);
return -1;
}
/*
- * Return the base value defined by the DRAM Base register for the node
- * represented by mci. This function returns the full 40-bit value despite the
- * fact that the register only stores bits 39-24 of the value. See section
- * 3.4.4.1 (BKDG #26094, K8, revA-E)
- */
-static inline u64 get_dram_base(struct mem_ctl_info *mci)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
-
- return pvt->dram_base[pvt->mc_node_id];
-}
-
-/*
* Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
* for the node represented by mci. Info is passed back in *hole_base,
* *hole_offset, and *hole_size. Function returns 0 if info is valid or 1 if
@@ -456,25 +448,23 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
u64 *hole_offset, u64 *hole_size)
{
struct amd64_pvt *pvt = mci->pvt_info;
- u64 base;
/* only revE and later have the DRAM Hole Address Register */
- if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) {
- debugf1(" revision %d for node %d does not support DHAR\n",
- pvt->ext_model, pvt->mc_node_id);
+ if (pvt->fam == 0xf && pvt->ext_model < K8_REV_E) {
+ edac_dbg(1, " revision %d for node %d does not support DHAR\n",
+ pvt->ext_model, pvt->mc_node_id);
return 1;
}
- /* only valid for Fam10h */
- if (boot_cpu_data.x86 == 0x10 &&
- (pvt->dhar & F10_DRAM_MEM_HOIST_VALID) == 0) {
- debugf1(" Dram Memory Hoisting is DISABLED on this system\n");
+ /* valid for Fam10h and above */
+ if (pvt->fam >= 0x10 && !dhar_mem_hoist_valid(pvt)) {
+ edac_dbg(1, " Dram Memory Hoisting is DISABLED on this system\n");
return 1;
}
- if ((pvt->dhar & DHAR_VALID) == 0) {
- debugf1(" Dram Memory Hoisting is DISABLED on this node %d\n",
- pvt->mc_node_id);
+ if (!dhar_valid(pvt)) {
+ edac_dbg(1, " Dram Memory Hoisting is DISABLED on this node %d\n",
+ pvt->mc_node_id);
return 1;
}
@@ -496,19 +486,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
* addresses in the hole so that they start at 0x100000000.
*/
- base = dhar_base(pvt->dhar);
-
- *hole_base = base;
- *hole_size = (0x1ull << 32) - base;
+ *hole_base = dhar_base(pvt);
+ *hole_size = (1ULL << 32) - *hole_base;
- if (boot_cpu_data.x86 > 0xf)
- *hole_offset = f10_dhar_offset(pvt->dhar);
- else
- *hole_offset = k8_dhar_offset(pvt->dhar);
+ *hole_offset = (pvt->fam > 0xf) ? f10_dhar_offset(pvt)
+ : k8_dhar_offset(pvt);
- debugf1(" DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
- pvt->mc_node_id, (unsigned long)*hole_base,
- (unsigned long)*hole_offset, (unsigned long)*hole_size);
+ edac_dbg(1, " DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
+ pvt->mc_node_id, (unsigned long)*hole_base,
+ (unsigned long)*hole_offset, (unsigned long)*hole_size);
return 0;
}
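Since the DRAM hole always ends at 4GB, its size follows directly from its base, exactly as computed above. A trivial illustration (example values only, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t hole_base = 0xc0000000ULL;  /* example: hole at 3GB */
        uint64_t hole_size = (1ULL << 32) - hole_base;

        printf("hole: base 0x%llx, size %llu MB\n",
               (unsigned long long)hole_base,
               (unsigned long long)(hole_size >> 20));  /* 1024 MB */
        return 0;
    }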
@@ -545,23 +531,23 @@ EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
*/
static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
{
+ struct amd64_pvt *pvt = mci->pvt_info;
u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
- int ret = 0;
+ int ret;
- dram_base = get_dram_base(mci);
+ dram_base = get_dram_base(pvt, pvt->mc_node_id);
ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
&hole_size);
if (!ret) {
- if ((sys_addr >= (1ull << 32)) &&
- (sys_addr < ((1ull << 32) + hole_size))) {
+ if ((sys_addr >= (1ULL << 32)) &&
+ (sys_addr < ((1ULL << 32) + hole_size))) {
/* use DHAR to translate SysAddr to DramAddr */
dram_addr = sys_addr - hole_offset;
- debugf2("using DHAR to translate SysAddr 0x%lx to "
- "DramAddr 0x%lx\n",
- (unsigned long)sys_addr,
- (unsigned long)dram_addr);
+ edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
+ (unsigned long)sys_addr,
+ (unsigned long)dram_addr);
return dram_addr;
}
@@ -576,11 +562,10 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
* section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
* Programmer's Manual Volume 1 Application Programming.
*/
- dram_addr = (sys_addr & 0xffffffffffull) - dram_base;
+ dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base;
- debugf2("using DRAM Base register to translate SysAddr 0x%lx to "
- "DramAddr 0x%lx\n", (unsigned long)sys_addr,
- (unsigned long)dram_addr);
+ edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
+ (unsigned long)sys_addr, (unsigned long)dram_addr);
return dram_addr;
}
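A condensed model of the translation above: addresses reclaimed above 4GB go through the DHAR offset, everything else is taken relative to the node's DRAM base after masking to 40 bits (hypothetical helper, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    /* condensed model of sys_addr_to_dram_addr() */
    static uint64_t to_dram_addr(uint64_t sys_addr, uint64_t dram_base,
                                 uint64_t hole_offset, uint64_t hole_size)
    {
        if (hole_size &&
            sys_addr >= (1ULL << 32) &&
            sys_addr <  (1ULL << 32) + hole_size)
            return sys_addr - hole_offset;  /* reclaimed range */

        return (sys_addr & ((1ULL << 40) - 1)) - dram_base;
    }

    int main(void)
    {
        /* 1GB hole hoisted above 4GB: offset = 4GB - hole size */
        printf("0x%llx\n", (unsigned long long)
               to_dram_addr(1ULL << 32, 0, 0x40000000ULL,
                            0x40000000ULL));  /* 0xc0000000 */
        return 0;
    }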
@@ -612,13 +597,13 @@ static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
* See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
* concerning translating a DramAddr to an InputAddr.
*/
- intlv_shift = num_node_interleave_bits(pvt->dram_IntlvEn[0]);
- input_addr = ((dram_addr >> intlv_shift) & 0xffffff000ull) +
- (dram_addr & 0xfff);
+ intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
+ input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) +
+ (dram_addr & 0xfff);
- debugf2(" Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
- intlv_shift, (unsigned long)dram_addr,
- (unsigned long)input_addr);
+ edac_dbg(2, " Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
+ intlv_shift, (unsigned long)dram_addr,
+ (unsigned long)input_addr);
return input_addr;
}
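The interleave bits sit just above the 4K page offset and are simply shifted out, as the formula above shows. A sketch (illustrative only, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    #define GENMASK_ULL(h, l) \
        ((~0ULL << (l)) & (~0ULL >> (63 - (h))))

    /* sketch of dram_addr_to_input_addr(): shift the node-interleave
     * bits out from just above the 4K page offset */
    static uint64_t to_input_addr(uint64_t dram_addr, int intlv_shift)
    {
        return ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) +
               (dram_addr & 0xfff);
    }

    int main(void)
    {
        /* one interleave bit: bit 12 was consumed by node selection */
        printf("0x%llx\n", (unsigned long long)
               to_input_addr(0x3abc, 1));  /* 0x1abc */
        return 0;
    }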
@@ -634,143 +619,18 @@ static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
input_addr =
dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));
- debugf2("SysAdddr 0x%lx translates to InputAddr 0x%lx\n",
- (unsigned long)sys_addr, (unsigned long)input_addr);
+ edac_dbg(2, "SysAdddr 0x%lx translates to InputAddr 0x%lx\n",
+ (unsigned long)sys_addr, (unsigned long)input_addr);
return input_addr;
}
-
-/*
- * @input_addr is an InputAddr associated with the node represented by mci.
- * Translate @input_addr to a DramAddr and return the result.
- */
-static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr)
-{
- struct amd64_pvt *pvt;
- int node_id, intlv_shift;
- u64 bits, dram_addr;
- u32 intlv_sel;
-
- /*
- * Near the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
- * shows how to translate a DramAddr to an InputAddr. Here we reverse
- * this procedure. When translating from a DramAddr to an InputAddr, the
- * bits used for node interleaving are discarded. Here we recover these
- * bits from the IntlvSel field of the DRAM Limit register (section
- * 3.4.4.2) for the node that input_addr is associated with.
- */
- pvt = mci->pvt_info;
- node_id = pvt->mc_node_id;
- BUG_ON((node_id < 0) || (node_id > 7));
-
- intlv_shift = num_node_interleave_bits(pvt->dram_IntlvEn[0]);
-
- if (intlv_shift == 0) {
- debugf1(" InputAddr 0x%lx translates to DramAddr of "
- "same value\n", (unsigned long)input_addr);
-
- return input_addr;
- }
-
- bits = ((input_addr & 0xffffff000ull) << intlv_shift) +
- (input_addr & 0xfff);
-
- intlv_sel = pvt->dram_IntlvSel[node_id] & ((1 << intlv_shift) - 1);
- dram_addr = bits + (intlv_sel << 12);
-
- debugf1("InputAddr 0x%lx translates to DramAddr 0x%lx "
- "(%d node interleave bits)\n", (unsigned long)input_addr,
- (unsigned long)dram_addr, intlv_shift);
-
- return dram_addr;
-}
-
-/*
- * @dram_addr is a DramAddr that maps to the node represented by mci. Convert
- * @dram_addr to a SysAddr.
- */
-static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
- u64 hole_base, hole_offset, hole_size, base, limit, sys_addr;
- int ret = 0;
-
- ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
- &hole_size);
- if (!ret) {
- if ((dram_addr >= hole_base) &&
- (dram_addr < (hole_base + hole_size))) {
- sys_addr = dram_addr + hole_offset;
-
- debugf1("using DHAR to translate DramAddr 0x%lx to "
- "SysAddr 0x%lx\n", (unsigned long)dram_addr,
- (unsigned long)sys_addr);
-
- return sys_addr;
- }
- }
-
- amd64_get_base_and_limit(pvt, pvt->mc_node_id, &base, &limit);
- sys_addr = dram_addr + base;
-
- /*
- * The sys_addr we have computed up to this point is a 40-bit value
- * because the k8 deals with 40-bit values. However, the value we are
- * supposed to return is a full 64-bit physical address. The AMD
- * x86-64 architecture specifies that the most significant implemented
- * address bit through bit 63 of a physical address must be either all
- * 0s or all 1s. Therefore we sign-extend the 40-bit sys_addr to a
- * 64-bit value below. See section 3.4.2 of AMD publication 24592:
- * AMD x86-64 Architecture Programmer's Manual Volume 1 Application
- * Programming.
- */
- sys_addr |= ~((sys_addr & (1ull << 39)) - 1);
-
- debugf1(" Node %d, DramAddr 0x%lx to SysAddr 0x%lx\n",
- pvt->mc_node_id, (unsigned long)dram_addr,
- (unsigned long)sys_addr);
-
- return sys_addr;
-}
-
-/*
- * @input_addr is an InputAddr associated with the node given by mci. Translate
- * @input_addr to a SysAddr.
- */
-static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
- u64 input_addr)
-{
- return dram_addr_to_sys_addr(mci,
- input_addr_to_dram_addr(mci, input_addr));
-}
-
-/*
- * Find the minimum and maximum InputAddr values that map to the given @csrow.
- * Pass back these values in *input_addr_min and *input_addr_max.
- */
-static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
- u64 *input_addr_min, u64 *input_addr_max)
-{
- struct amd64_pvt *pvt;
- u64 base, mask;
-
- pvt = mci->pvt_info;
- BUG_ON((csrow < 0) || (csrow >= pvt->cs_count));
-
- base = base_from_dct_base(pvt, csrow);
- mask = mask_from_dct_mask(pvt, csrow);
-
- *input_addr_min = base & ~mask;
- *input_addr_max = base | mask | pvt->dcs_mask_notused;
-}
-
/* Map the Error address to a PAGE and PAGE OFFSET. */
static inline void error_address_to_page_and_offset(u64 error_address,
- u32 *page, u32 *offset)
+ struct err_info *err)
{
- *page = (u32) (error_address >> PAGE_SHIFT);
- *offset = ((u32) error_address) & ~PAGE_MASK;
+ err->page = (u32) (error_address >> PAGE_SHIFT);
+ err->offset = ((u32) error_address) & ~PAGE_MASK;
}
/*
@@ -788,44 +648,23 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
if (csrow == -1)
- amd64_mc_printk(mci, KERN_ERR,
- "Failed to translate InputAddr to csrow for "
- "address 0x%lx\n", (unsigned long)sys_addr);
+ amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
+ "address 0x%lx\n", (unsigned long)sys_addr);
return csrow;
}
static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
-static u16 extract_syndrome(struct err_regs *err)
-{
- return ((err->nbsh >> 15) & 0xff) | ((err->nbsl >> 16) & 0xff00);
-}
-
-static void amd64_cpu_display_info(struct amd64_pvt *pvt)
-{
- if (boot_cpu_data.x86 == 0x11)
- edac_printk(KERN_DEBUG, EDAC_MC, "F11h CPU detected\n");
- else if (boot_cpu_data.x86 == 0x10)
- edac_printk(KERN_DEBUG, EDAC_MC, "F10h CPU detected\n");
- else if (boot_cpu_data.x86 == 0xf)
- edac_printk(KERN_DEBUG, EDAC_MC, "%s detected\n",
- (pvt->ext_model >= K8_REV_F) ?
- "Rev F or later" : "Rev E or earlier");
- else
- /* we'll hardly ever ever get here */
- edac_printk(KERN_ERR, EDAC_MC, "Unknown cpu!\n");
-}
-
/*
* Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
* are ECC capable.
*/
-static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt)
+static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
{
- int bit;
- enum dev_type edac_cap = EDAC_FLAG_NONE;
+ u8 bit;
+ unsigned long edac_cap = EDAC_FLAG_NONE;
- bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F)
+ bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F)
? 19
: 17;
@@ -835,198 +674,139 @@ static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt)
return edac_cap;
}
+static void debug_display_dimm_sizes(struct amd64_pvt *, u8);
-static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt);
-
-static void amd64_dump_dramcfg_low(u32 dclr, int chan)
+static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
{
- debugf1("F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);
+ edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);
- debugf1(" DIMM type: %sbuffered; all DIMMs support ECC: %s\n",
- (dclr & BIT(16)) ? "un" : "",
- (dclr & BIT(19)) ? "yes" : "no");
+ edac_dbg(1, " DIMM type: %sbuffered; all DIMMs support ECC: %s\n",
+ (dclr & BIT(16)) ? "un" : "",
+ (dclr & BIT(19)) ? "yes" : "no");
- debugf1(" PAR/ERR parity: %s\n",
- (dclr & BIT(8)) ? "enabled" : "disabled");
+ edac_dbg(1, " PAR/ERR parity: %s\n",
+ (dclr & BIT(8)) ? "enabled" : "disabled");
- debugf1(" DCT 128bit mode width: %s\n",
- (dclr & BIT(11)) ? "128b" : "64b");
+ if (pvt->fam == 0x10)
+ edac_dbg(1, " DCT 128bit mode width: %s\n",
+ (dclr & BIT(11)) ? "128b" : "64b");
- debugf1(" x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
- (dclr & BIT(12)) ? "yes" : "no",
- (dclr & BIT(13)) ? "yes" : "no",
- (dclr & BIT(14)) ? "yes" : "no",
- (dclr & BIT(15)) ? "yes" : "no");
+ edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
+ (dclr & BIT(12)) ? "yes" : "no",
+ (dclr & BIT(13)) ? "yes" : "no",
+ (dclr & BIT(14)) ? "yes" : "no",
+ (dclr & BIT(15)) ? "yes" : "no");
}
/* Display and decode various NB registers for debug purposes. */
-static void amd64_dump_misc_regs(struct amd64_pvt *pvt)
+static void dump_misc_regs(struct amd64_pvt *pvt)
{
- int ganged;
+ edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
- debugf1("F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
+ edac_dbg(1, " NB two channel DRAM capable: %s\n",
+ (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");
- debugf1(" NB two channel DRAM capable: %s\n",
- (pvt->nbcap & K8_NBCAP_DCT_DUAL) ? "yes" : "no");
+ edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n",
+ (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
+ (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");
- debugf1(" ECC capable: %s, ChipKill ECC capable: %s\n",
- (pvt->nbcap & K8_NBCAP_SECDED) ? "yes" : "no",
- (pvt->nbcap & K8_NBCAP_CHIPKILL) ? "yes" : "no");
+ debug_dump_dramcfg_low(pvt, pvt->dclr0, 0);
- amd64_dump_dramcfg_low(pvt->dclr0, 0);
+ edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare);
- debugf1("F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare);
+ edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n",
+ pvt->dhar, dhar_base(pvt),
+ (pvt->fam == 0xf) ? k8_dhar_offset(pvt)
+ : f10_dhar_offset(pvt));
- debugf1("F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, "
- "offset: 0x%08x\n",
- pvt->dhar,
- dhar_base(pvt->dhar),
- (boot_cpu_data.x86 == 0xf) ? k8_dhar_offset(pvt->dhar)
- : f10_dhar_offset(pvt->dhar));
+ edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
- debugf1(" DramHoleValid: %s\n",
- (pvt->dhar & DHAR_VALID) ? "yes" : "no");
+ debug_display_dimm_sizes(pvt, 0);
/* everything below this point is Fam10h and above */
- if (boot_cpu_data.x86 == 0xf) {
- amd64_debug_display_dimm_sizes(0, pvt);
+ if (pvt->fam == 0xf)
return;
- }
- amd64_printk(KERN_INFO, "using %s syndromes.\n",
- ((pvt->syn_type == 8) ? "x8" : "x4"));
+ debug_display_dimm_sizes(pvt, 1);
+
+ amd64_info("using %s syndromes.\n", ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
/* Only if NOT ganged does dclr1 have valid info */
if (!dct_ganging_enabled(pvt))
- amd64_dump_dramcfg_low(pvt->dclr1, 1);
-
- /*
- * Determine if ganged and then dump memory sizes for first controller,
- * and if NOT ganged dump info for 2nd controller.
- */
- ganged = dct_ganging_enabled(pvt);
-
- amd64_debug_display_dimm_sizes(0, pvt);
-
- if (!ganged)
- amd64_debug_display_dimm_sizes(1, pvt);
-}
-
-/* Read in both of DBAM registers */
-static void amd64_read_dbam_reg(struct amd64_pvt *pvt)
-{
- amd64_read_pci_cfg(pvt->dram_f2_ctl, DBAM0, &pvt->dbam0);
-
- if (boot_cpu_data.x86 >= 0x10)
- amd64_read_pci_cfg(pvt->dram_f2_ctl, DBAM1, &pvt->dbam1);
+ debug_dump_dramcfg_low(pvt, pvt->dclr1, 1);
}
/*
- * NOTE: CPU Revision Dependent code: Rev E and Rev F
- *
- * Set the DCSB and DCSM mask values depending on the CPU revision value. Also
- * set the shift factor for the DCSB and DCSM values.
- *
- * ->dcs_mask_notused, RevE:
- *
- * To find the max InputAddr for the csrow, start with the base address and set
- * all bits that are "don't care" bits in the test at the start of section
- * 3.5.4 (p. 84).
- *
- * The "don't care" bits are all set bits in the mask and all bits in the gaps
- * between bit ranges [35:25] and [19:13]. The value REV_E_DCS_NOTUSED_BITS
- * represents bits [24:20] and [12:0], which are all bits in the above-mentioned
- * gaps.
- *
- * ->dcs_mask_notused, RevF and later:
- *
- * To find the max InputAddr for the csrow, start with the base address and set
- * all bits that are "don't care" bits in the test at the start of NPT section
- * 4.5.4 (p. 87).
- *
- * The "don't care" bits are all set bits in the mask and all bits in the gaps
- * between bit ranges [36:27] and [21:13].
- *
- * The value REV_F_F1Xh_DCS_NOTUSED_BITS represents bits [26:22] and [12:0],
- * which are all bits in the above-mentioned gaps.
+ * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
*/
-static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt)
-{
-
- if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) {
- pvt->dcsb_base = REV_E_DCSB_BASE_BITS;
- pvt->dcsm_mask = REV_E_DCSM_MASK_BITS;
- pvt->dcs_mask_notused = REV_E_DCS_NOTUSED_BITS;
- pvt->dcs_shift = REV_E_DCS_SHIFT;
- pvt->cs_count = 8;
- pvt->num_dcsm = 8;
+static void prep_chip_selects(struct amd64_pvt *pvt)
+{
+ if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
+ pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
+ pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
+ } else if (pvt->fam == 0x15 && pvt->model >= 0x30) {
+ pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
+ pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
} else {
- pvt->dcsb_base = REV_F_F1Xh_DCSB_BASE_BITS;
- pvt->dcsm_mask = REV_F_F1Xh_DCSM_MASK_BITS;
- pvt->dcs_mask_notused = REV_F_F1Xh_DCS_NOTUSED_BITS;
- pvt->dcs_shift = REV_F_F1Xh_DCS_SHIFT;
-
- if (boot_cpu_data.x86 == 0x11) {
- pvt->cs_count = 4;
- pvt->num_dcsm = 2;
- } else {
- pvt->cs_count = 8;
- pvt->num_dcsm = 4;
- }
+ pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
+ pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
}
}
/*
- * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask hw registers
+ * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
*/
-static void amd64_read_dct_base_mask(struct amd64_pvt *pvt)
+static void read_dct_base_mask(struct amd64_pvt *pvt)
{
- int cs, reg;
+ int cs;
- amd64_set_dct_base_and_mask(pvt);
+ prep_chip_selects(pvt);
- for (cs = 0; cs < pvt->cs_count; cs++) {
- reg = K8_DCSB0 + (cs * 4);
- if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, &pvt->dcsb0[cs]))
- debugf0(" DCSB0[%d]=0x%08x reg: F2x%x\n",
- cs, pvt->dcsb0[cs], reg);
+ for_each_chip_select(cs, 0, pvt) {
+ int reg0 = DCSB0 + (cs * 4);
+ int reg1 = DCSB1 + (cs * 4);
+ u32 *base0 = &pvt->csels[0].csbases[cs];
+ u32 *base1 = &pvt->csels[1].csbases[cs];
- /* If DCT are NOT ganged, then read in DCT1's base */
- if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
- reg = F10_DCSB1 + (cs * 4);
- if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg,
- &pvt->dcsb1[cs]))
- debugf0(" DCSB1[%d]=0x%08x reg: F2x%x\n",
- cs, pvt->dcsb1[cs], reg);
- } else {
- pvt->dcsb1[cs] = 0;
- }
+ if (!amd64_read_dct_pci_cfg(pvt, reg0, base0))
+ edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n",
+ cs, *base0, reg0);
+
+ if (pvt->fam == 0xf || dct_ganging_enabled(pvt))
+ continue;
+
+ if (!amd64_read_dct_pci_cfg(pvt, reg1, base1))
+ edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n",
+ cs, *base1, reg1);
}
- for (cs = 0; cs < pvt->num_dcsm; cs++) {
- reg = K8_DCSM0 + (cs * 4);
- if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, &pvt->dcsm0[cs]))
- debugf0(" DCSM0[%d]=0x%08x reg: F2x%x\n",
- cs, pvt->dcsm0[cs], reg);
+ for_each_chip_select_mask(cs, 0, pvt) {
+ int reg0 = DCSM0 + (cs * 4);
+ int reg1 = DCSM1 + (cs * 4);
+ u32 *mask0 = &pvt->csels[0].csmasks[cs];
+ u32 *mask1 = &pvt->csels[1].csmasks[cs];
- /* If DCT are NOT ganged, then read in DCT1's mask */
- if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
- reg = F10_DCSM1 + (cs * 4);
- if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg,
- &pvt->dcsm1[cs]))
- debugf0(" DCSM1[%d]=0x%08x reg: F2x%x\n",
- cs, pvt->dcsm1[cs], reg);
- } else {
- pvt->dcsm1[cs] = 0;
- }
+ if (!amd64_read_dct_pci_cfg(pvt, reg0, mask0))
+ edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n",
+ cs, *mask0, reg0);
+
+ if (pvt->fam == 0xf || dct_ganging_enabled(pvt))
+ continue;
+
+ if (!amd64_read_dct_pci_cfg(pvt, reg1, mask1))
+ edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n",
+ cs, *mask1, reg1);
}
}
-static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt)
+static enum mem_type determine_memory_type(struct amd64_pvt *pvt, int cs)
{
enum mem_type type;
- if (boot_cpu_data.x86 >= 0x10 || pvt->ext_model >= K8_REV_F) {
+ /* F15h supports only DDR3 */
+ if (pvt->fam >= 0x15)
+ type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
+ else if (pvt->fam == 0x10 || pvt->ext_model >= K8_REV_F) {
if (pvt->dchr0 & DDR3_MODE)
type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
else
@@ -1035,35 +815,22 @@ static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt)
type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
}
- debugf1(" Memory type is: %s\n", edac_mem_types[type]);
+ amd64_info("CS%d: %s\n", cs, edac_mem_types[type]);
return type;
}
-/*
- * Read the DRAM Configuration Low register. It differs between CG, D & E revs
- * and the later RevF memory controllers (DDR vs DDR2)
- *
- * Return:
- * number of memory channels in operation
- * Pass back:
- * contents of the DCL0_LOW register
- */
+/* Get the number of DCT channels the memory controller is using. */
static int k8_early_channel_count(struct amd64_pvt *pvt)
{
- int flag, err = 0;
+ int flag;
- err = amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
- if (err)
- return err;
-
- if ((boot_cpu_data.x86_model >> 4) >= K8_REV_F) {
+ if (pvt->ext_model >= K8_REV_F)
/* RevF (NPT) and later */
- flag = pvt->dclr0 & F10_WIDTH_128;
- } else {
+ flag = pvt->dclr0 & WIDTH_128;
+ else
/* RevE and earlier */
flag = pvt->dclr0 & REVE_WIDTH_128;
- }
/* not used */
pvt->dclr1 = 0;
@@ -1071,66 +838,173 @@ static int k8_early_channel_count(struct amd64_pvt *pvt)
return (flag) ? 2 : 1;
}
-/* extract the ERROR ADDRESS for the K8 CPUs */
-static u64 k8_get_error_address(struct mem_ctl_info *mci,
- struct err_regs *info)
+/* On F10h and later ErrAddr is MC4_ADDR[47:1] */
+static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
{
- return (((u64) (info->nbeah & 0xff)) << 32) +
- (info->nbeal & ~0x03);
+ u64 addr;
+ u8 start_bit = 1;
+ u8 end_bit = 47;
+
+ if (pvt->fam == 0xf) {
+ start_bit = 3;
+ end_bit = 39;
+ }
+
+ addr = m->addr & GENMASK_ULL(end_bit, start_bit);
+
+ /*
+ * Erratum 637 workaround
+ */
+ if (pvt->fam == 0x15) {
+ struct amd64_pvt *pvt;
+ u64 cc6_base, tmp_addr;
+ u32 tmp;
+ u16 mce_nid;
+ u8 intlv_en;
+
+ if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7)
+ return addr;
+
+ mce_nid = amd_get_nb_id(m->extcpu);
+ pvt = mcis[mce_nid]->pvt_info;
+
+ amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
+ intlv_en = tmp >> 21 & 0x7;
+
+ /* add [47:27] + 3 trailing bits */
+ cc6_base = (tmp & GENMASK_ULL(20, 0)) << 3;
+
+ /* reverse and add DramIntlvEn */
+ cc6_base |= intlv_en ^ 0x7;
+
+ /* pin at [47:24] */
+ cc6_base <<= 24;
+
+ if (!intlv_en)
+ return cc6_base | (addr & GENMASK_ULL(23, 0));
+
+ amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp);
+
+ /* faster log2 */
+ tmp_addr = (addr & GENMASK_ULL(23, 12)) << __fls(intlv_en + 1);
+
+ /* OR DramIntlvSel into bits [14:12] */
+ tmp_addr |= (tmp & GENMASK_ULL(23, 21)) >> 9;
+
+ /* add remaining [11:0] bits from original MC4_ADDR */
+ tmp_addr |= addr & GENMASK_ULL(11, 0);
+
+ return cc6_base | tmp_addr;
+ }
+
+ return addr;
}
-/*
- * Read the Base and Limit registers for K8 based Memory controllers; extract
- * fields from the 'raw' reg into separate data fields
- *
- * Isolates: BASE, LIMIT, IntlvEn, IntlvSel, RW_EN
- */
-static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
+static struct pci_dev *pci_get_related_function(unsigned int vendor,
+ unsigned int device,
+ struct pci_dev *related)
{
- u32 low;
- u32 off = dram << 3; /* 8 bytes between DRAM entries */
+ struct pci_dev *dev = NULL;
- amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DRAM_BASE_LOW + off, &low);
+ while ((dev = pci_get_device(vendor, device, dev))) {
+ if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
+ (dev->bus->number == related->bus->number) &&
+ (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
+ break;
+ }
- /* Extract parts into separate data entries */
- pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8;
- pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7;
- pvt->dram_rw_en[dram] = (low & 0x3);
+ return dev;
+}
- amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DRAM_LIMIT_LOW + off, &low);
+static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
+{
+ struct amd_northbridge *nb;
+ struct pci_dev *f1 = NULL;
+ unsigned int pci_func;
+ int off = range << 3;
+ u32 llim;
- /*
- * Extract parts into separate data entries. Limit is the HIGHEST memory
- * location of the region, so lower 24 bits need to be all ones
- */
- pvt->dram_limit[dram] = (((u64) low & 0xFFFF0000) << 8) | 0x00FFFFFF;
- pvt->dram_IntlvSel[dram] = (low >> 8) & 0x7;
- pvt->dram_DstNode[dram] = (low & 0x7);
+ amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off, &pvt->ranges[range].base.lo);
+ amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo);
+
+ if (pvt->fam == 0xf)
+ return;
+
+ if (!dram_rw(pvt, range))
+ return;
+
+ amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off, &pvt->ranges[range].base.hi);
+ amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi);
+
+ /* F15h: factor in CC6 save area by reading dst node's limit reg */
+ if (pvt->fam != 0x15)
+ return;
+
+ nb = node_to_amd_nb(dram_dst_node(pvt, range));
+ if (WARN_ON(!nb))
+ return;
+
+ pci_func = (pvt->model == 0x30) ? PCI_DEVICE_ID_AMD_15H_M30H_NB_F1
+ : PCI_DEVICE_ID_AMD_15H_NB_F1;
+
+ f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc);
+ if (WARN_ON(!f1))
+ return;
+
+ amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);
+
+ pvt->ranges[range].lim.lo &= GENMASK_ULL(15, 0);
+
+ /* {[39:27],111b} */
+ pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;
+
+ pvt->ranges[range].lim.hi &= GENMASK_ULL(7, 0);
+
+ /* [47:40] */
+ pvt->ranges[range].lim.hi |= llim >> 13;
+
+ pci_dev_put(f1);
}
-static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
- struct err_regs *err_info, u64 sys_addr)
+static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
+ struct err_info *err)
{
- struct mem_ctl_info *src_mci;
- int channel, csrow;
- u32 page, offset;
- u16 syndrome;
+ struct amd64_pvt *pvt = mci->pvt_info;
+
+ error_address_to_page_and_offset(sys_addr, err);
+
+ /*
+ * Find out which node the error address belongs to. This may be
+ * different from the node that detected the error.
+ */
+ err->src_mci = find_mc_by_sys_addr(mci, sys_addr);
+ if (!err->src_mci) {
+ amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
+ (unsigned long)sys_addr);
+ err->err_code = ERR_NODE;
+ return;
+ }
- syndrome = extract_syndrome(err_info);
+ /* Now map the sys_addr to a CSROW */
+ err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr);
+ if (err->csrow < 0) {
+ err->err_code = ERR_CSROW;
+ return;
+ }
/* CHIPKILL enabled */
- if (err_info->nbcfg & K8_NBCFG_CHIPKILL) {
- channel = get_channel_from_ecc_syndrome(mci, syndrome);
- if (channel < 0) {
+ if (pvt->nbcfg & NBCFG_CHIPKILL) {
+ err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
+ if (err->channel < 0) {
/*
* Syndrome didn't map, so we don't know which of the
* 2 DIMMs is in error. So we need to ID 'both' of them
* as suspect.
*/
- amd64_mc_printk(mci, KERN_WARNING,
- "unknown syndrome 0x%04x - possible "
- "error reporting race\n", syndrome);
- edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+ amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - "
+ "possible error reporting race\n",
+ err->syndrome);
+ err->err_code = ERR_CHANNEL;
return;
}
} else {
@@ -1142,46 +1016,69 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
* was obtained from email communication with someone at AMD.
* (Wish the email was placed in this comment - norsk)
*/
- channel = ((sys_addr & BIT(3)) != 0);
+ err->channel = ((sys_addr & BIT(3)) != 0);
}
+}
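For the non-ChipKill fallback above, the channel really is just bit 3 of the SysAddr (per the AMD hint referenced in the comment). Spelled out (illustrative only, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t sys_addr = 0x12345678ULL;

        /* bit 3 of 0x...78 is set -> channel 1 */
        printf("channel %d\n", (sys_addr & (1ULL << 3)) != 0);
        return 0;
    }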
- /*
- * Find out which node the error address belongs to. This may be
- * different from the node that detected the error.
- */
- src_mci = find_mc_by_sys_addr(mci, sys_addr);
- if (!src_mci) {
- amd64_mc_printk(mci, KERN_ERR,
- "failed to map error address 0x%lx to a node\n",
- (unsigned long)sys_addr);
- edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
- return;
- }
+static int ddr2_cs_size(unsigned i, bool dct_width)
+{
+ unsigned shift = 0;
- /* Now map the sys_addr to a CSROW */
- csrow = sys_addr_to_csrow(src_mci, sys_addr);
- if (csrow < 0) {
- edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR);
- } else {
- error_address_to_page_and_offset(sys_addr, &page, &offset);
+ if (i <= 2)
+ shift = i;
+ else if (!(i & 0x1))
+ shift = i >> 1;
+ else
+ shift = (i + 1) >> 1;
- edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow,
- channel, EDAC_MOD_STR);
- }
+ return 128 << (shift + !!dct_width);
}
-static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, int cs_mode)
+static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
+ unsigned cs_mode)
{
- int *dbam_map;
+ u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
- if (pvt->ext_model >= K8_REV_F)
- dbam_map = ddr2_dbam;
- else if (pvt->ext_model >= K8_REV_D)
- dbam_map = ddr2_dbam_revD;
- else
- dbam_map = ddr2_dbam_revCG;
+ if (pvt->ext_model >= K8_REV_F) {
+ WARN_ON(cs_mode > 11);
+ return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
+ }
+ else if (pvt->ext_model >= K8_REV_D) {
+ unsigned diff;
+ WARN_ON(cs_mode > 10);
- return dbam_map[cs_mode];
+ /*
+ * the below calculation, besides trying to win an obfuscated C
+ * contest, maps cs_mode values to DIMM chip select sizes. The
+ * mappings are:
+ *
+ * cs_mode CS size (MB)
+ * ======= ============
+ * 0 32
+ * 1 64
+ * 2 128
+ * 3 128
+ * 4 256
+ * 5 512
+ * 6 256
+ * 7 512
+ * 8 1024
+ * 9 1024
+ * 10 2048
+ *
+ * Basically, it calculates a value with which to shift the
+ * smallest CS size of 32MB.
+ *
+ * ddr[23]_cs_size have a similar purpose.
+ */
+ diff = cs_mode/3 + (unsigned)(cs_mode > 5);
+
+ return 32 << (cs_mode - diff);
+ } else {
+ WARN_ON(cs_mode > 6);
+ return 32 << cs_mode;
+ }
}
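
[Editor's note, not part of the patch: the cs_mode table in the comment above is compact enough to sanity-check by hand. A minimal standalone userspace sketch (plain C; the helper name is mine) reproducing the revD/revE branch:

#include <stdio.h>

/* mirrors the K8 revD..pre-revF branch of k8_dbam_to_chip_select() */
static unsigned int k8_revd_cs_size_mb(unsigned int cs_mode)
{
        unsigned int diff = cs_mode / 3 + (unsigned int)(cs_mode > 5);

        return 32 << (cs_mode - diff);
}

int main(void)
{
        unsigned int m;

        for (m = 0; m <= 10; m++)       /* valid range, per the WARN_ON */
                printf("cs_mode %2u -> %4u MB\n", m, k8_revd_cs_size_mb(m));
        return 0;
}

Running it prints exactly the 32..2048 MB column from the comment, which is a quick way to convince yourself the shift trick is right.]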
/*
@@ -1192,17 +1089,13 @@ static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, int cs_mode)
* Pass back:
* contents of the DCL0_LOW register
*/
-static int f10_early_channel_count(struct amd64_pvt *pvt)
+static int f1x_early_channel_count(struct amd64_pvt *pvt)
{
- int dbams[] = { DBAM0, DBAM1 };
int i, j, channels = 0;
- u32 dbam;
- /* If we are in 128 bit mode, then we are using 2 channels */
- if (pvt->dclr0 & F10_WIDTH_128) {
- channels = 2;
- return channels;
- }
+ /* On F10h, if we are in 128 bit mode, then we are using 2 channels */
+ if (pvt->fam == 0x10 && (pvt->dclr0 & WIDTH_128))
+ return 2;
/*
* Need to check if in unganged mode: In such, there are 2 channels,
@@ -1212,16 +1105,15 @@ static int f10_early_channel_count(struct amd64_pvt *pvt)
* Need to check DCT0[0] and DCT1[0] to see if only one of them has
* their CSEnable bit on. If so, then SINGLE DIMM case.
*/
- debugf0("Data width is not 128 bits - need more decoding\n");
+ edac_dbg(0, "Data width is not 128 bits - need more decoding\n");
/*
* Check DRAM Bank Address Mapping values for each DIMM to see if there
* is more than just one DIMM present in unganged mode. Need to check
* both controllers since DIMMs can be placed in either one.
*/
- for (i = 0; i < ARRAY_SIZE(dbams); i++) {
- if (amd64_read_pci_cfg(pvt->dram_f2_ctl, dbams[i], &dbam))
- goto err_reg;
+ for (i = 0; i < 2; i++) {
+ u32 dbam = (i ? pvt->dbam1 : pvt->dbam0);
for (j = 0; j < 4; j++) {
if (DBAM_DIMM(j, dbam) > 0) {
@@ -1234,248 +1126,239 @@ static int f10_early_channel_count(struct amd64_pvt *pvt)
if (channels > 2)
channels = 2;
- debugf0("MCT channel count: %d\n", channels);
+ amd64_info("MCT channel count: %d\n", channels);
return channels;
-
-err_reg:
- return -1;
-
}
-static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, int cs_mode)
+static int ddr3_cs_size(unsigned i, bool dct_width)
{
- int *dbam_map;
+ unsigned shift = 0;
+ int cs_size = 0;
- if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE)
- dbam_map = ddr3_dbam;
+ if (i == 0 || i == 3 || i == 4)
+ cs_size = -1;
+ else if (i <= 2)
+ shift = i;
+ else if (i == 12)
+ shift = 7;
+ else if (!(i & 0x1))
+ shift = i >> 1;
else
- dbam_map = ddr2_dbam;
-
- return dbam_map[cs_mode];
-}
+ shift = (i + 1) >> 1;
-/* Enable extended configuration access via 0xCF8 feature */
-static void amd64_setup(struct amd64_pvt *pvt)
-{
- u32 reg;
-
- amd64_read_pci_cfg(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, &reg);
+ if (cs_size != -1)
+ cs_size = (128 * (1 << !!dct_width)) << shift;
- pvt->flags.cf8_extcfg = !!(reg & F10_NB_CFG_LOW_ENABLE_EXT_CFG);
- reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG;
- pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg);
+ return cs_size;
}
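
[Editor's note, not part of the patch: the same kind of self-check works for ddr3_cs_size(). A userspace mirror (names are mine) that dumps the DDR3 chip-select sizes for both DCT widths, with -1 marking the reserved encodings:

#include <stdio.h>

/* mirror of ddr3_cs_size() above, for a quick table dump */
static int ddr3_cs_size_mb(unsigned i, int wide)
{
        unsigned shift = 0;

        if (i == 0 || i == 3 || i == 4)
                return -1;              /* reserved encodings */
        if (i <= 2)
                shift = i;
        else if (i == 12)
                shift = 7;
        else if (!(i & 0x1))
                shift = i >> 1;
        else
                shift = (i + 1) >> 1;

        return (128 * (1 << !!wide)) << shift;
}

int main(void)
{
        unsigned i;

        for (i = 0; i <= 12; i++)
                printf("cs_mode %2u: 64-bit %5d MB, 128-bit %5d MB\n",
                       i, ddr3_cs_size_mb(i, 0), ddr3_cs_size_mb(i, 1));
        return 0;
}]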
-/* Restore the extended configuration access via 0xCF8 feature */
-static void amd64_teardown(struct amd64_pvt *pvt)
+static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
+ unsigned cs_mode)
{
- u32 reg;
+ u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
- amd64_read_pci_cfg(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, &reg);
+ WARN_ON(cs_mode > 11);
- reg &= ~F10_NB_CFG_LOW_ENABLE_EXT_CFG;
- if (pvt->flags.cf8_extcfg)
- reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG;
- pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg);
+ if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE)
+ return ddr3_cs_size(cs_mode, dclr & WIDTH_128);
+ else
+ return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
}
-static u64 f10_get_error_address(struct mem_ctl_info *mci,
- struct err_regs *info)
+/*
+ * F15h supports only 64bit DCT interfaces
+ */
+static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
+ unsigned cs_mode)
{
- return (((u64) (info->nbeah & 0xffff)) << 32) +
- (info->nbeal & ~0x01);
+ WARN_ON(cs_mode > 12);
+
+ return ddr3_cs_size(cs_mode, false);
}
/*
- * Read the Base and Limit registers for F10 based Memory controllers. Extract
- * fields from the 'raw' reg into separate data fields.
- *
- * Isolates: BASE, LIMIT, IntlvEn, IntlvSel, RW_EN.
+ * F16h and F15h model 30h have only limited cs_modes.
*/
-static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
+static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
+ unsigned cs_mode)
{
- u32 high_offset, low_offset, high_base, low_base, high_limit, low_limit;
-
- low_offset = K8_DRAM_BASE_LOW + (dram << 3);
- high_offset = F10_DRAM_BASE_HIGH + (dram << 3);
+ WARN_ON(cs_mode > 12);
- /* read the 'raw' DRAM BASE Address register */
- amd64_read_pci_cfg(pvt->addr_f1_ctl, low_offset, &low_base);
-
- /* Read from the ECS data register */
- amd64_read_pci_cfg(pvt->addr_f1_ctl, high_offset, &high_base);
+ if (cs_mode == 6 || cs_mode == 8 ||
+ cs_mode == 9 || cs_mode == 12)
+ return -1;
+ else
+ return ddr3_cs_size(cs_mode, false);
+}
- /* Extract parts into separate data entries */
- pvt->dram_rw_en[dram] = (low_base & 0x3);
+static void read_dram_ctl_register(struct amd64_pvt *pvt)
+{
- if (pvt->dram_rw_en[dram] == 0)
+ if (pvt->fam == 0xf)
return;
- pvt->dram_IntlvEn[dram] = (low_base >> 8) & 0x7;
-
- pvt->dram_base[dram] = (((u64)high_base & 0x000000FF) << 40) |
- (((u64)low_base & 0xFFFF0000) << 8);
+ if (!amd64_read_dct_pci_cfg(pvt, DCT_SEL_LO, &pvt->dct_sel_lo)) {
+ edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n",
+ pvt->dct_sel_lo, dct_sel_baseaddr(pvt));
- low_offset = K8_DRAM_LIMIT_LOW + (dram << 3);
- high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3);
+ edac_dbg(0, " DCTs operate in %s mode\n",
+ (dct_ganging_enabled(pvt) ? "ganged" : "unganged"));
- /* read the 'raw' LIMIT registers */
- amd64_read_pci_cfg(pvt->addr_f1_ctl, low_offset, &low_limit);
+ if (!dct_ganging_enabled(pvt))
+ edac_dbg(0, " Address range split per DCT: %s\n",
+ (dct_high_range_enabled(pvt) ? "yes" : "no"));
- /* Read from the ECS data register for the HIGH portion */
- amd64_read_pci_cfg(pvt->addr_f1_ctl, high_offset, &high_limit);
+ edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
+ (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
+ (dct_memory_cleared(pvt) ? "yes" : "no"));
- pvt->dram_DstNode[dram] = (low_limit & 0x7);
- pvt->dram_IntlvSel[dram] = (low_limit >> 8) & 0x7;
+ edac_dbg(0, " channel interleave: %s, "
+ "interleave bits selector: 0x%x\n",
+ (dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
+ dct_sel_interleave_addr(pvt));
+ }
- /*
- * Extract address values and form a LIMIT address. Limit is the HIGHEST
- * memory location of the region, so low 24 bits need to be all ones.
- */
- pvt->dram_limit[dram] = (((u64)high_limit & 0x000000FF) << 40) |
- (((u64) low_limit & 0xFFFF0000) << 8) |
- 0x00FFFFFF;
+ amd64_read_dct_pci_cfg(pvt, DCT_SEL_HI, &pvt->dct_sel_hi);
}
-static void f10_read_dram_ctl_register(struct amd64_pvt *pvt)
-{
-
- if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCTL_SEL_LOW,
- &pvt->dram_ctl_select_low)) {
- debugf0("F2x110 (DCTL Sel. Low): 0x%08x, "
- "High range addresses at: 0x%x\n",
- pvt->dram_ctl_select_low,
- dct_sel_baseaddr(pvt));
-
- debugf0(" DCT mode: %s, All DCTs on: %s\n",
- (dct_ganging_enabled(pvt) ? "ganged" : "unganged"),
- (dct_dram_enabled(pvt) ? "yes" : "no"));
-
- if (!dct_ganging_enabled(pvt))
- debugf0(" Address range split per DCT: %s\n",
- (dct_high_range_enabled(pvt) ? "yes" : "no"));
-
- debugf0(" DCT data interleave for ECC: %s, "
- "DRAM cleared since last warm reset: %s\n",
- (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
- (dct_memory_cleared(pvt) ? "yes" : "no"));
-
- debugf0(" DCT channel interleave: %s, "
- "DCT interleave bits selector: 0x%x\n",
- (dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
- dct_sel_interleave_addr(pvt));
+/*
+ * Determine channel (DCT) based on the interleaving mode (see F15h M30h BKDG,
+ * 2.10.12 Memory Interleaving Modes).
+ */
+static u8 f15_m30h_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
+ u8 intlv_en, int num_dcts_intlv,
+ u32 dct_sel)
+{
+ u8 channel = 0;
+ u8 select;
+
+ if (!intlv_en)
+ return (u8)dct_sel;
+
+ if (num_dcts_intlv == 2) {
+ select = (sys_addr >> 8) & 0x3;
+ channel = select ? 0x3 : 0;
+ } else if (num_dcts_intlv == 4) {
+ u8 intlv_addr = dct_sel_interleave_addr(pvt);
+ switch (intlv_addr) {
+ case 0x4:
+ channel = (sys_addr >> 8) & 0x3;
+ break;
+ case 0x5:
+ channel = (sys_addr >> 9) & 0x3;
+ break;
+ }
}
-
- amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCTL_SEL_HIGH,
- &pvt->dram_ctl_select_high);
+ return channel;
}
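
[Editor's note, not part of the patch: the BKDG 2.10.12 cases above boil down to which pair of address bits selects the DCT; note the 2-DCT case returns either DCT0 or DCT3 (channel = select ? 0x3 : 0). A quick sketch of the 4-DCT bit extraction, with an arbitrary sample address:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t addr = 0x700ULL;       /* arbitrary sample address */

        /* 4-DCT interleave: intlv_addr 0x4 -> bits [9:8], 0x5 -> [10:9] */
        printf("intlv_addr 0x4: channel %llu\n",
               (unsigned long long)((addr >> 8) & 0x3));
        printf("intlv_addr 0x5: channel %llu\n",
               (unsigned long long)((addr >> 9) & 0x3));
        return 0;
}]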
/*
- * determine channel based on the interleaving mode: F10h BKDG, 2.8.9 Memory
+ * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9 Memory
* Interleaving Modes.
*/
-static u32 f10_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
- int hi_range_sel, u32 intlv_en)
+static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
+ bool hi_range_sel, u8 intlv_en)
{
- u32 cs, temp, dct_sel_high = (pvt->dram_ctl_select_low >> 1) & 1;
+ u8 dct_sel_high = (pvt->dct_sel_lo >> 1) & 1;
if (dct_ganging_enabled(pvt))
- cs = 0;
- else if (hi_range_sel)
- cs = dct_sel_high;
- else if (dct_interleave_enabled(pvt)) {
- /*
- * see F2x110[DctSelIntLvAddr] - channel interleave mode
- */
- if (dct_sel_interleave_addr(pvt) == 0)
- cs = sys_addr >> 6 & 1;
- else if ((dct_sel_interleave_addr(pvt) >> 1) & 1) {
- temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;
+ return 0;
- if (dct_sel_interleave_addr(pvt) & 1)
- cs = (sys_addr >> 9 & 1) ^ temp;
- else
- cs = (sys_addr >> 6 & 1) ^ temp;
- } else if (intlv_en & 4)
- cs = sys_addr >> 15 & 1;
- else if (intlv_en & 2)
- cs = sys_addr >> 14 & 1;
- else if (intlv_en & 1)
- cs = sys_addr >> 13 & 1;
- else
- cs = sys_addr >> 12 & 1;
- } else if (dct_high_range_enabled(pvt) && !dct_ganging_enabled(pvt))
- cs = ~dct_sel_high & 1;
- else
- cs = 0;
+ if (hi_range_sel)
+ return dct_sel_high;
- return cs;
-}
+ /*
+ * see F2x110[DctSelIntLvAddr] - channel interleave mode
+ */
+ if (dct_interleave_enabled(pvt)) {
+ u8 intlv_addr = dct_sel_interleave_addr(pvt);
-static inline u32 f10_map_intlv_en_to_shift(u32 intlv_en)
-{
- if (intlv_en == 1)
- return 1;
- else if (intlv_en == 3)
- return 2;
- else if (intlv_en == 7)
- return 3;
+ /* return DCT select function: 0=DCT0, 1=DCT1 */
+ if (!intlv_addr)
+ return sys_addr >> 6 & 1;
+
+ if (intlv_addr & 0x2) {
+ u8 shift = intlv_addr & 0x1 ? 9 : 6;
+ u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;
+
+ return ((sys_addr >> shift) & 1) ^ temp;
+ }
+
+ return (sys_addr >> (12 + hweight8(intlv_en))) & 1;
+ }
+
+ if (dct_high_range_enabled(pvt))
+ return ~dct_sel_high & 1;
return 0;
}
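
[Editor's note, not part of the patch: the interleave-address cases above are easiest to see with concrete bits. A userspace sketch of the intlv_addr & 0x2 hash branch, with a portable stand-in for hweight_long() and an arbitrary sample address:

#include <stdio.h>
#include <stdint.h>

static unsigned hweight32_(uint32_t v)
{
        unsigned n = 0;

        while (v) {
                n += v & 1;
                v >>= 1;
        }
        return n;
}

/* DCT pick: XOR the chosen address bit with the parity of sys_addr[20:16] */
static unsigned channel_xor_hash(uint64_t sys_addr, unsigned intlv_addr)
{
        unsigned shift = (intlv_addr & 0x1) ? 9 : 6;
        unsigned parity = hweight32_((sys_addr >> 16) & 0x1f) & 1;

        return ((sys_addr >> shift) & 1) ^ parity;
}

int main(void)
{
        uint64_t addr = 0x12340240ULL;  /* arbitrary sample address */

        printf("intlv_addr=0x2 -> DCT %u\n", channel_xor_hash(addr, 0x2));
        printf("intlv_addr=0x3 -> DCT %u\n", channel_xor_hash(addr, 0x3));
        return 0;
}

Folding the parity of the higher bits into the select bit means the DCT choice doesn't depend on a single address bit alone.]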
-/* See F10h BKDG, 2.8.10.2 DctSelBaseOffset Programming */
-static inline u64 f10_get_base_addr_offset(u64 sys_addr, int hi_range_sel,
- u32 dct_sel_base_addr,
- u64 dct_sel_base_off,
- u32 hole_valid, u32 hole_off,
- u64 dram_base)
+/* Convert the sys_addr to the normalized DCT address */
+static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
+ u64 sys_addr, bool hi_rng,
+ u32 dct_sel_base_addr)
{
u64 chan_off;
+ u64 dram_base = get_dram_base(pvt, range);
+ u64 hole_off = f10_dhar_offset(pvt);
+ u64 dct_sel_base_off = (pvt->dct_sel_hi & 0xFFFFFC00) << 16;
- if (hi_range_sel) {
- if (!(dct_sel_base_addr & 0xFFFF0000) &&
- hole_valid && (sys_addr >= 0x100000000ULL))
- chan_off = hole_off << 16;
+ if (hi_rng) {
+ /*
+ * if
+ * base address of high range is below 4Gb
+ * (bits [47:27] at [31:11])
+ * DRAM address space on this DCT is hoisted above 4Gb &&
+ * sys_addr > 4Gb
+ *
+ * remove hole offset from sys_addr
+ * else
+ * remove high range offset from sys_addr
+ */
+ if ((!(dct_sel_base_addr >> 16) ||
+ dct_sel_base_addr < dhar_base(pvt)) &&
+ dhar_valid(pvt) &&
+ (sys_addr >= BIT_64(32)))
+ chan_off = hole_off;
else
chan_off = dct_sel_base_off;
} else {
- if (hole_valid && (sys_addr >= 0x100000000ULL))
- chan_off = hole_off << 16;
+ /*
+ * if
+ * we have a valid hole &&
+ * sys_addr > 4Gb
+ *
+ * remove hole
+ * else
+ * remove dram base to normalize to DCT address
+ */
+ if (dhar_valid(pvt) && (sys_addr >= BIT_64(32)))
+ chan_off = hole_off;
else
- chan_off = dram_base & 0xFFFFF8000000ULL;
+ chan_off = dram_base;
}
- return (sys_addr & 0x0000FFFFFFFFFFC0ULL) -
- (chan_off & 0x0000FFFFFF800000ULL);
+ return (sys_addr & GENMASK_ULL(47, 6)) - (chan_off & GENMASK_ULL(47, 23));
}
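
[Editor's note, not part of the patch: a worked example of the final GENMASK line, with a userspace GENMASK_ULL stand-in and hypothetical values (6 GB system address, DRAM base at 4 GB, no hoisting):

#include <stdio.h>
#include <stdint.h>

/* userspace stand-in for the kernel's GENMASK_ULL(h, l) */
#define GENMASK_ULL_(h, l) \
        (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

int main(void)
{
        uint64_t sys_addr = 0x180000000ULL;     /* 6 GB */
        uint64_t chan_off = 0x100000000ULL;     /* dram_base, 4 GB */

        /*
         * Keep sys_addr[47:6] (cache-line granularity) and subtract the
         * channel offset at its architectural 8 MB granularity [47:23].
         */
        uint64_t norm = (sys_addr & GENMASK_ULL_(47, 6)) -
                        (chan_off & GENMASK_ULL_(47, 23));

        printf("normalized DCT addr: 0x%llx\n", (unsigned long long)norm);
        return 0;
}

The 6 GB system address normalizes to a 2 GB DCT address once the 4 GB base is subtracted.]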
-/* Hack for the time being - Can we get this from BIOS?? */
-#define CH0SPARE_RANK 0
-#define CH1SPARE_RANK 1
-
/*
* checks if the csrow passed in is marked as SPARED, if so returns the new
* spare row
*/
-static inline int f10_process_possible_spare(int csrow,
- u32 cs, struct amd64_pvt *pvt)
-{
- u32 swap_done;
- u32 bad_dram_cs;
-
- /* Depending on channel, isolate respective SPARING info */
- if (cs) {
- swap_done = F10_ONLINE_SPARE_SWAPDONE1(pvt->online_spare);
- bad_dram_cs = F10_ONLINE_SPARE_BADDRAM_CS1(pvt->online_spare);
- if (swap_done && (csrow == bad_dram_cs))
- csrow = CH1SPARE_RANK;
- } else {
- swap_done = F10_ONLINE_SPARE_SWAPDONE0(pvt->online_spare);
- bad_dram_cs = F10_ONLINE_SPARE_BADDRAM_CS0(pvt->online_spare);
- if (swap_done && (csrow == bad_dram_cs))
- csrow = CH0SPARE_RANK;
+static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
+{
+ int tmp_cs;
+
+ if (online_spare_swap_done(pvt, dct) &&
+ csrow == online_spare_bad_dramcs(pvt, dct)) {
+
+ for_each_chip_select(tmp_cs, dct, pvt) {
+ if (chip_select_base(tmp_cs, dct, pvt) & 0x2) {
+ csrow = tmp_cs;
+ break;
+ }
+ }
}
return csrow;
}
@@ -1488,94 +1371,114 @@ static inline int f10_process_possible_spare(int csrow,
* -EINVAL: NOT FOUND
* 0..csrow = Chip-Select Row
*/
-static int f10_lookup_addr_in_dct(u32 in_addr, u32 nid, u32 cs)
+static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
{
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
- u32 cs_base, cs_mask;
+ u64 cs_base, cs_mask;
int cs_found = -EINVAL;
int csrow;
- mci = mci_lookup[nid];
+ mci = mcis[nid];
if (!mci)
return cs_found;
pvt = mci->pvt_info;
- debugf1("InputAddr=0x%x channelselect=%d\n", in_addr, cs);
-
- for (csrow = 0; csrow < pvt->cs_count; csrow++) {
+ edac_dbg(1, "input addr: 0x%llx, DCT: %d\n", in_addr, dct);
- cs_base = amd64_get_dct_base(pvt, cs, csrow);
- if (!(cs_base & K8_DCSB_CS_ENABLE))
+ for_each_chip_select(csrow, dct, pvt) {
+ if (!csrow_enabled(csrow, dct, pvt))
continue;
- /*
- * We have an ENABLED CSROW, Isolate just the MASK bits of the
- * target: [28:19] and [13:5], which map to [36:27] and [21:13]
- * of the actual address.
- */
- cs_base &= REV_F_F1Xh_DCSB_BASE_BITS;
-
- /*
- * Get the DCT Mask, and ENABLE the reserved bits: [18:16] and
- * [4:0] to become ON. Then mask off bits [28:0] ([36:8])
- */
- cs_mask = amd64_get_dct_mask(pvt, cs, csrow);
+ get_cs_base_and_mask(pvt, csrow, dct, &cs_base, &cs_mask);
- debugf1(" CSROW=%d CSBase=0x%x RAW CSMask=0x%x\n",
- csrow, cs_base, cs_mask);
+ edac_dbg(1, " CSROW=%d CSBase=0x%llx CSMask=0x%llx\n",
+ csrow, cs_base, cs_mask);
- cs_mask = (cs_mask | 0x0007C01F) & 0x1FFFFFFF;
+ cs_mask = ~cs_mask;
- debugf1(" Final CSMask=0x%x\n", cs_mask);
- debugf1(" (InputAddr & ~CSMask)=0x%x "
- "(CSBase & ~CSMask)=0x%x\n",
- (in_addr & ~cs_mask), (cs_base & ~cs_mask));
+ edac_dbg(1, " (InputAddr & ~CSMask)=0x%llx (CSBase & ~CSMask)=0x%llx\n",
+ (in_addr & cs_mask), (cs_base & cs_mask));
- if ((in_addr & ~cs_mask) == (cs_base & ~cs_mask)) {
- cs_found = f10_process_possible_spare(csrow, cs, pvt);
+ if ((in_addr & cs_mask) == (cs_base & cs_mask)) {
+ if (pvt->fam == 0x15 && pvt->model >= 0x30) {
+ cs_found = csrow;
+ break;
+ }
+ cs_found = f10_process_possible_spare(pvt, dct, csrow);
- debugf1(" MATCH csrow=%d\n", cs_found);
+ edac_dbg(1, " MATCH csrow=%d\n", cs_found);
break;
}
}
return cs_found;
}
-/* For a given @dram_range, check if @sys_addr falls within it. */
-static int f10_match_to_this_node(struct amd64_pvt *pvt, int dram_range,
- u64 sys_addr, int *nid, int *chan_sel)
+/*
+ * See F2x10C. Non-interleaved graphics framebuffer memory under the 16G is
+ * swapped with a region located at the bottom of memory so that the GPU can use
+ * the interleaved region and thus two channels.
+ */
+static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
{
- int node_id, cs_found = -EINVAL, high_range = 0;
- u32 intlv_en, intlv_sel, intlv_shift, hole_off;
- u32 hole_valid, tmp, dct_sel_base, channel;
- u64 dram_base, chan_addr, dct_sel_base_off;
+ u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr;
- dram_base = pvt->dram_base[dram_range];
- intlv_en = pvt->dram_IntlvEn[dram_range];
+ if (pvt->fam == 0x10) {
+ /* only revC3 and revE have that feature */
+ if (pvt->model < 4 || (pvt->model < 0xa && pvt->stepping < 3))
+ return sys_addr;
+ }
- node_id = pvt->dram_DstNode[dram_range];
- intlv_sel = pvt->dram_IntlvSel[dram_range];
+ amd64_read_dct_pci_cfg(pvt, SWAP_INTLV_REG, &swap_reg);
- debugf1("(dram=%d) Base=0x%llx SystemAddr= 0x%llx Limit=0x%llx\n",
- dram_range, dram_base, sys_addr, pvt->dram_limit[dram_range]);
+ if (!(swap_reg & 0x1))
+ return sys_addr;
- /*
- * This assumes that one node's DHAR is the same as all the other
- * nodes' DHAR.
- */
- hole_off = (pvt->dhar & 0x0000FF80);
- hole_valid = (pvt->dhar & 0x1);
- dct_sel_base_off = (pvt->dram_ctl_select_high & 0xFFFFFC00) << 16;
+ swap_base = (swap_reg >> 3) & 0x7f;
+ swap_limit = (swap_reg >> 11) & 0x7f;
+ rgn_size = (swap_reg >> 20) & 0x7f;
+ tmp_addr = sys_addr >> 27;
+
+ if (!(sys_addr >> 34) &&
+ (((tmp_addr >= swap_base) &&
+ (tmp_addr <= swap_limit)) ||
+ (tmp_addr < rgn_size)))
+ return sys_addr ^ (u64)swap_base << 27;
- debugf1(" HoleOffset=0x%x HoleValid=0x%x IntlvSel=0x%x\n",
- hole_off, hole_valid, intlv_sel);
+ return sys_addr;
+}
+
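+
[Editor's note, not part of the patch: the swap itself is just an XOR at 128 MB (1 << 27) granularity. A toy model with a hypothetical swap_base, only meaningful when the window is aligned so the XOR acts as an add/subtract, showing the mapping is its own inverse:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t swap_base = 0x10;              /* hypothetical: 2 GB, in 128 MB units */
        uint64_t lo = 0x05ULL << 27;            /* bottom region (assume rgn_size > 5) */
        uint64_t hi = (swap_base + 0x05) << 27; /* inside the swap window */

        /* each address XORs to the other: the swap is symmetric */
        printf("0x%llx <-> 0x%llx\n",
               (unsigned long long)(lo ^ (swap_base << 27)),
               (unsigned long long)(hi ^ (swap_base << 27)));
        return 0;
}]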
+/* For a given @dram_range, check if @sys_addr falls within it. */
+static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
+ u64 sys_addr, int *chan_sel)
+{
+ int cs_found = -EINVAL;
+ u64 chan_addr;
+ u32 dct_sel_base;
+ u8 channel;
+ bool high_range = false;
+
+ u8 node_id = dram_dst_node(pvt, range);
+ u8 intlv_en = dram_intlv_en(pvt, range);
+ u32 intlv_sel = dram_intlv_sel(pvt, range);
+
+ edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
+ range, sys_addr, get_dram_limit(pvt, range));
+
+ if (dhar_valid(pvt) &&
+ dhar_base(pvt) <= sys_addr &&
+ sys_addr < BIT_64(32)) {
+ amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
+ sys_addr);
+ return -EINVAL;
+ }
- if (intlv_en ||
- (intlv_sel != ((sys_addr >> 12) & intlv_en)))
+ if (intlv_en && (intlv_sel != ((sys_addr >> 12) & intlv_en)))
return -EINVAL;
+ sys_addr = f1x_swap_interleaved_region(pvt, sys_addr);
+
dct_sel_base = dct_sel_baseaddr(pvt);
/*
@@ -1585,65 +1488,187 @@ static int f10_match_to_this_node(struct amd64_pvt *pvt, int dram_range,
if (dct_high_range_enabled(pvt) &&
!dct_ganging_enabled(pvt) &&
((sys_addr >> 27) >= (dct_sel_base >> 11)))
- high_range = 1;
+ high_range = true;
- channel = f10_determine_channel(pvt, sys_addr, high_range, intlv_en);
+ channel = f1x_determine_channel(pvt, sys_addr, high_range, intlv_en);
- chan_addr = f10_get_base_addr_offset(sys_addr, high_range, dct_sel_base,
- dct_sel_base_off, hole_valid,
- hole_off, dram_base);
+ chan_addr = f1x_get_norm_dct_addr(pvt, range, sys_addr,
+ high_range, dct_sel_base);
- intlv_shift = f10_map_intlv_en_to_shift(intlv_en);
+ /* Remove node interleaving, see F1x120 */
+ if (intlv_en)
+ chan_addr = ((chan_addr >> (12 + hweight8(intlv_en))) << 12) |
+ (chan_addr & 0xfff);
- /* remove Node ID (in case of memory interleaving) */
- tmp = chan_addr & 0xFC0;
-
- chan_addr = ((chan_addr >> intlv_shift) & 0xFFFFFFFFF000ULL) | tmp;
-
- /* remove channel interleave and hash */
+ /* remove channel interleave */
if (dct_interleave_enabled(pvt) &&
!dct_high_range_enabled(pvt) &&
!dct_ganging_enabled(pvt)) {
- if (dct_sel_interleave_addr(pvt) != 1)
- chan_addr = (chan_addr >> 1) & 0xFFFFFFFFFFFFFFC0ULL;
- else {
- tmp = chan_addr & 0xFC0;
- chan_addr = ((chan_addr & 0xFFFFFFFFFFFFC000ULL) >> 1)
- | tmp;
- }
+
+ if (dct_sel_interleave_addr(pvt) != 1) {
+ if (dct_sel_interleave_addr(pvt) == 0x3)
+ /* hash 9 */
+ chan_addr = ((chan_addr >> 10) << 9) |
+ (chan_addr & 0x1ff);
+ else
+ /* A[6] or hash 6 */
+ chan_addr = ((chan_addr >> 7) << 6) |
+ (chan_addr & 0x3f);
+ } else
+ /* A[12] */
+ chan_addr = ((chan_addr >> 13) << 12) |
+ (chan_addr & 0xfff);
}
- debugf1(" (ChannelAddrLong=0x%llx) >> 8 becomes InputAddr=0x%x\n",
- chan_addr, (u32)(chan_addr >> 8));
+ edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr);
- cs_found = f10_lookup_addr_in_dct(chan_addr >> 8, node_id, channel);
+ cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
- if (cs_found >= 0) {
- *nid = node_id;
+ if (cs_found >= 0)
*chan_sel = channel;
- }
+
return cs_found;
}
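
[Editor's note, not part of the patch: to see what the "Remove node interleaving" step in f1x_match_to_this_node() does to an address, here is a short sketch with a hand-rolled hweight8 and a hypothetical 8-node interleave:

#include <stdio.h>
#include <stdint.h>

static unsigned hweight8_(unsigned v)
{
        unsigned n = 0;

        while (v) {
                n += v & 1;
                v >>= 1;
        }
        return n;
}

int main(void)
{
        /*
         * Node interleaving fans accesses across nodes using the low
         * address bits just above 4 KB; intlv_en = 0x7 means 8 nodes,
         * i.e. 3 select bits at [14:12]. Squeezing them out collapses
         * the address back to this node's contiguous DCT space
         * (see F1x120).
         */
        uint64_t chan_addr = 0x12345678ULL;     /* arbitrary sample */
        unsigned intlv_en = 0x7;                /* hypothetical: 8-node interleave */

        chan_addr = ((chan_addr >> (12 + hweight8_(intlv_en))) << 12) |
                    (chan_addr & 0xfff);

        printf("de-interleaved: 0x%llx\n", (unsigned long long)chan_addr);
        return 0;
}]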
-static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
- int *node, int *chan_sel)
+static int f15_m30h_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
+ u64 sys_addr, int *chan_sel)
{
- int dram_range, cs_found = -EINVAL;
- u64 dram_base, dram_limit;
+ int cs_found = -EINVAL;
+ int num_dcts_intlv = 0;
+ u64 chan_addr, chan_offset;
+ u64 dct_base, dct_limit;
+ u32 dct_cont_base_reg, dct_cont_limit_reg, tmp;
+ u8 channel, alias_channel, leg_mmio_hole, dct_sel, dct_offset_en;
- for (dram_range = 0; dram_range < DRAM_REG_COUNT; dram_range++) {
+ u64 dhar_offset = f10_dhar_offset(pvt);
+ u8 intlv_addr = dct_sel_interleave_addr(pvt);
+ u8 node_id = dram_dst_node(pvt, range);
+ u8 intlv_en = dram_intlv_en(pvt, range);
- if (!pvt->dram_rw_en[dram_range])
- continue;
+ amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &dct_cont_base_reg);
+ amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &dct_cont_limit_reg);
+
+ dct_offset_en = (u8) ((dct_cont_base_reg >> 3) & BIT(0));
+ dct_sel = (u8) ((dct_cont_base_reg >> 4) & 0x7);
+
+ edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
+ range, sys_addr, get_dram_limit(pvt, range));
+
+ if (!(get_dram_base(pvt, range) <= sys_addr) ||
+ !(get_dram_limit(pvt, range) >= sys_addr))
+ return -EINVAL;
+
+ if (dhar_valid(pvt) &&
+ dhar_base(pvt) <= sys_addr &&
+ sys_addr < BIT_64(32)) {
+ amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
+ sys_addr);
+ return -EINVAL;
+ }
+
+ /* Verify sys_addr is within DCT Range. */
+ dct_base = (u64) dct_sel_baseaddr(pvt);
+ dct_limit = (dct_cont_limit_reg >> 11) & 0x1FFF;
+
+ if (!(dct_cont_base_reg & BIT(0)) &&
+ !(dct_base <= (sys_addr >> 27) &&
+ dct_limit >= (sys_addr >> 27)))
+ return -EINVAL;
+
+ /* Verify number of DCTs that participate in channel interleaving. */
+ num_dcts_intlv = (int) hweight8(intlv_en);
+
+ if ((num_dcts_intlv % 2) != 0 || (num_dcts_intlv > 4))
+ return -EINVAL;
+
+ channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en,
+ num_dcts_intlv, dct_sel);
+
+ /* Verify we stay within the MAX number of channels allowed */
+ if (channel > 3)
+ return -EINVAL;
+
+ leg_mmio_hole = (u8) (dct_cont_base_reg >> 1 & BIT(0));
+
+ /* Get normalized DCT addr */
+ if (leg_mmio_hole && (sys_addr >= BIT_64(32)))
+ chan_offset = dhar_offset;
+ else
+ chan_offset = dct_base << 27;
+
+ chan_addr = sys_addr - chan_offset;
+
+ /* remove channel interleave */
+ if (num_dcts_intlv == 2) {
+ if (intlv_addr == 0x4)
+ chan_addr = ((chan_addr >> 9) << 8) |
+ (chan_addr & 0xff);
+ else if (intlv_addr == 0x5)
+ chan_addr = ((chan_addr >> 10) << 9) |
+ (chan_addr & 0x1ff);
+ else
+ return -EINVAL;
+
+ } else if (num_dcts_intlv == 4) {
+ if (intlv_addr == 0x4)
+ chan_addr = ((chan_addr >> 10) << 8) |
+ (chan_addr & 0xff);
+ else if (intlv_addr == 0x5)
+ chan_addr = ((chan_addr >> 11) << 9) |
+ (chan_addr & 0x1ff);
+ else
+ return -EINVAL;
+ }
+
+ if (dct_offset_en) {
+ amd64_read_pci_cfg(pvt->F1,
+ DRAM_CONT_HIGH_OFF + (int) channel * 4,
+ &tmp);
+ chan_addr += (u64) ((tmp >> 11) & 0xfff) << 27;
+ }
+
+ f15h_select_dct(pvt, channel);
+
+ edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr);
+
+ /*
+ * Find Chip select:
+ * if channel = 3, then alias it to 1. This is because, in F15 M30h,
+ * there is support for 4 DCTs, but only 2 are currently functional.
+ * They are DCT0 and DCT3. But we have read all registers of DCT3 into
+ * pvt->csels[1]. So we need to use '1' here to get correct info.
+ * Refer F15 M30h BKDG Section 2.10 and 2.10.3 for clarifications.
+ */
+ alias_channel = (channel == 3) ? 1 : channel;
+
+ cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, alias_channel);
+
+ if (cs_found >= 0)
+ *chan_sel = alias_channel;
+
+ return cs_found;
+}
+
+static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt,
+ u64 sys_addr,
+ int *chan_sel)
+{
+ int cs_found = -EINVAL;
+ unsigned range;
- dram_base = pvt->dram_base[dram_range];
- dram_limit = pvt->dram_limit[dram_range];
+ for (range = 0; range < DRAM_RANGES; range++) {
+ if (!dram_rw(pvt, range))
+ continue;
- if ((dram_base <= sys_addr) && (sys_addr <= dram_limit)) {
+ if (pvt->fam == 0x15 && pvt->model >= 0x30)
+ cs_found = f15_m30h_match_to_this_node(pvt, range,
+ sys_addr,
+ chan_sel);
- cs_found = f10_match_to_this_node(pvt, dram_range,
- sys_addr, node,
- chan_sel);
+ else if ((get_dram_base(pvt, range) <= sys_addr) &&
+ (get_dram_limit(pvt, range) >= sys_addr)) {
+ cs_found = f1x_match_to_this_node(pvt, range,
+ sys_addr, chan_sel);
if (cs_found >= 0)
break;
}
@@ -1658,60 +1683,39 @@ static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
* The @sys_addr is usually an error address received from the hardware
* (MCX_ADDR).
*/
-static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
- struct err_regs *err_info,
- u64 sys_addr)
+static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
+ struct err_info *err)
{
struct amd64_pvt *pvt = mci->pvt_info;
- u32 page, offset;
- int nid, csrow, chan = 0;
- u16 syndrome;
- csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
+ error_address_to_page_and_offset(sys_addr, err);
- if (csrow < 0) {
- edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+ err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel);
+ if (err->csrow < 0) {
+ err->err_code = ERR_CSROW;
return;
}
- error_address_to_page_and_offset(sys_addr, &page, &offset);
-
- syndrome = extract_syndrome(err_info);
-
/*
* We need the syndromes for channel detection only when we're
* ganged. Otherwise @chan should already contain the channel at
* this point.
*/
- if (dct_ganging_enabled(pvt) && (pvt->nbcfg & K8_NBCFG_CHIPKILL))
- chan = get_channel_from_ecc_syndrome(mci, syndrome);
-
- if (chan >= 0)
- edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan,
- EDAC_MOD_STR);
- else
- /*
- * Channel unknown, report all channels on this CSROW as failed.
- */
- for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++)
- edac_mc_handle_ce(mci, page, offset, syndrome,
- csrow, chan, EDAC_MOD_STR);
+ if (dct_ganging_enabled(pvt))
+ err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
}
/*
* debug routine to display the memory sizes of all logical DIMMs and its
- * CSROWs as well
+ * CSROWs
*/
-static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt)
+static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
{
- int dimm, size0, size1, factor = 0;
- u32 dbam;
- u32 *dcsb;
-
- if (boot_cpu_data.x86 == 0xf) {
- if (pvt->dclr0 & F10_WIDTH_128)
- factor = 1;
+ int dimm, size0, size1;
+ u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
+ u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
+ if (pvt->fam == 0xf) {
/* K8 families < revF not supported yet */
if (pvt->ext_model < K8_REV_F)
return;
@@ -1719,11 +1723,12 @@ static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt)
WARN_ON(ctrl != 0);
}
- debugf1("F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
- ctrl, ctrl ? pvt->dbam1 : pvt->dbam0);
+ dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1 : pvt->dbam0;
+ dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->csels[1].csbases
+ : pvt->csels[0].csbases;
- dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
- dcsb = ctrl ? pvt->dcsb1 : pvt->dcsb0;
+ edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
+ ctrl, dbam);
edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl);
@@ -1731,88 +1736,90 @@ static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt)
for (dimm = 0; dimm < 4; dimm++) {
size0 = 0;
- if (dcsb[dimm*2] & K8_DCSB_CS_ENABLE)
- size0 = pvt->ops->dbam_to_cs(pvt, DBAM_DIMM(dimm, dbam));
+ if (dcsb[dimm*2] & DCSB_CS_ENABLE)
+ size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
+ DBAM_DIMM(dimm, dbam));
size1 = 0;
- if (dcsb[dimm*2 + 1] & K8_DCSB_CS_ENABLE)
- size1 = pvt->ops->dbam_to_cs(pvt, DBAM_DIMM(dimm, dbam));
+ if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
+ size1 = pvt->ops->dbam_to_cs(pvt, ctrl,
+ DBAM_DIMM(dimm, dbam));
- edac_printk(KERN_DEBUG, EDAC_MC, " %d: %5dMB %d: %5dMB\n",
- dimm * 2, size0 << factor,
- dimm * 2 + 1, size1 << factor);
+ amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
+ dimm * 2, size0,
+ dimm * 2 + 1, size1);
}
}
-/*
- * There currently are 3 types type of MC devices for AMD Athlon/Opterons
- * (as per PCI DEVICE_IDs):
- *
- * Family K8: That is the Athlon64 and Opteron CPUs. They all have the same PCI
- * DEVICE ID, even though there is differences between the different Revisions
- * (CG,D,E,F).
- *
- * Family F10h and F11h.
- *
- */
-static struct amd64_family_type amd64_family_types[] = {
+static struct amd64_family_type family_types[] = {
[K8_CPUS] = {
- .ctl_name = "RevF",
- .addr_f1_ctl = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
- .misc_f3_ctl = PCI_DEVICE_ID_AMD_K8_NB_MISC,
+ .ctl_name = "K8",
+ .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
+ .f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC,
.ops = {
.early_channel_count = k8_early_channel_count,
- .get_error_address = k8_get_error_address,
- .read_dram_base_limit = k8_read_dram_base_limit,
.map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
.dbam_to_cs = k8_dbam_to_chip_select,
+ .read_dct_pci_cfg = k8_read_dct_pci_cfg,
}
},
[F10_CPUS] = {
- .ctl_name = "Family 10h",
- .addr_f1_ctl = PCI_DEVICE_ID_AMD_10H_NB_MAP,
- .misc_f3_ctl = PCI_DEVICE_ID_AMD_10H_NB_MISC,
+ .ctl_name = "F10h",
+ .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
+ .f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC,
.ops = {
- .early_channel_count = f10_early_channel_count,
- .get_error_address = f10_get_error_address,
- .read_dram_base_limit = f10_read_dram_base_limit,
- .read_dram_ctl_register = f10_read_dram_ctl_register,
- .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow,
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
.dbam_to_cs = f10_dbam_to_chip_select,
+ .read_dct_pci_cfg = f10_read_dct_pci_cfg,
}
},
- [F11_CPUS] = {
- .ctl_name = "Family 11h",
- .addr_f1_ctl = PCI_DEVICE_ID_AMD_11H_NB_MAP,
- .misc_f3_ctl = PCI_DEVICE_ID_AMD_11H_NB_MISC,
+ [F15_CPUS] = {
+ .ctl_name = "F15h",
+ .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
+ .f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3,
.ops = {
- .early_channel_count = f10_early_channel_count,
- .get_error_address = f10_get_error_address,
- .read_dram_base_limit = f10_read_dram_base_limit,
- .read_dram_ctl_register = f10_read_dram_ctl_register,
- .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow,
- .dbam_to_cs = f10_dbam_to_chip_select,
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f15_dbam_to_chip_select,
+ .read_dct_pci_cfg = f15_read_dct_pci_cfg,
+ }
+ },
+ [F15_M30H_CPUS] = {
+ .ctl_name = "F15h_M30h",
+ .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
+ .f3_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F3,
+ .ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f16_dbam_to_chip_select,
+ .read_dct_pci_cfg = f15_read_dct_pci_cfg,
+ }
+ },
+ [F16_CPUS] = {
+ .ctl_name = "F16h",
+ .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
+ .f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3,
+ .ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f16_dbam_to_chip_select,
+ .read_dct_pci_cfg = f10_read_dct_pci_cfg,
+ }
+ },
+ [F16_M30H_CPUS] = {
+ .ctl_name = "F16h_M30h",
+ .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
+ .f3_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F3,
+ .ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f16_dbam_to_chip_select,
+ .read_dct_pci_cfg = f10_read_dct_pci_cfg,
}
},
};
-static struct pci_dev *pci_get_related_function(unsigned int vendor,
- unsigned int device,
- struct pci_dev *related)
-{
- struct pci_dev *dev = NULL;
-
- dev = pci_get_device(vendor, device, dev);
- while (dev) {
- if ((dev->bus->number == related->bus->number) &&
- (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
- break;
- dev = pci_get_device(vendor, device, dev);
- }
-
- return dev;
-}
-
/*
* These are tables of eigenvectors (one per line) which can be used for the
* construction of the syndrome tables. The modified syndrome search algorithm
@@ -1820,7 +1827,7 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor,
*
* Algorithm courtesy of Ross LaFetra from AMD.
*/
-static u16 x4_vectors[] = {
+static const u16 x4_vectors[] = {
0x2f57, 0x1afe, 0x66cc, 0xdd88,
0x11eb, 0x3396, 0x7f4c, 0xeac8,
0x0001, 0x0002, 0x0004, 0x0008,
@@ -1859,7 +1866,7 @@ static u16 x4_vectors[] = {
0x19a9, 0x2efe, 0xb5cc, 0x6f88,
};
-static u16 x8_vectors[] = {
+static const u16 x8_vectors[] = {
0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
@@ -1881,15 +1888,15 @@ static u16 x8_vectors[] = {
0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
};
-static int decode_syndrome(u16 syndrome, u16 *vectors, int num_vecs,
- int v_dim)
+static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs,
+ unsigned v_dim)
{
unsigned int i, err_sym;
for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) {
u16 s = syndrome;
- int v_idx = err_sym * v_dim;
- int v_end = (err_sym + 1) * v_dim;
+ unsigned v_idx = err_sym * v_dim;
+ unsigned v_end = (err_sym + 1) * v_dim;
/* walk over all 16 bits of the syndrome */
for (i = 1; i < (1U << 16); i <<= 1) {
@@ -1913,7 +1920,7 @@ static int decode_syndrome(u16 syndrome, u16 *vectors, int num_vecs,
}
}
- debugf0("syndrome(%x) not found\n", syndrome);
+ edac_dbg(0, "syndrome(%x) not found\n", syndrome);
return -1;
}
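
[Editor's note, not part of the patch: decode_syndrome() walks the syndrome bit by bit, eliminating eigenvector components until the syndrome is exhausted or a bit cannot be produced. Since v_dim is at most 8, the same membership test can be expressed as a brute-force subset scan over GF(2), which makes the linear-algebra intent explicit. The sketch below, using the first two x4 symbols from the table above, is an equivalent oracle, not the driver's algorithm:

#include <stdio.h>
#include <stdint.h>

/*
 * Check whether @syndrome lies in the GF(2) span of the v_dim vectors
 * belonging to one error symbol, by scanning all 2^v_dim XOR subsets.
 */
static int symbol_spans_syndrome(const uint16_t *vecs, unsigned v_dim,
                                 uint16_t syndrome)
{
        unsigned subset, k;

        for (subset = 1; subset < (1u << v_dim); subset++) {
                uint16_t acc = 0;

                for (k = 0; k < v_dim; k++)
                        if (subset & (1u << k))
                                acc ^= vecs[k];

                if (acc == syndrome)
                        return 1;
        }
        return 0;
}

static int decode_syndrome_bruteforce(uint16_t syndrome,
                                      const uint16_t *vectors,
                                      unsigned num_vecs, unsigned v_dim)
{
        unsigned err_sym;

        for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++)
                if (symbol_spans_syndrome(&vectors[err_sym * v_dim], v_dim,
                                          syndrome))
                        return err_sym;
        return -1;
}

int main(void)
{
        /* first two x4 symbols (one per line) from the table above */
        static const uint16_t vecs[] = {
                0x2f57, 0x1afe, 0x66cc, 0xdd88,
                0x11eb, 0x3396, 0x7f4c, 0xeac8,
        };

        /* 0x1afe ^ 0xdd88 lies in symbol 0's span, so this prints 0 */
        printf("err_sym = %d\n",
               decode_syndrome_bruteforce(0x1afe ^ 0xdd88, vecs, 8, 4));
        return 0;
}]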
@@ -1961,305 +1968,218 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
struct amd64_pvt *pvt = mci->pvt_info;
int err_sym = -1;
- if (pvt->syn_type == 8)
+ if (pvt->ecc_sym_sz == 8)
err_sym = decode_syndrome(syndrome, x8_vectors,
ARRAY_SIZE(x8_vectors),
- pvt->syn_type);
- else if (pvt->syn_type == 4)
+ pvt->ecc_sym_sz);
+ else if (pvt->ecc_sym_sz == 4)
err_sym = decode_syndrome(syndrome, x4_vectors,
ARRAY_SIZE(x4_vectors),
- pvt->syn_type);
+ pvt->ecc_sym_sz);
else {
- amd64_printk(KERN_WARNING, "%s: Illegal syndrome type: %u\n",
- __func__, pvt->syn_type);
+ amd64_warn("Illegal syndrome type: %u\n", pvt->ecc_sym_sz);
return err_sym;
}
- return map_err_sym_to_channel(err_sym, pvt->syn_type);
+ return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
}
-/*
- * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
- * ADDRESS and process.
- */
-static void amd64_handle_ce(struct mem_ctl_info *mci,
- struct err_regs *info)
+static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err,
+ u8 ecc_type)
{
- struct amd64_pvt *pvt = mci->pvt_info;
- u64 sys_addr;
+ enum hw_event_mc_err_type err_type;
+ const char *string;
- /* Ensure that the Error Address is VALID */
- if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) {
- amd64_mc_printk(mci, KERN_ERR,
- "HW has no ERROR_ADDRESS available\n");
- edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
+ if (ecc_type == 2)
+ err_type = HW_EVENT_ERR_CORRECTED;
+ else if (ecc_type == 1)
+ err_type = HW_EVENT_ERR_UNCORRECTED;
+ else {
+ WARN(1, "Something is rotten in the state of Denmark.\n");
return;
}
- sys_addr = pvt->ops->get_error_address(mci, info);
-
- amd64_mc_printk(mci, KERN_ERR,
- "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);
+ switch (err->err_code) {
+ case DECODE_OK:
+ string = "";
+ break;
+ case ERR_NODE:
+ string = "Failed to map error addr to a node";
+ break;
+ case ERR_CSROW:
+ string = "Failed to map error addr to a csrow";
+ break;
+ case ERR_CHANNEL:
+ string = "unknown syndrome - possible error reporting race";
+ break;
+ default:
+ string = "WTF error";
+ break;
+ }
- pvt->ops->map_sysaddr_to_csrow(mci, info, sys_addr);
+ edac_mc_handle_error(err_type, mci, 1,
+ err->page, err->offset, err->syndrome,
+ err->csrow, err->channel, -1,
+ string, "");
}
-/* Handle any Un-correctable Errors (UEs) */
-static void amd64_handle_ue(struct mem_ctl_info *mci,
- struct err_regs *info)
+static inline void decode_bus_error(int node_id, struct mce *m)
{
+ struct mem_ctl_info *mci = mcis[node_id];
struct amd64_pvt *pvt = mci->pvt_info;
- struct mem_ctl_info *log_mci, *src_mci = NULL;
- int csrow;
+ u8 ecc_type = (m->status >> 45) & 0x3;
+ u8 xec = XEC(m->status, 0x1f);
+ u16 ec = EC(m->status);
u64 sys_addr;
- u32 page, offset;
-
- log_mci = mci;
-
- if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) {
- amd64_mc_printk(mci, KERN_CRIT,
- "HW has no ERROR_ADDRESS available\n");
- edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
- return;
- }
+ struct err_info err;
- sys_addr = pvt->ops->get_error_address(mci, info);
-
- /*
- * Find out which node the error address belongs to. This may be
- * different from the node that detected the error.
- */
- src_mci = find_mc_by_sys_addr(mci, sys_addr);
- if (!src_mci) {
- amd64_mc_printk(mci, KERN_CRIT,
- "ERROR ADDRESS (0x%lx) value NOT mapped to a MC\n",
- (unsigned long)sys_addr);
- edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
- return;
- }
-
- log_mci = src_mci;
-
- csrow = sys_addr_to_csrow(log_mci, sys_addr);
- if (csrow < 0) {
- amd64_mc_printk(mci, KERN_CRIT,
- "ERROR_ADDRESS (0x%lx) value NOT mapped to 'csrow'\n",
- (unsigned long)sys_addr);
- edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
- } else {
- error_address_to_page_and_offset(sys_addr, &page, &offset);
- edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR);
- }
-}
-
-static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
- struct err_regs *info)
-{
- u32 ec = ERROR_CODE(info->nbsl);
- u32 xec = EXT_ERROR_CODE(info->nbsl);
- int ecc_type = (info->nbsh >> 13) & 0x3;
-
- /* Bail early out if this was an 'observed' error */
- if (PP(ec) == K8_NBSL_PP_OBS)
+ /* Bail out early if this was an 'observed' error */
+ if (PP(ec) == NBSL_PP_OBS)
return;
/* Do only ECC errors */
if (xec && xec != F10_NBSL_EXT_ERR_ECC)
return;
- if (ecc_type == 2)
- amd64_handle_ce(mci, info);
- else if (ecc_type == 1)
- amd64_handle_ue(mci, info);
-}
-
-void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg)
-{
- struct mem_ctl_info *mci = mci_lookup[node_id];
- struct err_regs regs;
+ memset(&err, 0, sizeof(err));
- regs.nbsl = (u32) m->status;
- regs.nbsh = (u32)(m->status >> 32);
- regs.nbeal = (u32) m->addr;
- regs.nbeah = (u32)(m->addr >> 32);
- regs.nbcfg = nbcfg;
+ sys_addr = get_error_address(pvt, m);
- __amd64_decode_bus_error(mci, &regs);
+ if (ecc_type == 2)
+ err.syndrome = extract_syndrome(m->status);
- /*
- * Check the UE bit of the NB status high register, if set generate some
- * logs. If NOT a GART error, then process the event as a NO-INFO event.
- * If it was a GART error, skip that process.
- *
- * FIXME: this should go somewhere else, if at all.
- */
- if (regs.nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
- edac_mc_handle_ue_no_info(mci, "UE bit is set");
+ pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);
+ __log_bus_error(mci, &err, ecc_type);
}
/*
- * Input:
- * 1) struct amd64_pvt which contains pvt->dram_f2_ctl pointer
- * 2) AMD Family index value
- *
- * Ouput:
- * Upon return of 0, the following filled in:
- *
- * struct pvt->addr_f1_ctl
- * struct pvt->misc_f3_ctl
- *
- * Filled in with related device funcitions of 'dram_f2_ctl'
- * These devices are "reserved" via the pci_get_device()
- *
- * Upon return of 1 (error status):
- *
- * Nothing reserved
+ * Use pvt->F2 which contains the F2 CPU PCI device to get the related
+ * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error.
*/
-static int amd64_reserve_mc_sibling_devices(struct amd64_pvt *pvt, int mc_idx)
+static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
{
- const struct amd64_family_type *amd64_dev = &amd64_family_types[mc_idx];
-
/* Reserve the ADDRESS MAP Device */
- pvt->addr_f1_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
- amd64_dev->addr_f1_ctl,
- pvt->dram_f2_ctl);
-
- if (!pvt->addr_f1_ctl) {
- amd64_printk(KERN_ERR, "error address map device not found: "
- "vendor %x device 0x%x (broken BIOS?)\n",
- PCI_VENDOR_ID_AMD, amd64_dev->addr_f1_ctl);
- return 1;
+ pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2);
+ if (!pvt->F1) {
+ amd64_err("error address map device not found: "
+ "vendor %x device 0x%x (broken BIOS?)\n",
+ PCI_VENDOR_ID_AMD, f1_id);
+ return -ENODEV;
}
/* Reserve the MISC Device */
- pvt->misc_f3_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
- amd64_dev->misc_f3_ctl,
- pvt->dram_f2_ctl);
+ pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2);
+ if (!pvt->F3) {
+ pci_dev_put(pvt->F1);
+ pvt->F1 = NULL;
- if (!pvt->misc_f3_ctl) {
- pci_dev_put(pvt->addr_f1_ctl);
- pvt->addr_f1_ctl = NULL;
+ amd64_err("error F3 device not found: "
+ "vendor %x device 0x%x (broken BIOS?)\n",
+ PCI_VENDOR_ID_AMD, f3_id);
- amd64_printk(KERN_ERR, "error miscellaneous device not found: "
- "vendor %x device 0x%x (broken BIOS?)\n",
- PCI_VENDOR_ID_AMD, amd64_dev->misc_f3_ctl);
- return 1;
+ return -ENODEV;
}
-
- debugf1(" Addr Map device PCI Bus ID:\t%s\n",
- pci_name(pvt->addr_f1_ctl));
- debugf1(" DRAM MEM-CTL PCI Bus ID:\t%s\n",
- pci_name(pvt->dram_f2_ctl));
- debugf1(" Misc device PCI Bus ID:\t%s\n",
- pci_name(pvt->misc_f3_ctl));
+ edac_dbg(1, "F1: %s\n", pci_name(pvt->F1));
+ edac_dbg(1, "F2: %s\n", pci_name(pvt->F2));
+ edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
return 0;
}
-static void amd64_free_mc_sibling_devices(struct amd64_pvt *pvt)
+static void free_mc_sibling_devs(struct amd64_pvt *pvt)
{
- pci_dev_put(pvt->addr_f1_ctl);
- pci_dev_put(pvt->misc_f3_ctl);
+ pci_dev_put(pvt->F1);
+ pci_dev_put(pvt->F3);
}
/*
* Retrieve the hardware registers of the memory controller (this includes the
* 'Address Map' and 'Misc' device regs)
*/
-static void amd64_read_mc_registers(struct amd64_pvt *pvt)
+static void read_mc_regs(struct amd64_pvt *pvt)
{
+ unsigned range;
u64 msr_val;
u32 tmp;
- int dram;
/*
* Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
* those are Read-As-Zero
*/
rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
- debugf0(" TOP_MEM: 0x%016llx\n", pvt->top_mem);
+ edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem);
/* check first whether TOP_MEM2 is enabled */
rdmsrl(MSR_K8_SYSCFG, msr_val);
if (msr_val & (1U << 21)) {
rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
- debugf0(" TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
+ edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
} else
- debugf0(" TOP_MEM2 disabled.\n");
+ edac_dbg(0, " TOP_MEM2 disabled\n");
- amd64_cpu_display_info(pvt);
+ amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCAP, &pvt->nbcap);
+ read_dram_ctl_register(pvt);
- if (pvt->ops->read_dram_ctl_register)
- pvt->ops->read_dram_ctl_register(pvt);
+ for (range = 0; range < DRAM_RANGES; range++) {
+ u8 rw;
- for (dram = 0; dram < DRAM_REG_COUNT; dram++) {
- /*
- * Call CPU specific READ function to get the DRAM Base and
- * Limit values from the DCT.
- */
- pvt->ops->read_dram_base_limit(pvt, dram);
+ /* read settings for this DRAM range */
+ read_dram_base_limit_regs(pvt, range);
- /*
- * Only print out debug info on rows with both R and W Enabled.
- * Normal processing, compiler should optimize this whole 'if'
- * debug output block away.
- */
- if (pvt->dram_rw_en[dram] != 0) {
- debugf1(" DRAM-BASE[%d]: 0x%016llx "
- "DRAM-LIMIT: 0x%016llx\n",
- dram,
- pvt->dram_base[dram],
- pvt->dram_limit[dram]);
-
- debugf1(" IntlvEn=%s %s %s "
- "IntlvSel=%d DstNode=%d\n",
- pvt->dram_IntlvEn[dram] ?
- "Enabled" : "Disabled",
- (pvt->dram_rw_en[dram] & 0x2) ? "W" : "!W",
- (pvt->dram_rw_en[dram] & 0x1) ? "R" : "!R",
- pvt->dram_IntlvSel[dram],
- pvt->dram_DstNode[dram]);
- }
+ rw = dram_rw(pvt, range);
+ if (!rw)
+ continue;
+
+ edac_dbg(1, " DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n",
+ range,
+ get_dram_base(pvt, range),
+ get_dram_limit(pvt, range));
+
+ edac_dbg(1, " IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n",
+ dram_intlv_en(pvt, range) ? "Enabled" : "Disabled",
+ (rw & 0x1) ? "R" : "-",
+ (rw & 0x2) ? "W" : "-",
+ dram_intlv_sel(pvt, range),
+ dram_dst_node(pvt, range));
}
- amd64_read_dct_base_mask(pvt);
+ read_dct_base_mask(pvt);
- amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DHAR, &pvt->dhar);
- amd64_read_dbam_reg(pvt);
+ amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar);
+ amd64_read_dct_pci_cfg(pvt, DBAM0, &pvt->dbam0);
- amd64_read_pci_cfg(pvt->misc_f3_ctl,
- F10_ONLINE_SPARE, &pvt->online_spare);
+ amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare);
- amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
- amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0);
+ amd64_read_dct_pci_cfg(pvt, DCLR0, &pvt->dclr0);
+ amd64_read_dct_pci_cfg(pvt, DCHR0, &pvt->dchr0);
- if (boot_cpu_data.x86 >= 0x10) {
- if (!dct_ganging_enabled(pvt)) {
- amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1);
- amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_1, &pvt->dchr1);
- }
- amd64_read_pci_cfg(pvt->misc_f3_ctl, EXT_NB_MCA_CFG, &tmp);
+ if (!dct_ganging_enabled(pvt)) {
+ amd64_read_dct_pci_cfg(pvt, DCLR1, &pvt->dclr1);
+ amd64_read_dct_pci_cfg(pvt, DCHR1, &pvt->dchr1);
}
- if (boot_cpu_data.x86 == 0x10 &&
- boot_cpu_data.x86_model > 7 &&
- /* F3x180[EccSymbolSize]=1 => x8 symbols */
- tmp & BIT(25))
- pvt->syn_type = 8;
- else
- pvt->syn_type = 4;
+ pvt->ecc_sym_sz = 4;
+
+ if (pvt->fam >= 0x10) {
+ amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
+ if (pvt->fam != 0x16)
+ /* F16h has only DCT0 */
+ amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);
- amd64_dump_misc_regs(pvt);
+ /* F10h, revD and later can do x8 ECC too */
+ if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25))
+ pvt->ecc_sym_sz = 8;
+ }
+ dump_misc_regs(pvt);
}
/*
* NOTE: CPU Revision Dependent code
*
* Input:
- * @csrow_nr ChipSelect Row Number (0..pvt->cs_count-1)
+ * @csrow_nr ChipSelect Row Number (0..NUM_CHIPSELECTS-1)
* k8 private pointer to -->
* DRAM Bank Address mapping register
* node_id
@@ -2289,9 +2209,11 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt)
* encompasses
*
*/
-static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
+static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
{
u32 cs_mode, nr_pages;
+ u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
+
/*
* The math on this doesn't look right on the surface because x/2*4 can
@@ -2300,19 +2222,13 @@ static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
* number of bits to shift the DBAM register to extract the proper CSROW
* field.
*/
- cs_mode = (pvt->dbam0 >> ((csrow_nr / 2) * 4)) & 0xF;
-
- nr_pages = pvt->ops->dbam_to_cs(pvt, cs_mode) << (20 - PAGE_SHIFT);
+ cs_mode = DBAM_DIMM(csrow_nr / 2, dbam);
- /*
- * If dual channel then double the memory size of single channel.
- * Channel count is 1 or 2
- */
- nr_pages <<= (pvt->channel_count - 1);
+ nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);
- debugf0(" (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode);
- debugf0(" nr_pages= %u channel-count = %d\n",
- nr_pages, pvt->channel_count);
+ edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
+ csrow_nr, dct, cs_mode);
+ edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
return nr_pages;
}
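
[Editor's note, not part of the patch: the page-count conversion is plain unit arithmetic: dbam_to_cs() returns MB, and shifting by (20 - PAGE_SHIFT) converts MB to pages. A two-line check, assuming 4 KiB pages and a hypothetical 1024 MB chip select:

#include <stdio.h>

#define PAGE_SHIFT 12   /* assuming 4 KiB pages */

int main(void)
{
        unsigned int size_mb = 1024;    /* hypothetical dbam_to_cs() result */
        unsigned int nr_pages = size_mb << (20 - PAGE_SHIFT);

        /* 1024 MB / 4 KiB = 262144 pages per channel */
        printf("nr_pages/channel: %u\n", nr_pages);
        return 0;
}]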
@@ -2321,73 +2237,82 @@ static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
* Initialize the array of csrow attribute instances, based on the values
* from pci config hardware registers.
*/
-static int amd64_init_csrows(struct mem_ctl_info *mci)
+static int init_csrows(struct mem_ctl_info *mci)
{
+ struct amd64_pvt *pvt = mci->pvt_info;
struct csrow_info *csrow;
- struct amd64_pvt *pvt;
- u64 input_addr_min, input_addr_max, sys_addr;
- int i, empty = 1;
+ struct dimm_info *dimm;
+ enum edac_type edac_mode;
+ enum mem_type mtype;
+ int i, j, empty = 1;
+ int nr_pages = 0;
+ u32 val;
- pvt = mci->pvt_info;
+ amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
+
+ pvt->nbcfg = val;
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &pvt->nbcfg);
+ edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
+ pvt->mc_node_id, val,
+ !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
- debugf0("NBCFG= 0x%x CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg,
- (pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
- (pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"
- );
+ /*
+ * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
+ */
+ for_each_chip_select(i, 0, pvt) {
+ bool row_dct0 = !!csrow_enabled(i, 0, pvt);
+ bool row_dct1 = false;
- for (i = 0; i < pvt->cs_count; i++) {
- csrow = &mci->csrows[i];
+ if (pvt->fam != 0xf)
+ row_dct1 = !!csrow_enabled(i, 1, pvt);
- if ((pvt->dcsb0[i] & K8_DCSB_CS_ENABLE) == 0) {
- debugf1("----CSROW %d EMPTY for node %d\n", i,
- pvt->mc_node_id);
+ if (!row_dct0 && !row_dct1)
continue;
+
+ csrow = mci->csrows[i];
+ empty = 0;
+
+ edac_dbg(1, "MC node: %d, csrow: %d\n",
+ pvt->mc_node_id, i);
+
+ if (row_dct0) {
+ nr_pages = get_csrow_nr_pages(pvt, 0, i);
+ csrow->channels[0]->dimm->nr_pages = nr_pages;
}
- debugf1("----CSROW %d VALID for MC node %d\n",
- i, pvt->mc_node_id);
+ /* K8 has only one DCT */
+ if (pvt->fam != 0xf && row_dct1) {
+ int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i);
- empty = 0;
- csrow->nr_pages = amd64_csrow_nr_pages(i, pvt);
- find_csrow_limits(mci, i, &input_addr_min, &input_addr_max);
- sys_addr = input_addr_to_sys_addr(mci, input_addr_min);
- csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT);
- sys_addr = input_addr_to_sys_addr(mci, input_addr_max);
- csrow->last_page = (u32) (sys_addr >> PAGE_SHIFT);
- csrow->page_mask = ~mask_from_dct_mask(pvt, i);
- /* 8 bytes of resolution */
-
- csrow->mtype = amd64_determine_memory_type(pvt);
-
- debugf1(" for MC node %d csrow %d:\n", pvt->mc_node_id, i);
- debugf1(" input_addr_min: 0x%lx input_addr_max: 0x%lx\n",
- (unsigned long)input_addr_min,
- (unsigned long)input_addr_max);
- debugf1(" sys_addr: 0x%lx page_mask: 0x%lx\n",
- (unsigned long)sys_addr, csrow->page_mask);
- debugf1(" nr_pages: %u first_page: 0x%lx "
- "last_page: 0x%lx\n",
- (unsigned)csrow->nr_pages,
- csrow->first_page, csrow->last_page);
+ csrow->channels[1]->dimm->nr_pages = row_dct1_pages;
+ nr_pages += row_dct1_pages;
+ }
+
+ mtype = determine_memory_type(pvt, i);
+
+ edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
/*
* determine whether CHIPKILL or JUST ECC or NO ECC is operating
*/
- if (pvt->nbcfg & K8_NBCFG_ECC_ENABLE)
- csrow->edac_mode =
- (pvt->nbcfg & K8_NBCFG_CHIPKILL) ?
- EDAC_S4ECD4ED : EDAC_SECDED;
+ if (pvt->nbcfg & NBCFG_ECC_ENABLE)
+ edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ?
+ EDAC_S4ECD4ED : EDAC_SECDED;
else
- csrow->edac_mode = EDAC_NONE;
+ edac_mode = EDAC_NONE;
+
+ for (j = 0; j < pvt->channel_count; j++) {
+ dimm = csrow->channels[j]->dimm;
+ dimm->mtype = mtype;
+ dimm->edac_mode = edac_mode;
+ }
}
return empty;
}
/* get all cores on this DCT */
-static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, int nid)
+static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
{
int cpu;
@@ -2397,15 +2322,14 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, int nid)
}
/* check MCG_CTL on all the cpus on this node */
-static bool amd64_nb_mce_bank_enabled_on_node(int nid)
+static bool nb_mce_bank_enabled_on_node(u16 nid)
{
cpumask_var_t mask;
int cpu, nbe;
bool ret = false;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
- amd64_printk(KERN_WARNING, "%s: error allocating mask\n",
- __func__);
+ amd64_warn("%s: Error allocating mask\n", __func__);
return false;
}
@@ -2415,11 +2339,11 @@ static bool amd64_nb_mce_bank_enabled_on_node(int nid)
for_each_cpu(cpu, mask) {
struct msr *reg = per_cpu_ptr(msrs, cpu);
- nbe = reg->l & K8_MSR_MCGCTL_NBE;
+ nbe = reg->l & MSR_MCGCTL_NBE;
- debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
- cpu, reg->q,
- (nbe ? "enabled" : "disabled"));
+ edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
+ cpu, reg->q,
+ (nbe ? "enabled" : "disabled"));
if (!nbe)
goto out;
@@ -2431,18 +2355,17 @@ out:
return ret;
}
-static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
+static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
{
cpumask_var_t cmask;
int cpu;
if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
- amd64_printk(KERN_WARNING, "%s: error allocating mask\n",
- __func__);
+ amd64_warn("%s: error allocating mask\n", __func__);
return false;
}
- get_cpus_on_this_dct_cpumask(cmask, pvt->mc_node_id);
+ get_cpus_on_this_dct_cpumask(cmask, nid);
rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
@@ -2451,16 +2374,16 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
struct msr *reg = per_cpu_ptr(msrs, cpu);
if (on) {
- if (reg->l & K8_MSR_MCGCTL_NBE)
- pvt->flags.nb_mce_enable = 1;
+ if (reg->l & MSR_MCGCTL_NBE)
+ s->flags.nb_mce_enable = 1;
- reg->l |= K8_MSR_MCGCTL_NBE;
+ reg->l |= MSR_MCGCTL_NBE;
} else {
/*
* Turn off NB MCE reporting only when it was off before
*/
- if (!pvt->flags.nb_mce_enable)
- reg->l &= ~K8_MSR_MCGCTL_NBE;
+ if (!s->flags.nb_mce_enable)
+ reg->l &= ~MSR_MCGCTL_NBE;
}
}
wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
@@ -2470,92 +2393,90 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
return 0;
}
-static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
+static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
+ struct pci_dev *F3)
{
- struct amd64_pvt *pvt = mci->pvt_info;
- u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+ bool ret = true;
+ u32 value, mask = 0x3; /* UECC/CECC enable */
+
+ if (toggle_ecc_err_reporting(s, nid, ON)) {
+ amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
+ return false;
+ }
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCTL, &value);
+ amd64_read_pci_cfg(F3, NBCTL, &value);
- /* turn on UECCn and CECCEn bits */
- pvt->old_nbctl = value & mask;
- pvt->nbctl_mcgctl_saved = 1;
+ s->old_nbctl = value & mask;
+ s->nbctl_valid = true;
value |= mask;
- pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
-
- if (amd64_toggle_ecc_err_reporting(pvt, ON))
- amd64_printk(KERN_WARNING, "Error enabling ECC reporting over "
- "MCGCTL!\n");
+ amd64_write_pci_cfg(F3, NBCTL, value);
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value);
+ amd64_read_pci_cfg(F3, NBCFG, &value);
- debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value,
- (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
- (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+ edac_dbg(0, "1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
+ nid, value, !!(value & NBCFG_ECC_ENABLE));
- if (!(value & K8_NBCFG_ECC_ENABLE)) {
- amd64_printk(KERN_WARNING,
- "This node reports that DRAM ECC is "
- "currently Disabled; ENABLING now\n");
+ if (!(value & NBCFG_ECC_ENABLE)) {
+ amd64_warn("DRAM ECC disabled on this node, enabling...\n");
- pvt->flags.nb_ecc_prev = 0;
+ s->flags.nb_ecc_prev = 0;
/* Attempt to turn on DRAM ECC Enable */
- value |= K8_NBCFG_ECC_ENABLE;
- pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value);
+ value |= NBCFG_ECC_ENABLE;
+ amd64_write_pci_cfg(F3, NBCFG, value);
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value);
+ amd64_read_pci_cfg(F3, NBCFG, &value);
- if (!(value & K8_NBCFG_ECC_ENABLE)) {
- amd64_printk(KERN_WARNING,
- "Hardware rejects Enabling DRAM ECC checking\n"
- "Check memory DIMM configuration\n");
+ if (!(value & NBCFG_ECC_ENABLE)) {
+ amd64_warn("Hardware rejected DRAM ECC enable,"
+ "check memory DIMM configuration.\n");
+ ret = false;
} else {
- amd64_printk(KERN_DEBUG,
- "Hardware accepted DRAM ECC Enable\n");
+ amd64_info("Hardware accepted DRAM ECC Enable\n");
}
} else {
- pvt->flags.nb_ecc_prev = 1;
+ s->flags.nb_ecc_prev = 1;
}
- debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value,
- (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
- (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+ edac_dbg(0, "2: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
+ nid, value, !!(value & NBCFG_ECC_ENABLE));
- pvt->ctl_error_info.nbcfg = value;
+ return ret;
}
-static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
+static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
+ struct pci_dev *F3)
{
- u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+ u32 value, mask = 0x3; /* UECC/CECC enable */
- if (!pvt->nbctl_mcgctl_saved)
+
+ if (!s->nbctl_valid)
return;
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCTL, &value);
+ amd64_read_pci_cfg(F3, NBCTL, &value);
value &= ~mask;
- value |= pvt->old_nbctl;
+ value |= s->old_nbctl;
- pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+ amd64_write_pci_cfg(F3, NBCTL, value);
- /* restore previous BIOS DRAM ECC "off" setting which we force-enabled */
- if (!pvt->flags.nb_ecc_prev) {
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value);
- value &= ~K8_NBCFG_ECC_ENABLE;
- pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value);
+ /* restore previous BIOS DRAM ECC "off" setting we force-enabled */
+ if (!s->flags.nb_ecc_prev) {
+ amd64_read_pci_cfg(F3, NBCFG, &value);
+ value &= ~NBCFG_ECC_ENABLE;
+ amd64_write_pci_cfg(F3, NBCFG, value);
}
/* restore the NB Enable MCGCTL bit */
- if (amd64_toggle_ecc_err_reporting(pvt, OFF))
- amd64_printk(KERN_WARNING, "Error restoring NB MCGCTL settings!\n");
+ if (toggle_ecc_err_reporting(s, nid, OFF))
+ amd64_warn("Error restoring NB MCGCTL settings!\n");
}
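
restore_ecc_error_reporting() puts back only the two NBCTL bits it saved, leaving the rest of F3x40 untouched. A small sketch of that mask-and-merge step, using invented register contents:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mask      = 0x3;		/* NBCTL[1:0]: UECC/CECC enables  */
	uint32_t nbctl     = 0x00000013;	/* invented F3x40, bits forced on */
	uint32_t old_nbctl = 0x0;		/* BIOS had both bits off         */

	/* the merge done by restore_ecc_error_reporting() */
	nbctl = (nbctl & ~mask) | old_nbctl;
	printf("restored NBCTL: 0x%08x\n", nbctl);
	return 0;
}
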
/*
- * EDAC requires that the BIOS have ECC enabled before taking over the
- * processing of ECC errors. This is because the BIOS can properly initialize
- * the memory system completely. A command line option allows to force-enable
- * hardware ECC later in amd64_enable_ecc_error_reporting().
+ * EDAC requires that the BIOS have ECC enabled before
+ * taking over the processing of ECC errors. A command line
+ * option allows you to force-enable hardware ECC later in
+ * enable_ecc_error_reporting().
*/
static const char *ecc_msg =
"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
@@ -2563,255 +2484,304 @@ static const char *ecc_msg =
"'ecc_enable_override'.\n"
" (Note that use of the override may cause unknown side effects.)\n";
-static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
+static bool ecc_enabled(struct pci_dev *F3, u16 nid)
{
u32 value;
- u8 ecc_enabled = 0;
+ u8 ecc_en = 0;
bool nb_mce_en = false;
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value);
+ amd64_read_pci_cfg(F3, NBCFG, &value);
- ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
- if (!ecc_enabled)
- amd64_printk(KERN_NOTICE, "This node reports that Memory ECC "
- "is currently disabled, set F3x%x[22] (%s).\n",
- K8_NBCFG, pci_name(pvt->misc_f3_ctl));
- else
- amd64_printk(KERN_INFO, "ECC is enabled by BIOS.\n");
+ ecc_en = !!(value & NBCFG_ECC_ENABLE);
+ amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));
- nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id);
+ nb_mce_en = nb_mce_bank_enabled_on_node(nid);
if (!nb_mce_en)
- amd64_printk(KERN_NOTICE, "NB MCE bank disabled, set MSR "
+ amd64_notice("NB MCE bank disabled, set MSR "
"0x%08x[4] on node %d to enable.\n",
- MSR_IA32_MCG_CTL, pvt->mc_node_id);
+ MSR_IA32_MCG_CTL, nid);
- if (!ecc_enabled || !nb_mce_en) {
- if (!ecc_enable_override) {
- amd64_printk(KERN_NOTICE, "%s", ecc_msg);
- return -ENODEV;
- } else {
- amd64_printk(KERN_WARNING, "Forcing ECC checking on!\n");
- }
+ if (!ecc_en || !nb_mce_en) {
+ amd64_notice("%s", ecc_msg);
+ return false;
}
-
- return 0;
+ return true;
}
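
ecc_enabled() boils down to testing bit 22 of F3x44 (NBCFG). A standalone sketch of that decode with an invented sample value; the chipkill bit (23) is shown alongside since the capability flags are derived the same way:

#include <stdint.h>
#include <stdio.h>

#define NBCFG_ECC_ENABLE (1u << 22)	/* F3x44[22]: DramEccEn        */
#define NBCFG_CHIPKILL   (1u << 23)	/* F3x44[23]: chipkill ECC on  */

int main(void)
{
	uint32_t nbcfg = 0x00c00000;	/* invented F3x44 sample */

	printf("DRAM ECC: %s, chipkill: %s\n",
	       (nbcfg & NBCFG_ECC_ENABLE) ? "enabled" : "disabled",
	       (nbcfg & NBCFG_CHIPKILL) ? "enabled" : "disabled");
	return 0;
}
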
-struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
- ARRAY_SIZE(amd64_inj_attrs) +
- 1];
+static int set_mc_sysfs_attrs(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ int rc;
-struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } };
+ rc = amd64_create_sysfs_dbg_files(mci);
+ if (rc < 0)
+ return rc;
-static void amd64_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
-{
- unsigned int i = 0, j = 0;
+ if (pvt->fam >= 0x10) {
+ rc = amd64_create_sysfs_inject_files(mci);
+ if (rc < 0)
+ return rc;
+ }
- for (; i < ARRAY_SIZE(amd64_dbg_attrs); i++)
- sysfs_attrs[i] = amd64_dbg_attrs[i];
+ return 0;
+}
- for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++)
- sysfs_attrs[i] = amd64_inj_attrs[j];
+static void del_mc_sysfs_attrs(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
- sysfs_attrs[i] = terminator;
+ amd64_remove_sysfs_dbg_files(mci);
- mci->mc_driver_sysfs_attributes = sysfs_attrs;
+ if (pvt->fam >= 0x10)
+ amd64_remove_sysfs_inject_files(mci);
}
-static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
+static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
+ struct amd64_family_type *fam)
{
struct amd64_pvt *pvt = mci->pvt_info;
mci->mtype_cap = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
mci->edac_ctl_cap = EDAC_FLAG_NONE;
- if (pvt->nbcap & K8_NBCAP_SECDED)
+ if (pvt->nbcap & NBCAP_SECDED)
mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
- if (pvt->nbcap & K8_NBCAP_CHIPKILL)
+ if (pvt->nbcap & NBCAP_CHIPKILL)
mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
- mci->edac_cap = amd64_determine_edac_cap(pvt);
+ mci->edac_cap = determine_edac_cap(pvt);
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = EDAC_AMD64_VERSION;
- mci->ctl_name = get_amd_family_name(pvt->mc_type_index);
- mci->dev_name = pci_name(pvt->dram_f2_ctl);
+ mci->ctl_name = fam->ctl_name;
+ mci->dev_name = pci_name(pvt->F2);
mci->ctl_page_to_phys = NULL;
/* memory scrubber interface */
- mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
- mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
+ mci->set_sdram_scrub_rate = set_scrub_rate;
+ mci->get_sdram_scrub_rate = get_scrub_rate;
}
/*
- * Init stuff for this DRAM Controller device.
- *
- * Due to a hardware feature on Fam10h CPUs, the Enable Extended Configuration
- * Space feature MUST be enabled on ALL Processors prior to actually reading
- * from the ECS registers. Since the loading of the module can occur on any
- * 'core', and cores don't 'see' all the other processors ECS data when the
- * others are NOT enabled. Our solution is to first enable ECS access in this
- * routine on all processors, gather some data in a amd64_pvt structure and
- * later come back in a finish-setup function to perform that final
- * initialization. See also amd64_init_2nd_stage() for that.
+ * returns a pointer to the family descriptor on success, NULL otherwise.
*/
-static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl,
- int mc_type_index)
+static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
{
- struct amd64_pvt *pvt = NULL;
- int err = 0, ret;
+ struct amd64_family_type *fam_type = NULL;
- ret = -ENOMEM;
- pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
- if (!pvt)
- goto err_exit;
+ pvt->ext_model = boot_cpu_data.x86_model >> 4;
+ pvt->stepping = boot_cpu_data.x86_mask;
+ pvt->model = boot_cpu_data.x86_model;
+ pvt->fam = boot_cpu_data.x86;
- pvt->mc_node_id = get_node_id(dram_f2_ctl);
+ switch (pvt->fam) {
+ case 0xf:
+ fam_type = &family_types[K8_CPUS];
+ pvt->ops = &family_types[K8_CPUS].ops;
+ break;
- pvt->dram_f2_ctl = dram_f2_ctl;
- pvt->ext_model = boot_cpu_data.x86_model >> 4;
- pvt->mc_type_index = mc_type_index;
- pvt->ops = family_ops(mc_type_index);
+ case 0x10:
+ fam_type = &family_types[F10_CPUS];
+ pvt->ops = &family_types[F10_CPUS].ops;
+ break;
- /*
- * We have the dram_f2_ctl device as an argument, now go reserve its
- * sibling devices from the PCI system.
- */
- ret = -ENODEV;
- err = amd64_reserve_mc_sibling_devices(pvt, mc_type_index);
- if (err)
- goto err_free;
+ case 0x15:
+ if (pvt->model == 0x30) {
+ fam_type = &family_types[F15_M30H_CPUS];
+ pvt->ops = &family_types[F15_M30H_CPUS].ops;
+ break;
+ }
- ret = -EINVAL;
- err = amd64_check_ecc_enabled(pvt);
- if (err)
- goto err_put;
+ fam_type = &family_types[F15_CPUS];
+ pvt->ops = &family_types[F15_CPUS].ops;
+ break;
- /*
- * Key operation here: setup of HW prior to performing ops on it. Some
- * setup is required to access ECS data. After this is performed, the
- * 'teardown' function must be called upon error and normal exit paths.
- */
- if (boot_cpu_data.x86 >= 0x10)
- amd64_setup(pvt);
+ case 0x16:
+ if (pvt->model == 0x30) {
+ fam_type = &family_types[F16_M30H_CPUS];
+ pvt->ops = &family_types[F16_M30H_CPUS].ops;
+ break;
+ }
+ fam_type = &family_types[F16_CPUS];
+ pvt->ops = &family_types[F16_CPUS].ops;
+ break;
- /*
- * Save the pointer to the private data for use in 2nd initialization
- * stage
- */
- pvt_lookup[pvt->mc_node_id] = pvt;
+ default:
+ amd64_err("Unsupported family!\n");
+ return NULL;
+ }
- return 0;
+ amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name,
+ (pvt->fam == 0xf ?
+ (pvt->ext_model >= K8_REV_F ? "revF or later "
+ : "revE or earlier ")
+ : ""), pvt->mc_node_id);
+ return fam_type;
+}
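
per_family_init() trusts boot_cpu_data, which the kernel has already derived from CPUID leaf 1. For reference, a sketch of that derivation under the standard x86 encoding (the sample EAX value is invented; the extended fields apply because the base family is 0xf):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t eax = 0x00610f01;	/* invented CPUID(1).EAX for a Fam15h CPU */
	unsigned int fam, model, stepping;

	stepping = eax & 0xf;
	model    = ((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0);	/* ext model glued on */
	fam      = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);	/* base + extended    */

	/* per_family_init() additionally keeps ext_model = model >> 4 */
	printf("fam 0x%x, model 0x%x, ext_model %u, stepping %u\n",
	       fam, model, model >> 4, stepping);
	return 0;
}
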
-err_put:
- amd64_free_mc_sibling_devices(pvt);
+static int init_one_instance(struct pci_dev *F2)
+{
+ struct amd64_pvt *pvt = NULL;
+ struct amd64_family_type *fam_type = NULL;
+ struct mem_ctl_info *mci = NULL;
+ struct edac_mc_layer layers[2];
+ int err = 0, ret;
+ u16 nid = amd_get_node_id(F2);
-err_free:
- kfree(pvt);
+ ret = -ENOMEM;
+ pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
+ if (!pvt)
+ goto err_ret;
-err_exit:
- return ret;
-}
+ pvt->mc_node_id = nid;
+ pvt->F2 = F2;
-/*
- * This is the finishing stage of the init code. Needs to be performed after all
- * MCs' hardware have been prepped for accessing extended config space.
- */
-static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
-{
- int node_id = pvt->mc_node_id;
- struct mem_ctl_info *mci;
- int ret = -ENODEV;
+ ret = -EINVAL;
+ fam_type = per_family_init(pvt);
+ if (!fam_type)
+ goto err_free;
- amd64_read_mc_registers(pvt);
+ ret = -ENODEV;
+ err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id);
+ if (err)
+ goto err_free;
+
+ read_mc_regs(pvt);
/*
* We need to determine how many memory channels there are. Then use
* that information for calculating the size of the dynamic instance
- * tables in the 'mci' structure
+ * tables in the 'mci' structure.
*/
+ ret = -EINVAL;
pvt->channel_count = pvt->ops->early_channel_count(pvt);
if (pvt->channel_count < 0)
- goto err_exit;
+ goto err_siblings;
ret = -ENOMEM;
- mci = edac_mc_alloc(0, pvt->cs_count, pvt->channel_count, node_id);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = pvt->csels[0].b_cnt;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+
+ /*
+ * Always allocate two channels since we can have setups with DIMMs on
+ * only one channel. Also, this simplifies handling later for the price
+ * of a couple of KBs tops.
+ */
+ layers[1].size = 2;
+ layers[1].is_virt_csrow = false;
+
+ mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
if (!mci)
- goto err_exit;
+ goto err_siblings;
mci->pvt_info = pvt;
+ mci->pdev = &pvt->F2->dev;
- mci->dev = &pvt->dram_f2_ctl->dev;
- amd64_setup_mci_misc_attributes(mci);
+ setup_mci_misc_attrs(mci, fam_type);
- if (amd64_init_csrows(mci))
+ if (init_csrows(mci))
mci->edac_cap = EDAC_FLAG_NONE;
- amd64_enable_ecc_error_reporting(mci);
- amd64_set_mc_sysfs_attributes(mci);
-
ret = -ENODEV;
if (edac_mc_add_mc(mci)) {
- debugf1("failed edac_mc_add_mc()\n");
+ edac_dbg(1, "failed edac_mc_add_mc()\n");
goto err_add_mc;
}
-
- mci_lookup[node_id] = mci;
- pvt_lookup[node_id] = NULL;
+ if (set_mc_sysfs_attrs(mci)) {
+ edac_dbg(1, "failed edac_mc_add_mc()\n");
+ goto err_add_sysfs;
+ }
/* register stuff with EDAC MCE */
if (report_gart_errors)
amd_report_gart_errors(true);
- amd_register_ecc_decoder(amd64_decode_bus_error);
+ amd_register_ecc_decoder(decode_bus_error);
+
+ mcis[nid] = mci;
+
+ atomic_inc(&drv_instances);
return 0;
+err_add_sysfs:
+ edac_mc_del_mc(mci->pdev);
err_add_mc:
edac_mc_free(mci);
-err_exit:
- debugf0("failure to init 2nd stage: ret=%d\n", ret);
-
- amd64_restore_ecc_error_reporting(pvt);
+err_siblings:
+ free_mc_sibling_devs(pvt);
- if (boot_cpu_data.x86 > 0xf)
- amd64_teardown(pvt);
-
- amd64_free_mc_sibling_devices(pvt);
-
- kfree(pvt_lookup[pvt->mc_node_id]);
- pvt_lookup[node_id] = NULL;
+err_free:
+ kfree(pvt);
+err_ret:
return ret;
}
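
The csrow-by-channel layout handed to edac_mc_alloc() above determines how many DIMM slots the EDAC core tracks. A toy model of that bookkeeping (user-space sketch only; the real allocator builds the full mci structure):

#include <stdio.h>

enum layer_type { LAYER_CHIP_SELECT, LAYER_CHANNEL };

struct layer {
	enum layer_type type;
	unsigned int size;
	int is_virt_csrow;
};

int main(void)
{
	/* mirrors init_one_instance(): chip selects x channels, channels fixed at 2 */
	struct layer layers[2] = {
		{ LAYER_CHIP_SELECT, 8, 1 },	/* pvt->csels[0].b_cnt, typically 8 */
		{ LAYER_CHANNEL,     2, 0 },	/* always 2, per the comment above  */
	};
	unsigned int slots = layers[0].size * layers[1].size;

	printf("EDAC core will track %u dimm slots\n", slots);
	return 0;
}
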
-
-static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
- const struct pci_device_id *mc_type)
+static int probe_one_instance(struct pci_dev *pdev,
+ const struct pci_device_id *mc_type)
{
+ u16 nid = amd_get_node_id(pdev);
+ struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+ struct ecc_settings *s;
int ret = 0;
- debugf0("(MC node=%d,mc_type='%s')\n", get_node_id(pdev),
- get_amd_family_name(mc_type->driver_data));
-
ret = pci_enable_device(pdev);
- if (ret < 0)
- ret = -EIO;
- else
- ret = amd64_probe_one_instance(pdev, mc_type->driver_data);
+ if (ret < 0) {
+ edac_dbg(0, "ret=%d\n", ret);
+ return -EIO;
+ }
- if (ret < 0)
- debugf0("ret=%d\n", ret);
+ ret = -ENOMEM;
+ s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
+ if (!s)
+ goto err_out;
+
+ ecc_stngs[nid] = s;
+
+ if (!ecc_enabled(F3, nid)) {
+ ret = -ENODEV;
+ if (!ecc_enable_override)
+ goto err_enable;
+
+ amd64_warn("Forcing ECC on!\n");
+
+ if (!enable_ecc_error_reporting(s, nid, F3))
+ goto err_enable;
+ }
+
+ ret = init_one_instance(pdev);
+ if (ret < 0) {
+ amd64_err("Error probing instance: %d\n", nid);
+ restore_ecc_error_reporting(s, nid, F3);
+ }
+
+ return ret;
+
+err_enable:
+ kfree(s);
+ ecc_stngs[nid] = NULL;
+
+err_out:
return ret;
}
-static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
+static void remove_one_instance(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
+ u16 nid = amd_get_node_id(pdev);
+ struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+ struct ecc_settings *s = ecc_stngs[nid];
+ mci = find_mci_by_dev(&pdev->dev);
+ WARN_ON(!mci);
+
+ del_mc_sysfs_attrs(mci);
/* Remove from EDAC CORE tracking list */
mci = edac_mc_del_mc(&pdev->dev);
if (!mci)
@@ -2819,20 +2789,20 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
pvt = mci->pvt_info;
- amd64_restore_ecc_error_reporting(pvt);
-
- if (boot_cpu_data.x86 > 0xf)
- amd64_teardown(pvt);
+ restore_ecc_error_reporting(s, nid, F3);
- amd64_free_mc_sibling_devices(pvt);
+ free_mc_sibling_devs(pvt);
/* unregister from EDAC MCE */
amd_report_gart_errors(false);
- amd_unregister_ecc_decoder(amd64_decode_bus_error);
+ amd_unregister_ecc_decoder(decode_bus_error);
+
+ kfree(ecc_stngs[nid]);
+ ecc_stngs[nid] = NULL;
/* Free the EDAC CORE resources */
mci->pvt_info = NULL;
- mci_lookup[pvt->mc_node_id] = NULL;
+ mcis[nid] = NULL;
kfree(pvt);
edac_mc_free(mci);
@@ -2843,7 +2813,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
* PCI core identifies what devices are on a system during boot, and then
 * queries this table to see if this driver handles a given device found.
*/
-static const struct pci_device_id amd64_pci_table[] __devinitdata = {
+static const struct pci_device_id amd64_pci_table[] = {
{
.vendor = PCI_VENDOR_ID_AMD,
.device = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
@@ -2851,7 +2821,6 @@ static const struct pci_device_id amd64_pci_table[] __devinitdata = {
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
- .driver_data = K8_CPUS
},
{
.vendor = PCI_VENDOR_ID_AMD,
@@ -2860,112 +2829,134 @@ static const struct pci_device_id amd64_pci_table[] __devinitdata = {
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
- .driver_data = F10_CPUS
},
{
.vendor = PCI_VENDOR_ID_AMD,
- .device = PCI_DEVICE_ID_AMD_11H_NB_DRAM,
+ .device = PCI_DEVICE_ID_AMD_15H_NB_F2,
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
- .driver_data = F11_CPUS
},
+ {
+ .vendor = PCI_VENDOR_ID_AMD,
+ .device = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .class = 0,
+ .class_mask = 0,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_AMD,
+ .device = PCI_DEVICE_ID_AMD_16H_NB_F2,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .class = 0,
+ .class_mask = 0,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_AMD,
+ .device = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .class = 0,
+ .class_mask = 0,
+ },
+
{0, }
};
MODULE_DEVICE_TABLE(pci, amd64_pci_table);
static struct pci_driver amd64_pci_driver = {
.name = EDAC_MOD_STR,
- .probe = amd64_init_one_instance,
- .remove = __devexit_p(amd64_remove_one_instance),
+ .probe = probe_one_instance,
+ .remove = remove_one_instance,
.id_table = amd64_pci_table,
};
-static void amd64_setup_pci_device(void)
+static void setup_pci_device(void)
{
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
- if (amd64_ctl_pci)
+ if (pci_ctl)
return;
- mci = mci_lookup[0];
- if (mci) {
-
- pvt = mci->pvt_info;
- amd64_ctl_pci =
- edac_pci_create_generic_ctl(&pvt->dram_f2_ctl->dev,
- EDAC_MOD_STR);
-
- if (!amd64_ctl_pci) {
- pr_warning("%s(): Unable to create PCI control\n",
- __func__);
+ mci = mcis[0];
+ if (!mci)
+ return;
- pr_warning("%s(): PCI error report via EDAC not set\n",
- __func__);
- }
+ pvt = mci->pvt_info;
+ pci_ctl = edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);
+ if (!pci_ctl) {
+ pr_warn("%s(): Unable to create PCI control\n", __func__);
+ pr_warn("%s(): PCI error report via EDAC not set\n", __func__);
}
}
static int __init amd64_edac_init(void)
{
- int nb, err = -ENODEV;
- bool load_ok = false;
+ int err = -ENODEV;
- edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n");
+ printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
opstate_init();
- if (cache_k8_northbridges() < 0)
+ if (amd_cache_northbridges() < 0)
goto err_ret;
+ err = -ENOMEM;
+ mcis = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
+ ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
+ if (!(mcis && ecc_stngs))
+ goto err_free;
+
msrs = msrs_alloc();
if (!msrs)
- goto err_ret;
+ goto err_free;
err = pci_register_driver(&amd64_pci_driver);
if (err)
goto err_pci;
- /*
- * At this point, the array 'pvt_lookup[]' contains pointers to alloc'd
- * amd64_pvt structs. These will be used in the 2nd stage init function
- * to finish initialization of the MC instances.
- */
err = -ENODEV;
- for (nb = 0; nb < k8_northbridges.num; nb++) {
- if (!pvt_lookup[nb])
- continue;
-
- err = amd64_init_2nd_stage(pvt_lookup[nb]);
- if (err)
- goto err_2nd_stage;
+ if (!atomic_read(&drv_instances))
+ goto err_no_instances;
- load_ok = true;
- }
-
- if (load_ok) {
- amd64_setup_pci_device();
- return 0;
- }
+ setup_pci_device();
+ return 0;
-err_2nd_stage:
+err_no_instances:
pci_unregister_driver(&amd64_pci_driver);
+
err_pci:
msrs_free(msrs);
msrs = NULL;
+
+err_free:
+ kfree(mcis);
+ mcis = NULL;
+
+ kfree(ecc_stngs);
+ ecc_stngs = NULL;
+
err_ret:
return err;
}
static void __exit amd64_edac_exit(void)
{
- if (amd64_ctl_pci)
- edac_pci_release_generic_ctl(amd64_ctl_pci);
+ if (pci_ctl)
+ edac_pci_release_generic_ctl(pci_ctl);
pci_unregister_driver(&amd64_pci_driver);
+ kfree(ecc_stngs);
+ ecc_stngs = NULL;
+
+ kfree(mcis);
+ mcis = NULL;
+
msrs_free(msrs);
msrs = NULL;
}
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 044aee4f944..d903e0c2114 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -33,7 +33,7 @@
* detection. The mods to Rev F required more family
* information detection.
*
- * Changes/Fixes by Borislav Petkov <borislav.petkov@amd.com>:
+ * Changes/Fixes by Borislav Petkov <bp@alien8.de>:
* - misc fixes and code cleanups
*
* This module is based on the following documents
@@ -74,11 +74,26 @@
#include "edac_core.h"
#include "mce_amd.h"
-#define amd64_printk(level, fmt, arg...) \
- edac_printk(level, "amd64", fmt, ##arg)
+#define amd64_debug(fmt, arg...) \
+ edac_printk(KERN_DEBUG, "amd64", fmt, ##arg)
-#define amd64_mc_printk(mci, level, fmt, arg...) \
- edac_mc_chipset_printk(mci, level, "amd64", fmt, ##arg)
+#define amd64_info(fmt, arg...) \
+ edac_printk(KERN_INFO, "amd64", fmt, ##arg)
+
+#define amd64_notice(fmt, arg...) \
+ edac_printk(KERN_NOTICE, "amd64", fmt, ##arg)
+
+#define amd64_warn(fmt, arg...) \
+ edac_printk(KERN_WARNING, "amd64", fmt, ##arg)
+
+#define amd64_err(fmt, arg...) \
+ edac_printk(KERN_ERR, "amd64", fmt, ##arg)
+
+#define amd64_mc_warn(mci, fmt, arg...) \
+ edac_mc_chipset_printk(mci, KERN_WARNING, "amd64", fmt, ##arg)
+
+#define amd64_mc_err(mci, fmt, arg...) \
+ edac_mc_chipset_printk(mci, KERN_ERR, "amd64", fmt, ##arg)
/*
* Throughout the comments in this code, the following terms are used:
@@ -129,19 +144,17 @@
* sections 3.5.4 and 3.5.5 for more information.
*/
-#define EDAC_AMD64_VERSION " Ver: 3.3.0 " __DATE__
+#define EDAC_AMD64_VERSION "3.4.0"
#define EDAC_MOD_STR "amd64_edac"
-#define EDAC_MAX_NUMNODES 8
-
/* Extended Model from CPUID, for CPU Revision numbers */
#define K8_REV_D 1
#define K8_REV_E 2
#define K8_REV_F 4
/* Hardware limit on ChipSelect rows per MC and processors per system */
-#define MAX_CS_COUNT 8
-#define DRAM_REG_COUNT 8
+#define NUM_CHIPSELECTS 8
+#define DRAM_RANGES 8
#define ON true
#define OFF false
@@ -149,251 +162,191 @@
/*
* PCI-defined configuration space registers
*/
-
+#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b
+#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c
+#define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
+#define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602
+#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531
+#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532
+#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581
+#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582
/*
* Function 1 - Address Map
*/
-#define K8_DRAM_BASE_LOW 0x40
-#define K8_DRAM_LIMIT_LOW 0x44
-#define K8_DHAR 0xf0
-
-#define DHAR_VALID BIT(0)
-#define F10_DRAM_MEM_HOIST_VALID BIT(1)
+#define DRAM_BASE_LO 0x40
+#define DRAM_LIMIT_LO 0x44
-#define DHAR_BASE_MASK 0xff000000
-#define dhar_base(dhar) (dhar & DHAR_BASE_MASK)
-
-#define K8_DHAR_OFFSET_MASK 0x0000ff00
-#define k8_dhar_offset(dhar) ((dhar & K8_DHAR_OFFSET_MASK) << 16)
+/*
+ * F15 M30h D18F1x2[1C:00]
+ */
+#define DRAM_CONT_BASE 0x200
+#define DRAM_CONT_LIMIT 0x204
-#define F10_DHAR_OFFSET_MASK 0x0000ff80
- /* NOTE: Extra mask bit vs K8 */
-#define f10_dhar_offset(dhar) ((dhar & F10_DHAR_OFFSET_MASK) << 16)
+/*
+ * F15 M30h D18F1x2[4C:40]
+ */
+#define DRAM_CONT_HIGH_OFF 0x240
+#define dram_rw(pvt, i) ((u8)(pvt->ranges[i].base.lo & 0x3))
+#define dram_intlv_sel(pvt, i) ((u8)((pvt->ranges[i].lim.lo >> 8) & 0x7))
+#define dram_dst_node(pvt, i) ((u8)(pvt->ranges[i].lim.lo & 0x7))
-/* F10 High BASE/LIMIT registers */
-#define F10_DRAM_BASE_HIGH 0x140
-#define F10_DRAM_LIMIT_HIGH 0x144
+#define DHAR 0xf0
+#define dhar_mem_hoist_valid(pvt) ((pvt)->dhar & BIT(1))
+#define dhar_base(pvt) ((pvt)->dhar & 0xff000000)
+#define k8_dhar_offset(pvt) (((pvt)->dhar & 0x0000ff00) << 16)
+ /* NOTE: Extra mask bit vs K8 */
+#define f10_dhar_offset(pvt) (((pvt)->dhar & 0x0000ff80) << 16)
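
The two DHAR offset macros differ by a single mask bit: F10h's DramHoleOffset field extends one bit lower (bit 7) than K8's. A worked example with an invented F1xF0 value where that extra bit matters:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t dhar = 0xc0003081;	/* invented F1xF0 value, valid bit set */

	uint64_t base    = dhar & 0xff000000;			/* dhar_base()       */
	uint64_t k8_off  = (uint64_t)(dhar & 0x0000ff00) << 16;	/* k8_dhar_offset()  */
	uint64_t f10_off = (uint64_t)(dhar & 0x0000ff80) << 16;	/* f10_dhar_offset() */

	printf("hole base 0x%llx, K8 offset 0x%llx, F10 offset 0x%llx\n",
	       (unsigned long long)base, (unsigned long long)k8_off,
	       (unsigned long long)f10_off);
	return 0;
}
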
-/*
- * Function 2 - DRAM controller
- */
-#define K8_DCSB0 0x40
-#define F10_DCSB1 0x140
+#define DCT_CFG_SEL 0x10C
-#define K8_DCSB_CS_ENABLE BIT(0)
-#define K8_DCSB_NPT_SPARE BIT(1)
-#define K8_DCSB_NPT_TESTFAIL BIT(2)
+#define DRAM_LOCAL_NODE_BASE 0x120
+#define DRAM_LOCAL_NODE_LIM 0x124
-/*
- * REV E: select [31:21] and [15:9] from DCSB and the shift amount to form
- * the address
- */
-#define REV_E_DCSB_BASE_BITS (0xFFE0FE00ULL)
-#define REV_E_DCS_SHIFT 4
+#define DRAM_BASE_HI 0x140
+#define DRAM_LIMIT_HI 0x144
-#define REV_F_F1Xh_DCSB_BASE_BITS (0x1FF83FE0ULL)
-#define REV_F_F1Xh_DCS_SHIFT 8
/*
- * REV F and later: selects [28:19] and [13:5] from DCSB and the shift amount
- * to form the address
+ * Function 2 - DRAM controller
*/
-#define REV_F_DCSB_BASE_BITS (0x1FF83FE0ULL)
-#define REV_F_DCS_SHIFT 8
-
-/* DRAM CS Mask Registers */
-#define K8_DCSM0 0x60
-#define F10_DCSM1 0x160
-
-/* REV E: select [29:21] and [15:9] from DCSM */
-#define REV_E_DCSM_MASK_BITS 0x3FE0FE00
+#define DCSB0 0x40
+#define DCSB1 0x140
+#define DCSB_CS_ENABLE BIT(0)
-/* unused bits [24:20] and [12:0] */
-#define REV_E_DCS_NOTUSED_BITS 0x01F01FFF
+#define DCSM0 0x60
+#define DCSM1 0x160
-/* REV F and later: select [28:19] and [13:5] from DCSM */
-#define REV_F_F1Xh_DCSM_MASK_BITS 0x1FF83FE0
-
-/* unused bits [26:22] and [12:0] */
-#define REV_F_F1Xh_DCS_NOTUSED_BITS 0x07C01FFF
+#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE)
#define DBAM0 0x80
#define DBAM1 0x180
/* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */
-#define DBAM_DIMM(i, reg) ((((reg) >> (4*i))) & 0xF)
+#define DBAM_DIMM(i, reg) ((((reg) >> (4*(i)))) & 0xF)
#define DBAM_MAX_VALUE 11
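
DBAM packs one 4-bit cs_mode code per DIMM, low nibble first, which is exactly what DBAM_DIMM() extracts. A quick demonstration with a made-up F2x80 value:

#include <stdint.h>
#include <stdio.h>

#define DBAM_DIMM(i, reg) ((((reg) >> (4 * (i)))) & 0xF)

int main(void)
{
	uint32_t dbam = 0x00008844;	/* invented F2x80 value */
	int i;

	/* prints cs_mode 4, 4, 8, 8 for DIMMs 0..3 */
	for (i = 0; i < 4; i++)
		printf("DIMM %d: cs_mode %u\n", i, DBAM_DIMM(i, dbam));
	return 0;
}
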
-
-#define F10_DCLR_0 0x90
-#define F10_DCLR_1 0x190
+#define DCLR0 0x90
+#define DCLR1 0x190
#define REVE_WIDTH_128 BIT(16)
-#define F10_WIDTH_128 BIT(11)
+#define WIDTH_128 BIT(11)
+#define DCHR0 0x94
+#define DCHR1 0x194
+#define DDR3_MODE BIT(8)
-#define F10_DCHR_0 0x94
-#define F10_DCHR_1 0x194
+#define DCT_SEL_LO 0x110
+#define dct_high_range_enabled(pvt) ((pvt)->dct_sel_lo & BIT(0))
+#define dct_interleave_enabled(pvt) ((pvt)->dct_sel_lo & BIT(2))
-#define F10_DCHR_FOUR_RANK_DIMM BIT(18)
-#define DDR3_MODE BIT(8)
-#define F10_DCHR_MblMode BIT(6)
+#define dct_ganging_enabled(pvt) ((boot_cpu_data.x86 == 0x10) && ((pvt)->dct_sel_lo & BIT(4)))
+#define dct_data_intlv_enabled(pvt) ((pvt)->dct_sel_lo & BIT(5))
+#define dct_memory_cleared(pvt) ((pvt)->dct_sel_lo & BIT(10))
-#define F10_DCTL_SEL_LOW 0x110
-#define dct_sel_baseaddr(pvt) ((pvt->dram_ctl_select_low) & 0xFFFFF800)
-#define dct_sel_interleave_addr(pvt) (((pvt->dram_ctl_select_low) >> 6) & 0x3)
-#define dct_high_range_enabled(pvt) (pvt->dram_ctl_select_low & BIT(0))
-#define dct_interleave_enabled(pvt) (pvt->dram_ctl_select_low & BIT(2))
-#define dct_ganging_enabled(pvt) (pvt->dram_ctl_select_low & BIT(4))
-#define dct_data_intlv_enabled(pvt) (pvt->dram_ctl_select_low & BIT(5))
-#define dct_dram_enabled(pvt) (pvt->dram_ctl_select_low & BIT(8))
-#define dct_memory_cleared(pvt) (pvt->dram_ctl_select_low & BIT(10))
+#define SWAP_INTLV_REG 0x10c
-#define F10_DCTL_SEL_HIGH 0x114
+#define DCT_SEL_HI 0x114
/*
* Function 3 - Misc Control
*/
-#define K8_NBCTL 0x40
-
-/* Correctable ECC error reporting enable */
-#define K8_NBCTL_CECCEn BIT(0)
-
-/* UnCorrectable ECC error reporting enable */
-#define K8_NBCTL_UECCEn BIT(1)
-
-#define K8_NBCFG 0x44
-#define K8_NBCFG_CHIPKILL BIT(23)
-#define K8_NBCFG_ECC_ENABLE BIT(22)
+#define NBCTL 0x40
-#define K8_NBSL 0x48
+#define NBCFG 0x44
+#define NBCFG_CHIPKILL BIT(23)
+#define NBCFG_ECC_ENABLE BIT(22)
-
-/* Family F10h: Normalized Extended Error Codes */
-#define F10_NBSL_EXT_ERR_RES 0x0
+/* F3x48: NBSL */
#define F10_NBSL_EXT_ERR_ECC 0x8
+#define NBSL_PP_OBS 0x2
-/* Next two are overloaded values */
-#define F10_NBSL_EXT_ERR_LINK_PROTO 0xB
-#define F10_NBSL_EXT_ERR_L3_PROTO 0xB
-
-#define F10_NBSL_EXT_ERR_NB_ARRAY 0xC
-#define F10_NBSL_EXT_ERR_DRAM_PARITY 0xD
-#define F10_NBSL_EXT_ERR_LINK_RETRY 0xE
-
-/* Next two are overloaded values */
-#define F10_NBSL_EXT_ERR_GART_WALK 0xF
-#define F10_NBSL_EXT_ERR_DEV_WALK 0xF
-
-/* 0x10 to 0x1B: Reserved */
-#define F10_NBSL_EXT_ERR_L3_DATA 0x1C
-#define F10_NBSL_EXT_ERR_L3_TAG 0x1D
-#define F10_NBSL_EXT_ERR_L3_LRU 0x1E
-
-/* K8: Normalized Extended Error Codes */
-#define K8_NBSL_EXT_ERR_ECC 0x0
-#define K8_NBSL_EXT_ERR_CRC 0x1
-#define K8_NBSL_EXT_ERR_SYNC 0x2
-#define K8_NBSL_EXT_ERR_MST 0x3
-#define K8_NBSL_EXT_ERR_TGT 0x4
-#define K8_NBSL_EXT_ERR_GART 0x5
-#define K8_NBSL_EXT_ERR_RMW 0x6
-#define K8_NBSL_EXT_ERR_WDT 0x7
-#define K8_NBSL_EXT_ERR_CHIPKILL_ECC 0x8
-#define K8_NBSL_EXT_ERR_DRAM_PARITY 0xD
-
-/*
- * The following are for BUS type errors AFTER values have been normalized by
- * shifting right
- */
-#define K8_NBSL_PP_SRC 0x0
-#define K8_NBSL_PP_RES 0x1
-#define K8_NBSL_PP_OBS 0x2
-#define K8_NBSL_PP_GENERIC 0x3
-
-#define EXTRACT_ERR_CPU_MAP(x) ((x) & 0xF)
-
-#define K8_NBEAL 0x50
-#define K8_NBEAH 0x54
-#define K8_SCRCTRL 0x58
-
-#define F10_NB_CFG_LOW 0x88
-#define F10_NB_CFG_LOW_ENABLE_EXT_CFG BIT(14)
-
-#define F10_NB_CFG_HIGH 0x8C
+#define SCRCTRL 0x58
#define F10_ONLINE_SPARE 0xB0
-#define F10_ONLINE_SPARE_SWAPDONE0(x) ((x) & BIT(1))
-#define F10_ONLINE_SPARE_SWAPDONE1(x) ((x) & BIT(3))
-#define F10_ONLINE_SPARE_BADDRAM_CS0(x) (((x) >> 4) & 0x00000007)
-#define F10_ONLINE_SPARE_BADDRAM_CS1(x) (((x) >> 8) & 0x00000007)
+#define online_spare_swap_done(pvt, c) (((pvt)->online_spare >> (1 + 2 * (c))) & 0x1)
+#define online_spare_bad_dramcs(pvt, c) (((pvt)->online_spare >> (4 + 4 * (c))) & 0x7)
#define F10_NB_ARRAY_ADDR 0xB8
-
-#define F10_NB_ARRAY_DRAM_ECC 0x80000000
+#define F10_NB_ARRAY_DRAM BIT(31)
/* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline */
-#define SET_NB_ARRAY_ADDRESS(section) (((section) & 0x3) << 1)
+#define SET_NB_ARRAY_ADDR(section) (((section) & 0x3) << 1)
#define F10_NB_ARRAY_DATA 0xBC
+#define F10_NB_ARR_ECC_WR_REQ BIT(17)
+#define SET_NB_DRAM_INJECTION_WRITE(inj) \
+ (BIT(((inj.word) & 0xF) + 20) | \
+ F10_NB_ARR_ECC_WR_REQ | inj.bit_map)
+#define SET_NB_DRAM_INJECTION_READ(inj) \
+ (BIT(((inj.word) & 0xF) + 20) | \
+ BIT(16) | inj.bit_map)
-#define SET_NB_DRAM_INJECTION_WRITE(word, bits) \
- (BIT(((word) & 0xF) + 20) | \
- BIT(17) | bits)
-
-#define SET_NB_DRAM_INJECTION_READ(word, bits) \
- (BIT(((word) & 0xF) + 20) | \
- BIT(16) | bits)
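
The injection macros assemble a single F10_NB_ARRAY_DATA word: bits 20 and up select the 16-bit chunk named by inj.word, bit 17 (write) or bit 16 (read) carries the request type, and the low bits hold the ECC bit map. A worked encoding with sample injection parameters:

#include <stdint.h>
#include <stdio.h>

#define BIT(n)			(1u << (n))
#define F10_NB_ARR_ECC_WR_REQ	BIT(17)

struct error_injection { uint32_t section, word, bit_map; };

int main(void)
{
	struct error_injection inj = { .section = 0, .word = 2, .bit_map = 0x5 };

	/* same arithmetic as SET_NB_DRAM_INJECTION_WRITE/READ */
	uint32_t wr = BIT((inj.word & 0xF) + 20) | F10_NB_ARR_ECC_WR_REQ | inj.bit_map;
	uint32_t rd = BIT((inj.word & 0xF) + 20) | BIT(16) | inj.bit_map;

	printf("F10_NB_ARRAY_DATA write=0x%08x read=0x%08x\n", wr, rd);
	return 0;
}
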
-#define K8_NBCAP 0xE8
-#define K8_NBCAP_CORES (BIT(12)|BIT(13))
-#define K8_NBCAP_CHIPKILL BIT(4)
-#define K8_NBCAP_SECDED BIT(3)
-#define K8_NBCAP_DCT_DUAL BIT(0)
+#define NBCAP 0xE8
+#define NBCAP_CHIPKILL BIT(4)
+#define NBCAP_SECDED BIT(3)
+#define NBCAP_DCT_DUAL BIT(0)
#define EXT_NB_MCA_CFG 0x180
/* MSRs */
-#define K8_MSR_MCGCTL_NBE BIT(4)
+#define MSR_MCGCTL_NBE BIT(4)
-#define K8_MSR_MC4CTL 0x0410
-#define K8_MSR_MC4STAT 0x0411
-#define K8_MSR_MC4ADDR 0x0412
-
-/* AMD sets the first MC device at device ID 0x18. */
-static inline int get_node_id(struct pci_dev *pdev)
-{
- return PCI_SLOT(pdev->devfn) - 0x18;
-}
-
-enum amd64_chipset_families {
+enum amd_families {
K8_CPUS = 0,
F10_CPUS,
- F11_CPUS,
+ F15_CPUS,
+ F15_M30H_CPUS,
+ F16_CPUS,
+ F16_M30H_CPUS,
+ NUM_FAMILIES,
};
/* Error injection control structure */
struct error_injection {
- u32 section;
- u32 word;
- u32 bit_map;
+ u32 section;
+ u32 word;
+ u32 bit_map;
+};
+
+/* low and high part of PCI config space regs */
+struct reg_pair {
+ u32 lo, hi;
+};
+
+/*
+ * See F1x[1, 0][7C:40] DRAM Base/Limit Registers
+ */
+struct dram_range {
+ struct reg_pair base;
+ struct reg_pair lim;
+};
+
+/* A DCT chip selects collection */
+struct chip_select {
+ u32 csbases[NUM_CHIPSELECTS];
+ u8 b_cnt;
+
+ u32 csmasks[NUM_CHIPSELECTS];
+ u8 m_cnt;
};
struct amd64_pvt {
- /* pci_device handles which we utilize */
- struct pci_dev *addr_f1_ctl;
- struct pci_dev *dram_f2_ctl;
- struct pci_dev *misc_f3_ctl;
+ struct low_ops *ops;
- int mc_node_id; /* MC index of this MC node */
- int ext_model; /* extended model value of this node */
+ /* pci_device handles which we utilize */
+ struct pci_dev *F1, *F2, *F3;
- struct low_ops *ops; /* pointer to per PCI Device ID func table */
+ u16 mc_node_id; /* MC index of this MC node */
+ u8 fam; /* CPU family */
+ u8 model; /* ... model */
+ u8 stepping; /* ... stepping */
+ int ext_model; /* extended model value of this node */
int channel_count;
/* Raw registers */
@@ -408,155 +361,193 @@ struct amd64_pvt {
u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */
u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */
- /* DRAM CS Base Address Registers F2x[1,0][5C:40] */
- u32 dcsb0[MAX_CS_COUNT];
- u32 dcsb1[MAX_CS_COUNT];
-
- /* DRAM CS Mask Registers F2x[1,0][6C:60] */
- u32 dcsm0[MAX_CS_COUNT];
- u32 dcsm1[MAX_CS_COUNT];
-
- /*
- * Decoded parts of DRAM BASE and LIMIT Registers
- * F1x[78,70,68,60,58,50,48,40]
- */
- u64 dram_base[DRAM_REG_COUNT];
- u64 dram_limit[DRAM_REG_COUNT];
- u8 dram_IntlvSel[DRAM_REG_COUNT];
- u8 dram_IntlvEn[DRAM_REG_COUNT];
- u8 dram_DstNode[DRAM_REG_COUNT];
- u8 dram_rw_en[DRAM_REG_COUNT];
-
- /*
- * The following fields are set at (load) run time, after CPU revision
- * has been determined, since the dct_base and dct_mask registers vary
- * based on revision
- */
- u32 dcsb_base; /* DCSB base bits */
- u32 dcsm_mask; /* DCSM mask bits */
- u32 cs_count; /* num chip selects (== num DCSB registers) */
- u32 num_dcsm; /* Number of DCSM registers */
- u32 dcs_mask_notused; /* DCSM notused mask bits */
- u32 dcs_shift; /* DCSB and DCSM shift value */
+ /* one for each DCT */
+ struct chip_select csels[2];
+
+ /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */
+ struct dram_range ranges[DRAM_RANGES];
u64 top_mem; /* top of memory below 4GB */
u64 top_mem2; /* top of memory above 4GB */
- u32 dram_ctl_select_low; /* DRAM Controller Select Low Reg */
- u32 dram_ctl_select_high; /* DRAM Controller Select High Reg */
- u32 online_spare; /* On-Line spare Reg */
+ u32 dct_sel_lo; /* DRAM Controller Select Low */
+ u32 dct_sel_hi; /* DRAM Controller Select High */
+ u32 online_spare; /* On-Line spare Reg */
/* x4 or x8 syndromes in use */
- u8 syn_type;
-
- /* temp storage for when input is received from sysfs */
- struct err_regs ctl_error_info;
+ u8 ecc_sym_sz;
/* place to store error injection parameters prior to issue */
struct error_injection injection;
+};
- /* Save old hw registers' values before we modified them */
- u32 nbctl_mcgctl_saved; /* When true, following 2 are valid */
- u32 old_nbctl;
+enum err_codes {
+ DECODE_OK = 0,
+ ERR_NODE = -1,
+ ERR_CSROW = -2,
+ ERR_CHANNEL = -3,
+};
+
+struct err_info {
+ int err_code;
+ struct mem_ctl_info *src_mci;
+ int csrow;
+ int channel;
+ u16 syndrome;
+ u32 page;
+ u32 offset;
+};
+
+static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i)
+{
+ u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8;
+
+ if (boot_cpu_data.x86 == 0xf)
+ return addr;
+
+ return (((u64)pvt->ranges[i].base.hi & 0x000000ff) << 40) | addr;
+}
+
+static inline u64 get_dram_limit(struct amd64_pvt *pvt, u8 i)
+{
+ u64 lim = (((u64)pvt->ranges[i].lim.lo & 0xffff0000) << 8) | 0x00ffffff;
+
+ if (boot_cpu_data.x86 == 0xf)
+ return lim;
+
+ return (((u64)pvt->ranges[i].lim.hi & 0x000000ff) << 40) | lim;
+}
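
get_dram_base()/get_dram_limit() splice a 40-bit address out of a lo/hi register pair; on K8 (family 0xf) only the lo half exists. A worked example with invented register contents:

#include <stdint.h>
#include <stdio.h>

struct reg_pair { uint32_t lo, hi; };

int main(void)
{
	/* invented F1x40/F1x44 plus F1x140/F1x144 contents for one DRAM range */
	struct reg_pair base = { .lo = 0x00100003, .hi = 0x01 };
	struct reg_pair lim  = { .lo = 0x00200000, .hi = 0x01 };

	uint64_t b = (((uint64_t)base.lo & 0xffff0000) << 8) |
		     (((uint64_t)base.hi & 0xff) << 40);
	uint64_t l = (((uint64_t)lim.lo & 0xffff0000) << 8) | 0x00ffffff;

	l |= ((uint64_t)lim.hi & 0xff) << 40;	/* skipped on family 0xf */

	printf("range: 0x%llx - 0x%llx\n",
	       (unsigned long long)b, (unsigned long long)l);
	return 0;
}
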
- /* MC Type Index value: socket F vs Family 10h */
- u32 mc_type_index;
+static inline u16 extract_syndrome(u64 status)
+{
+ return ((status >> 47) & 0xff) | ((status >> 16) & 0xff00);
+}
+
+static inline u8 dct_sel_interleave_addr(struct amd64_pvt *pvt)
+{
+ if (pvt->fam == 0x15 && pvt->model >= 0x30)
+ return (((pvt->dct_sel_hi >> 9) & 0x1) << 2) |
+ ((pvt->dct_sel_lo >> 6) & 0x3);
+
+ return ((pvt)->dct_sel_lo >> 6) & 0x3;
+}
+/*
+ * per-node ECC settings descriptor
+ */
+struct ecc_settings {
+ u32 old_nbctl;
+ bool nbctl_valid;
- /* misc settings */
struct flags {
- unsigned long cf8_extcfg:1;
unsigned long nb_mce_enable:1;
unsigned long nb_ecc_prev:1;
} flags;
};
-struct scrubrate {
- u32 scrubval; /* bit pattern for scrub rate */
- u32 bandwidth; /* bandwidth consumed (bytes/sec) */
-};
-
-extern struct scrubrate scrubrates[23];
-extern const char *tt_msgs[4];
-extern const char *ll_msgs[4];
-extern const char *rrrr_msgs[16];
-extern const char *to_msgs[2];
-extern const char *pp_msgs[4];
-extern const char *ii_msgs[4];
-extern const char *htlink_msgs[8];
-
#ifdef CONFIG_EDAC_DEBUG
-#define NUM_DBG_ATTRS 5
+int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci);
+void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci);
+
#else
-#define NUM_DBG_ATTRS 0
+static inline int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci)
+{
+ return 0;
+}
+static inline void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci)
+{
+}
#endif
#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
-#define NUM_INJ_ATTRS 5
+int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci);
+void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci);
+
#else
-#define NUM_INJ_ATTRS 0
+static inline int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci)
+{
+ return 0;
+}
+static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
+{
+}
#endif
-extern struct mcidev_sysfs_attribute amd64_dbg_attrs[NUM_DBG_ATTRS],
- amd64_inj_attrs[NUM_INJ_ATTRS];
-
/*
* Each of the PCI Device IDs types have their own set of hardware accessor
* functions and per device encoding/decoding logic.
*/
struct low_ops {
int (*early_channel_count) (struct amd64_pvt *pvt);
-
- u64 (*get_error_address) (struct mem_ctl_info *mci,
- struct err_regs *info);
- void (*read_dram_base_limit) (struct amd64_pvt *pvt, int dram);
- void (*read_dram_ctl_register) (struct amd64_pvt *pvt);
- void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci,
- struct err_regs *info, u64 SystemAddr);
- int (*dbam_to_cs) (struct amd64_pvt *pvt, int cs_mode);
+ void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr,
+ struct err_info *);
+ int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode);
+ int (*read_dct_pci_cfg) (struct amd64_pvt *pvt, int offset,
+ u32 *val, const char *func);
};
struct amd64_family_type {
const char *ctl_name;
- u16 addr_f1_ctl;
- u16 misc_f3_ctl;
+ u16 f1_id, f3_id;
struct low_ops ops;
};
-static struct amd64_family_type amd64_family_types[];
+int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
+ u32 *val, const char *func);
+int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
+ u32 val, const char *func);
+
+#define amd64_read_pci_cfg(pdev, offset, val) \
+ __amd64_read_pci_cfg_dword(pdev, offset, val, __func__)
+
+#define amd64_write_pci_cfg(pdev, offset, val) \
+ __amd64_write_pci_cfg_dword(pdev, offset, val, __func__)
-static inline const char *get_amd_family_name(int index)
+#define amd64_read_dct_pci_cfg(pvt, offset, val) \
+ pvt->ops->read_dct_pci_cfg(pvt, offset, val, __func__)
+
+int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
+ u64 *hole_offset, u64 *hole_size);
+
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+
+/* Injection helpers */
+static inline void disable_caches(void *dummy)
{
- return amd64_family_types[index].ctl_name;
+ write_cr0(read_cr0() | X86_CR0_CD);
+ wbinvd();
}
-static inline struct low_ops *family_ops(int index)
+static inline void enable_caches(void *dummy)
{
- return &amd64_family_types[index].ops;
+ write_cr0(read_cr0() & ~X86_CR0_CD);
}
-static inline int amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
- u32 *val, const char *func)
+static inline u8 dram_intlv_en(struct amd64_pvt *pvt, unsigned int i)
{
- int err = 0;
-
- err = pci_read_config_dword(pdev, offset, val);
- if (err)
- amd64_printk(KERN_WARNING, "%s: error reading F%dx%x.\n",
- func, PCI_FUNC(pdev->devfn), offset);
-
- return err;
+ if (pvt->fam == 0x15 && pvt->model >= 0x30) {
+ u32 tmp;
+ amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &tmp);
+ return (u8) tmp & 0xF;
+ }
+ return (u8) (pvt->ranges[i].base.lo >> 8) & 0x7;
}
-#define amd64_read_pci_cfg(pdev, offset, val) \
- amd64_read_pci_cfg_dword(pdev, offset, val, __func__)
-
-/*
- * For future CPU versions, verify the following as new 'slow' rates appear and
- * modify the necessary skip values for the supported CPU.
- */
-#define K8_MIN_SCRUB_RATE_BITS 0x0
-#define F10_MIN_SCRUB_RATE_BITS 0x5
-#define F11_MIN_SCRUB_RATE_BITS 0x6
+static inline u8 dhar_valid(struct amd64_pvt *pvt)
+{
+ if (pvt->fam == 0x15 && pvt->model >= 0x30) {
+ u32 tmp;
+ amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp);
+ return (tmp >> 1) & BIT(0);
+ }
+ return (pvt)->dhar & BIT(0);
+}
-int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
- u64 *hole_offset, u64 *hole_size);
+static inline u32 dct_sel_baseaddr(struct amd64_pvt *pvt)
+{
+ if (pvt->fam == 0x15 && pvt->model >= 0x30) {
+ u32 tmp;
+ amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp);
+ return (tmp >> 11) & 0x1FFF;
+ }
+ return (pvt)->dct_sel_lo & 0xFFFFF800;
+}
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c
index e3562288f4c..2c1bbf74060 100644
--- a/drivers/edac/amd64_edac_dbg.c
+++ b/drivers/edac/amd64_edac_dbg.c
@@ -1,8 +1,11 @@
#include "amd64_edac.h"
#define EDAC_DCT_ATTR_SHOW(reg) \
-static ssize_t amd64_##reg##_show(struct mem_ctl_info *mci, char *data) \
+static ssize_t amd64_##reg##_show(struct device *dev, \
+ struct device_attribute *mattr, \
+ char *data) \
{ \
+ struct mem_ctl_info *mci = to_mci(dev); \
struct amd64_pvt *pvt = mci->pvt_info; \
return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \
}
@@ -12,8 +15,12 @@ EDAC_DCT_ATTR_SHOW(dbam0);
EDAC_DCT_ATTR_SHOW(top_mem);
EDAC_DCT_ATTR_SHOW(top_mem2);
-static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data)
+static ssize_t amd64_hole_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
+ struct mem_ctl_info *mci = to_mci(dev);
+
u64 hole_base = 0;
u64 hole_offset = 0;
u64 hole_size = 0;
@@ -27,46 +34,40 @@ static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data)
/*
* update NUM_DBG_ATTRS in case you add new members
*/
-struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
+static DEVICE_ATTR(dhar, S_IRUGO, amd64_dhar_show, NULL);
+static DEVICE_ATTR(dbam, S_IRUGO, amd64_dbam0_show, NULL);
+static DEVICE_ATTR(topmem, S_IRUGO, amd64_top_mem_show, NULL);
+static DEVICE_ATTR(topmem2, S_IRUGO, amd64_top_mem2_show, NULL);
+static DEVICE_ATTR(dram_hole, S_IRUGO, amd64_hole_show, NULL);
+
+int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci)
+{
+ int rc;
+
+ rc = device_create_file(&mci->dev, &dev_attr_dhar);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_dbam);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_topmem);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_topmem2);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_dram_hole);
+ if (rc < 0)
+ return rc;
- {
- .attr = {
- .name = "dhar",
- .mode = (S_IRUGO)
- },
- .show = amd64_dhar_show,
- .store = NULL,
- },
- {
- .attr = {
- .name = "dbam",
- .mode = (S_IRUGO)
- },
- .show = amd64_dbam0_show,
- .store = NULL,
- },
- {
- .attr = {
- .name = "topmem",
- .mode = (S_IRUGO)
- },
- .show = amd64_top_mem_show,
- .store = NULL,
- },
- {
- .attr = {
- .name = "topmem2",
- .mode = (S_IRUGO)
- },
- .show = amd64_top_mem2_show,
- .store = NULL,
- },
- {
- .attr = {
- .name = "dram_hole",
- .mode = (S_IRUGO)
- },
- .show = amd64_hole_show,
- .store = NULL,
- },
-};
+ return 0;
+}
+
+void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci)
+{
+ device_remove_file(&mci->dev, &dev_attr_dhar);
+ device_remove_file(&mci->dev, &dev_attr_dbam);
+ device_remove_file(&mci->dev, &dev_attr_topmem);
+ device_remove_file(&mci->dev, &dev_attr_topmem2);
+ device_remove_file(&mci->dev, &dev_attr_dram_hole);
+}
diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c
index 29f1f7a612d..0d66ae68d46 100644
--- a/drivers/edac/amd64_edac_inj.c
+++ b/drivers/edac/amd64_edac_inj.c
@@ -1,7 +1,10 @@
#include "amd64_edac.h"
-static ssize_t amd64_inject_section_show(struct mem_ctl_info *mci, char *buf)
+static ssize_t amd64_inject_section_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *buf)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
return sprintf(buf, "0x%x\n", pvt->injection.section);
}
@@ -12,31 +15,33 @@ static ssize_t amd64_inject_section_show(struct mem_ctl_info *mci, char *buf)
*
* range: 0..3
*/
-static ssize_t amd64_inject_section_store(struct mem_ctl_info *mci,
+static ssize_t amd64_inject_section_store(struct device *dev,
+ struct device_attribute *mattr,
const char *data, size_t count)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
- int ret = 0;
+ int ret;
- ret = strict_strtoul(data, 10, &value);
- if (ret != -EINVAL) {
+ ret = kstrtoul(data, 10, &value);
+ if (ret < 0)
+ return ret;
- if (value > 3) {
- amd64_printk(KERN_WARNING,
- "%s: invalid section 0x%lx\n",
- __func__, value);
- return -EINVAL;
- }
-
- pvt->injection.section = (u32) value;
- return count;
+ if (value > 3) {
+ amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
+ return -EINVAL;
}
- return ret;
+
+ pvt->injection.section = (u32) value;
+ return count;
}
-static ssize_t amd64_inject_word_show(struct mem_ctl_info *mci, char *buf)
+static ssize_t amd64_inject_word_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *buf)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
return sprintf(buf, "0x%x\n", pvt->injection.word);
}
@@ -47,31 +52,33 @@ static ssize_t amd64_inject_word_show(struct mem_ctl_info *mci, char *buf)
*
* range: 0..8
*/
-static ssize_t amd64_inject_word_store(struct mem_ctl_info *mci,
- const char *data, size_t count)
+static ssize_t amd64_inject_word_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
- int ret = 0;
+ int ret;
- ret = strict_strtoul(data, 10, &value);
- if (ret != -EINVAL) {
+ ret = kstrtoul(data, 10, &value);
+ if (ret < 0)
+ return ret;
- if (value > 8) {
- amd64_printk(KERN_WARNING,
- "%s: invalid word 0x%lx\n",
- __func__, value);
- return -EINVAL;
- }
-
- pvt->injection.word = (u32) value;
- return count;
+ if (value > 8) {
+ amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
+ return -EINVAL;
}
- return ret;
+
+ pvt->injection.word = (u32) value;
+ return count;
}
-static ssize_t amd64_inject_ecc_vector_show(struct mem_ctl_info *mci, char *buf)
+static ssize_t amd64_inject_ecc_vector_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *buf)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
return sprintf(buf, "0x%x\n", pvt->injection.bit_map);
}
@@ -81,142 +88,154 @@ static ssize_t amd64_inject_ecc_vector_show(struct mem_ctl_info *mci, char *buf)
* corresponding bit within the error injection word above. When used during a
 * DRAM ECC read, it holds the contents of the DRAM ECC bits.
*/
-static ssize_t amd64_inject_ecc_vector_store(struct mem_ctl_info *mci,
- const char *data, size_t count)
+static ssize_t amd64_inject_ecc_vector_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
- int ret = 0;
-
- ret = strict_strtoul(data, 16, &value);
- if (ret != -EINVAL) {
+ int ret;
- if (value & 0xFFFF0000) {
- amd64_printk(KERN_WARNING,
- "%s: invalid EccVector: 0x%lx\n",
- __func__, value);
- return -EINVAL;
- }
+ ret = kstrtoul(data, 16, &value);
+ if (ret < 0)
+ return ret;
- pvt->injection.bit_map = (u32) value;
- return count;
+ if (value & 0xFFFF0000) {
+ amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value);
+ return -EINVAL;
}
- return ret;
+
+ pvt->injection.bit_map = (u32) value;
+ return count;
}
/*
* Do a DRAM ECC read. Assemble staged values in the pvt area, format into
* fields needed by the injection registers and read the NB Array Data Port.
*/
-static ssize_t amd64_inject_read_store(struct mem_ctl_info *mci,
- const char *data, size_t count)
+static ssize_t amd64_inject_read_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
u32 section, word_bits;
- int ret = 0;
+ int ret;
- ret = strict_strtoul(data, 10, &value);
- if (ret != -EINVAL) {
+ ret = kstrtoul(data, 10, &value);
+ if (ret < 0)
+ return ret;
- /* Form value to choose 16-byte section of cacheline */
- section = F10_NB_ARRAY_DRAM_ECC |
- SET_NB_ARRAY_ADDRESS(pvt->injection.section);
- pci_write_config_dword(pvt->misc_f3_ctl,
- F10_NB_ARRAY_ADDR, section);
+ /* Form value to choose 16-byte section of cacheline */
+ section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
- word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection.word,
- pvt->injection.bit_map);
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
- /* Issue 'word' and 'bit' along with the READ request */
- pci_write_config_dword(pvt->misc_f3_ctl,
- F10_NB_ARRAY_DATA, word_bits);
+ word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection);
- debugf0("section=0x%x word_bits=0x%x\n", section, word_bits);
+ /* Issue 'word' and 'bit' along with the READ request */
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
- return count;
- }
- return ret;
+ edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
+
+ return count;
}
/*
* Do a DRAM ECC write. Assemble staged values in the pvt area and format into
* fields needed by the injection registers.
*/
-static ssize_t amd64_inject_write_store(struct mem_ctl_info *mci,
+static ssize_t amd64_inject_write_store(struct device *dev,
+ struct device_attribute *mattr,
const char *data, size_t count)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
+ u32 section, word_bits, tmp;
unsigned long value;
- u32 section, word_bits;
- int ret = 0;
+ int ret;
+
+ ret = kstrtoul(data, 10, &value);
+ if (ret < 0)
+ return ret;
- ret = strict_strtoul(data, 10, &value);
- if (ret != -EINVAL) {
+ /* Form value to choose 16-byte section of cacheline */
+ section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
- /* Form value to choose 16-byte section of cacheline */
- section = F10_NB_ARRAY_DRAM_ECC |
- SET_NB_ARRAY_ADDRESS(pvt->injection.section);
- pci_write_config_dword(pvt->misc_f3_ctl,
- F10_NB_ARRAY_ADDR, section);
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
- word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection.word,
- pvt->injection.bit_map);
+ word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection);
- /* Issue 'word' and 'bit' along with the READ request */
- pci_write_config_dword(pvt->misc_f3_ctl,
- F10_NB_ARRAY_DATA, word_bits);
+ pr_notice_once("Don't forget to decrease MCE polling interval in\n"
+ "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n"
+ "so that you can get the error report faster.\n");
- debugf0("section=0x%x word_bits=0x%x\n", section, word_bits);
+ on_each_cpu(disable_caches, NULL, 1);
- return count;
+ /* Issue 'word' and 'bit' along with the READ request */
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
+
+ retry:
+ /* wait until injection happens */
+ amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp);
+ if (tmp & F10_NB_ARR_ECC_WR_REQ) {
+ cpu_relax();
+ goto retry;
}
- return ret;
+
+ on_each_cpu(enable_caches, NULL, 1);
+
+ edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
+
+ return count;
}
/*
* update NUM_INJ_ATTRS in case you add new members
*/
-struct mcidev_sysfs_attribute amd64_inj_attrs[] = {
-
- {
- .attr = {
- .name = "inject_section",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = amd64_inject_section_show,
- .store = amd64_inject_section_store,
- },
- {
- .attr = {
- .name = "inject_word",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = amd64_inject_word_show,
- .store = amd64_inject_word_store,
- },
- {
- .attr = {
- .name = "inject_ecc_vector",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = amd64_inject_ecc_vector_show,
- .store = amd64_inject_ecc_vector_store,
- },
- {
- .attr = {
- .name = "inject_write",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = NULL,
- .store = amd64_inject_write_store,
- },
- {
- .attr = {
- .name = "inject_read",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = NULL,
- .store = amd64_inject_read_store,
- },
-};
+
+static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
+ amd64_inject_section_show, amd64_inject_section_store);
+static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR,
+ amd64_inject_word_show, amd64_inject_word_store);
+static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR,
+ amd64_inject_ecc_vector_show, amd64_inject_ecc_vector_store);
+static DEVICE_ATTR(inject_write, S_IWUSR,
+ NULL, amd64_inject_write_store);
+static DEVICE_ATTR(inject_read, S_IWUSR,
+ NULL, amd64_inject_read_store);
+
+
+int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci)
+{
+ int rc;
+
+ rc = device_create_file(&mci->dev, &dev_attr_inject_section);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_inject_word);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_inject_ecc_vector);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_inject_write);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_inject_read);
+ if (rc < 0)
+ return rc;
+
+ return 0;
+}
+
+void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
+{
+ device_remove_file(&mci->dev, &dev_attr_inject_section);
+ device_remove_file(&mci->dev, &dev_attr_inject_word);
+ device_remove_file(&mci->dev, &dev_attr_inject_ecc_vector);
+ device_remove_file(&mci->dev, &dev_attr_inject_write);
+ device_remove_file(&mci->dev, &dev_attr_inject_read);
+}
diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c
index cace0a7b707..3a501b530e1 100644
--- a/drivers/edac/amd76x_edac.c
+++ b/drivers/edac/amd76x_edac.c
@@ -19,7 +19,7 @@
#include <linux/edac.h>
#include "edac_core.h"
-#define AMD76X_REVISION " Ver: 2.0.2 " __DATE__
+#define AMD76X_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR "amd76x_edac"
#define amd76x_printk(level, fmt, arg...) \
@@ -29,7 +29,6 @@
edac_mc_chipset_printk(mci, level, "amd76x", fmt, ##arg)
#define AMD76X_NR_CSROWS 8
-#define AMD76X_NR_CHANS 1
#define AMD76X_NR_DIMMS 4
/* AMD 76x register addresses - device 0 function 0 - PCI bridge */
@@ -106,7 +105,7 @@ static void amd76x_get_error_info(struct mem_ctl_info *mci,
{
struct pci_dev *pdev;
- pdev = to_pci_dev(mci->dev);
+ pdev = to_pci_dev(mci->pdev);
pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS,
&info->ecc_mode_status);
@@ -146,8 +145,10 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,
if (handle_errors) {
row = (info->ecc_mode_status >> 4) & 0xf;
- edac_mc_handle_ue(mci, mci->csrows[row].first_page, 0,
- row, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ mci->csrows[row]->first_page, 0, 0,
+ row, 0, -1,
+ mci->ctl_name, "");
}
}
@@ -159,8 +160,10 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,
if (handle_errors) {
row = info->ecc_mode_status & 0xf;
- edac_mc_handle_ce(mci, mci->csrows[row].first_page, 0,
- 0, row, 0, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ mci->csrows[row]->first_page, 0, 0,
+ row, 0, -1,
+ mci->ctl_name, "");
}
}
@@ -177,7 +180,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,
static void amd76x_check(struct mem_ctl_info *mci)
{
struct amd76x_error_info info;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
amd76x_get_error_info(mci, &info);
amd76x_process_error_info(mci, &info, 1);
}
@@ -186,11 +189,13 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
enum edac_type edac_mode)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
u32 mba, mba_base, mba_mask, dms;
int index;
for (index = 0; index < mci->nr_csrows; index++) {
- csrow = &mci->csrows[index];
+ csrow = mci->csrows[index];
+ dimm = csrow->channels[0]->dimm;
/* find the DRAM Chip Select Base address and mask */
pci_read_config_dword(pdev,
@@ -203,13 +208,13 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
mba_mask = ((mba & 0xff80) << 16) | 0x7fffffUL;
pci_read_config_dword(pdev, AMD76X_DRAM_MODE_STATUS, &dms);
csrow->first_page = mba_base >> PAGE_SHIFT;
- csrow->nr_pages = (mba_mask + 1) >> PAGE_SHIFT;
- csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
+ dimm->nr_pages = (mba_mask + 1) >> PAGE_SHIFT;
+ csrow->last_page = csrow->first_page + dimm->nr_pages - 1;
csrow->page_mask = mba_mask >> PAGE_SHIFT;
- csrow->grain = csrow->nr_pages << PAGE_SHIFT;
- csrow->mtype = MEM_RDDR;
- csrow->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN;
- csrow->edac_mode = edac_mode;
+ dimm->grain = dimm->nr_pages << PAGE_SHIFT;
+ dimm->mtype = MEM_RDDR;
+ dimm->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN;
+ dimm->edac_mode = edac_mode;
}
}
@@ -230,22 +235,29 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx)
EDAC_SECDED,
EDAC_SECDED
};
- struct mem_ctl_info *mci = NULL;
+ struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
u32 ems;
u32 ems_mode;
struct amd76x_error_info discard;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, &ems);
ems_mode = (ems >> 10) & 0x3;
- mci = edac_mc_alloc(0, AMD76X_NR_CSROWS, AMD76X_NR_CHANS, 0);
- if (mci == NULL) {
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = AMD76X_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
+
+ if (mci == NULL)
return -ENOMEM;
- }
- debugf0("%s(): mci = %p\n", __func__, mci);
- mci->dev = &pdev->dev;
+ edac_dbg(0, "mci = %p\n", mci);
+ mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_RDDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
mci->edac_cap = ems_mode ?
@@ -264,7 +276,7 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx)
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
- debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail;
}
@@ -280,7 +292,7 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx)
}
/* get this far and it's successful */
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
return 0;
fail:
@@ -289,10 +301,10 @@ fail:
}
/* returns count (>= 0), or negative on error */
-static int __devinit amd76x_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int amd76x_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* don't need to call pci_enable_device() */
return amd76x_probe1(pdev, ent->driver_data);
@@ -306,11 +318,11 @@ static int __devinit amd76x_init_one(struct pci_dev *pdev,
* structure for the device then delete the mci and free the
* resources.
*/
-static void __devexit amd76x_remove_one(struct pci_dev *pdev)
+static void amd76x_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
if (amd76x_pci)
edac_pci_release_generic_ctl(amd76x_pci);
@@ -321,7 +333,7 @@ static void __devexit amd76x_remove_one(struct pci_dev *pdev)
edac_mc_free(mci);
}
-static const struct pci_device_id amd76x_pci_tbl[] __devinitdata = {
+static const struct pci_device_id amd76x_pci_tbl[] = {
{
PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
AMD762},
@@ -338,7 +350,7 @@ MODULE_DEVICE_TABLE(pci, amd76x_pci_tbl);
static struct pci_driver amd76x_driver = {
.name = EDAC_MOD_STR,
.probe = amd76x_init_one,
- .remove = __devexit_p(amd76x_remove_one),
+ .remove = amd76x_remove_one,
.id_table = amd76x_pci_tbl,
};
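
The amd76x hunks above show the two core API changes this patch propagates through every driver: edac_mc_alloc() now takes an array of struct edac_mc_layer describing the controller topology, and the old edac_mc_handle_ce()/edac_mc_handle_ue() calls collapse into the single edac_mc_handle_error() entry point. A minimal sketch of the new pattern as used above; report_one_ce() is a hypothetical helper, not kernel API:

#include <linux/edac.h>
#include "edac_core.h"

/* Allocate a controller with one virtual-csrow layer over one channel,
 * exactly as amd76x_probe1() now does.
 */
static struct mem_ctl_info *alloc_simple_mc(unsigned int nr_csrows)
{
	struct edac_mc_layer layers[2];

	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = nr_csrows;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = 1;
	layers[1].is_virt_csrow = false;

	return edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
}

/* Report one corrected error against csrow 'row': the three layer
 * arguments are top (csrow), mid (channel) and low (-1 = unused).
 */
static void report_one_ce(struct mem_ctl_info *mci, int row)
{
	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
			     mci->csrows[row]->first_page, 0, 0,
			     row, 0, -1, mci->ctl_name, "");
}
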
diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c
index 35b78d04bbf..2b63f7c2d6d 100644
--- a/drivers/edac/amd8111_edac.c
+++ b/drivers/edac/amd8111_edac.c
@@ -33,7 +33,7 @@
#include "edac_module.h"
#include "amd8111_edac.h"
-#define AMD8111_EDAC_REVISION " Ver: 1.0.0 " __DATE__
+#define AMD8111_EDAC_REVISION " Ver: 1.0.0"
#define AMD8111_EDAC_MOD_STR "amd8111_edac"
#define PCI_DEVICE_ID_AMD_8111_PCI 0x7460
@@ -350,6 +350,7 @@ static int amd8111_dev_probe(struct pci_dev *dev,
const struct pci_device_id *id)
{
struct amd8111_dev_info *dev_info = &amd8111_devices[id->driver_data];
+ int ret = -ENODEV;
dev_info->dev = pci_get_device(PCI_VENDOR_ID_AMD,
dev_info->err_dev, NULL);
@@ -359,16 +360,15 @@ static int amd8111_dev_probe(struct pci_dev *dev,
"vendor %x, device %x, name %s\n",
PCI_VENDOR_ID_AMD, dev_info->err_dev,
dev_info->ctl_name);
- return -ENODEV;
+ goto err;
}
if (pci_enable_device(dev_info->dev)) {
- pci_dev_put(dev_info->dev);
printk(KERN_ERR "failed to enable:"
"vendor %x, device %x, name %s\n",
PCI_VENDOR_ID_AMD, dev_info->err_dev,
dev_info->ctl_name);
- return -ENODEV;
+ goto err_dev_put;
}
/*
@@ -381,8 +381,10 @@ static int amd8111_dev_probe(struct pci_dev *dev,
edac_device_alloc_ctl_info(0, dev_info->ctl_name, 1,
NULL, 0, 0,
NULL, 0, dev_info->edac_idx);
- if (!dev_info->edac_dev)
- return -ENOMEM;
+ if (!dev_info->edac_dev) {
+ ret = -ENOMEM;
+ goto err_dev_put;
+ }
dev_info->edac_dev->pvt_info = dev_info;
dev_info->edac_dev->dev = &dev_info->dev->dev;
@@ -399,8 +401,7 @@ static int amd8111_dev_probe(struct pci_dev *dev,
if (edac_device_add_device(dev_info->edac_dev) > 0) {
printk(KERN_ERR "failed to add edac_dev for %s\n",
dev_info->ctl_name);
- edac_device_free_ctl_info(dev_info->edac_dev);
- return -ENODEV;
+ goto err_edac_free_ctl;
}
printk(KERN_INFO "added one edac_dev on AMD8111 "
@@ -409,6 +410,13 @@ static int amd8111_dev_probe(struct pci_dev *dev,
dev_info->ctl_name);
return 0;
+
+err_edac_free_ctl:
+ edac_device_free_ctl_info(dev_info->edac_dev);
+err_dev_put:
+ pci_dev_put(dev_info->dev);
+err:
+ return ret;
}
static void amd8111_dev_remove(struct pci_dev *dev)
@@ -437,6 +445,7 @@ static int amd8111_pci_probe(struct pci_dev *dev,
const struct pci_device_id *id)
{
struct amd8111_pci_info *pci_info = &amd8111_pcis[id->driver_data];
+ int ret = -ENODEV;
pci_info->dev = pci_get_device(PCI_VENDOR_ID_AMD,
pci_info->err_dev, NULL);
@@ -446,16 +455,15 @@ static int amd8111_pci_probe(struct pci_dev *dev,
"vendor %x, device %x, name %s\n",
PCI_VENDOR_ID_AMD, pci_info->err_dev,
pci_info->ctl_name);
- return -ENODEV;
+ goto err;
}
if (pci_enable_device(pci_info->dev)) {
- pci_dev_put(pci_info->dev);
printk(KERN_ERR "failed to enable:"
"vendor %x, device %x, name %s\n",
PCI_VENDOR_ID_AMD, pci_info->err_dev,
pci_info->ctl_name);
- return -ENODEV;
+ goto err_dev_put;
}
/*
@@ -465,8 +473,10 @@ static int amd8111_pci_probe(struct pci_dev *dev,
*/
pci_info->edac_idx = edac_pci_alloc_index();
pci_info->edac_dev = edac_pci_alloc_ctl_info(0, pci_info->ctl_name);
- if (!pci_info->edac_dev)
- return -ENOMEM;
+ if (!pci_info->edac_dev) {
+ ret = -ENOMEM;
+ goto err_dev_put;
+ }
pci_info->edac_dev->pvt_info = pci_info;
pci_info->edac_dev->dev = &pci_info->dev->dev;
@@ -483,8 +493,7 @@ static int amd8111_pci_probe(struct pci_dev *dev,
if (edac_pci_add_device(pci_info->edac_dev, pci_info->edac_idx) > 0) {
printk(KERN_ERR "failed to add edac_pci for %s\n",
pci_info->ctl_name);
- edac_pci_free_ctl_info(pci_info->edac_dev);
- return -ENODEV;
+ goto err_edac_free_ctl;
}
printk(KERN_INFO "added one edac_pci on AMD8111 "
@@ -493,6 +502,13 @@ static int amd8111_pci_probe(struct pci_dev *dev,
pci_info->ctl_name);
return 0;
+
+err_edac_free_ctl:
+ edac_pci_free_ctl_info(pci_info->edac_dev);
+err_dev_put:
+ pci_dev_put(pci_info->dev);
+err:
+ return ret;
}
static void amd8111_pci_remove(struct pci_dev *dev)
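
The amd8111 changes above replace early returns that leaked the pci_dev reference with a single goto-based unwind, so each failure releases exactly what was acquired before it. The idiom reduced to a skeleton; struct my_ctl and the alloc_ctl()/add_device()/free_ctl() helpers are hypothetical stand-ins for the edac_device/edac_pci calls:

#include <linux/pci.h>

static int probe_skeleton(u16 vendor, u16 device)
{
	struct pci_dev *dev;
	struct my_ctl *ctl;
	int ret = -ENODEV;

	dev = pci_get_device(vendor, device, NULL);
	if (!dev)
		goto err;		/* nothing acquired yet */

	if (pci_enable_device(dev))
		goto err_dev_put;	/* drop the reference we hold */

	ctl = alloc_ctl();		/* hypothetical */
	if (!ctl) {
		ret = -ENOMEM;
		goto err_dev_put;
	}

	if (add_device(ctl) > 0)	/* hypothetical */
		goto err_free_ctl;

	return 0;

err_free_ctl:
	free_ctl(ctl);			/* hypothetical */
err_dev_put:
	pci_dev_put(dev);
err:
	return ret;
}
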
diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c
index b432d60c622..a5c680561c7 100644
--- a/drivers/edac/amd8131_edac.c
+++ b/drivers/edac/amd8131_edac.c
@@ -33,7 +33,7 @@
#include "edac_module.h"
#include "amd8131_edac.h"
-#define AMD8131_EDAC_REVISION " Ver: 1.0.0 " __DATE__
+#define AMD8131_EDAC_REVISION " Ver: 1.0.0"
#define AMD8131_EDAC_MOD_STR "amd8131_edac"
/* Wrapper functions for accessing PCI configuration space */
diff --git a/drivers/edac/amd8131_edac.h b/drivers/edac/amd8131_edac.h
index 60e0d1c72de..6f8b07131ec 100644
--- a/drivers/edac/amd8131_edac.h
+++ b/drivers/edac/amd8131_edac.h
@@ -99,7 +99,7 @@ struct amd8131_dev_info {
/*
* AMD8131 chipset has two pairs of PCIX Bridge and related IOAPIC
- * Controler, and ATCA-6101 has two AMD8131 chipsets, so there are
+ * Controller, and ATCA-6101 has two AMD8131 chipsets, so there are
* four PCIX Bridges on ATCA-6101 altogether.
*
* These PCIX Bridges share the same PCI Device ID and are all of
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index c973004c002..374b57fc596 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -15,6 +15,7 @@
#include <linux/platform_device.h>
#include <linux/stop_machine.h>
#include <linux/io.h>
+#include <linux/of_address.h>
#include <asm/machdep.h>
#include <asm/cell-regs.h>
@@ -33,10 +34,10 @@ struct cell_edac_priv
static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar)
{
struct cell_edac_priv *priv = mci->pvt_info;
- struct csrow_info *csrow = &mci->csrows[0];
+ struct csrow_info *csrow = mci->csrows[0];
unsigned long address, pfn, offset, syndrome;
- dev_dbg(mci->dev, "ECC CE err on node %d, channel %d, ar = 0x%016llx\n",
+ dev_dbg(mci->pdev, "ECC CE err on node %d, channel %d, ar = 0x%016llx\n",
priv->node, chan, ar);
/* Address decoding is likely a bit bogus, to dbl check */
@@ -47,18 +48,19 @@ static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar)
offset = address & ~PAGE_MASK;
syndrome = (ar & 0x000000001fe00000ul) >> 21;
- /* TODO: Decoding of the error addresss */
- edac_mc_handle_ce(mci, csrow->first_page + pfn, offset,
- syndrome, 0, chan, "");
+ /* TODO: Decoding of the error address */
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ csrow->first_page + pfn, offset, syndrome,
+ 0, chan, -1, "", "");
}
static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
{
struct cell_edac_priv *priv = mci->pvt_info;
- struct csrow_info *csrow = &mci->csrows[0];
+ struct csrow_info *csrow = mci->csrows[0];
unsigned long address, pfn, offset;
- dev_dbg(mci->dev, "ECC UE err on node %d, channel %d, ar = 0x%016llx\n",
+ dev_dbg(mci->pdev, "ECC UE err on node %d, channel %d, ar = 0x%016llx\n",
priv->node, chan, ar);
/* Address decoding is likely a bit bogus, to dbl check */
@@ -68,8 +70,10 @@ static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
pfn = address >> PAGE_SHIFT;
offset = address & ~PAGE_MASK;
- /* TODO: Decoding of the error addresss */
- edac_mc_handle_ue(mci, csrow->first_page + pfn, offset, 0, "");
+ /* TODO: Decoding of the error address */
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ csrow->first_page + pfn, offset, 0,
+ 0, chan, -1, "", "");
}
static void cell_edac_check(struct mem_ctl_info *mci)
@@ -80,7 +84,7 @@ static void cell_edac_check(struct mem_ctl_info *mci)
fir = in_be64(&priv->regs->mic_fir);
#ifdef DEBUG
if (fir != priv->prev_fir) {
- dev_dbg(mci->dev, "fir change : 0x%016lx\n", fir);
+ dev_dbg(mci->pdev, "fir change : 0x%016lx\n", fir);
priv->prev_fir = fir;
}
#endif
@@ -116,16 +120,19 @@ static void cell_edac_check(struct mem_ctl_info *mci)
mb(); /* sync up */
#ifdef DEBUG
fir = in_be64(&priv->regs->mic_fir);
- dev_dbg(mci->dev, "fir clear : 0x%016lx\n", fir);
+ dev_dbg(mci->pdev, "fir clear : 0x%016lx\n", fir);
#endif
}
}
-static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
+static void cell_edac_init_csrows(struct mem_ctl_info *mci)
{
- struct csrow_info *csrow = &mci->csrows[0];
+ struct csrow_info *csrow = mci->csrows[0];
+ struct dimm_info *dimm;
struct cell_edac_priv *priv = mci->pvt_info;
struct device_node *np;
+ int j;
+ u32 nr_pages;
for (np = NULL;
(np = of_find_node_by_name(np, "memory")) != NULL;) {
@@ -140,26 +147,33 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
if (of_node_to_nid(np) != priv->node)
continue;
csrow->first_page = r.start >> PAGE_SHIFT;
- csrow->nr_pages = (r.end - r.start + 1) >> PAGE_SHIFT;
- csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
- csrow->mtype = MEM_XDR;
- csrow->edac_mode = EDAC_SECDED;
- dev_dbg(mci->dev,
+ nr_pages = resource_size(&r) >> PAGE_SHIFT;
+ csrow->last_page = csrow->first_page + nr_pages - 1;
+
+ for (j = 0; j < csrow->nr_channels; j++) {
+ dimm = csrow->channels[j]->dimm;
+ dimm->mtype = MEM_XDR;
+ dimm->edac_mode = EDAC_SECDED;
+ dimm->nr_pages = nr_pages / csrow->nr_channels;
+ }
+ dev_dbg(mci->pdev,
"Initialized on node %d, chanmask=0x%x,"
" first_page=0x%lx, nr_pages=0x%x\n",
priv->node, priv->chanmask,
- csrow->first_page, csrow->nr_pages);
+ csrow->first_page, nr_pages);
break;
}
+ of_node_put(np);
}
-static int __devinit cell_edac_probe(struct platform_device *pdev)
+static int cell_edac_probe(struct platform_device *pdev)
{
struct cbe_mic_tm_regs __iomem *regs;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct cell_edac_priv *priv;
u64 reg;
- int rc, chanmask;
+ int rc, chanmask, num_chans;
regs = cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(pdev->id));
if (regs == NULL)
@@ -184,15 +198,23 @@ static int __devinit cell_edac_probe(struct platform_device *pdev)
in_be64(&regs->mic_fir));
/* Allocate & init EDAC MC data structure */
- mci = edac_mc_alloc(sizeof(struct cell_edac_priv), 1,
- chanmask == 3 ? 2 : 1, pdev->id);
+ num_chans = chanmask == 3 ? 2 : 1;
+
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = 1;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = num_chans;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers,
+ sizeof(struct cell_edac_priv));
if (mci == NULL)
return -ENOMEM;
priv = mci->pvt_info;
priv->regs = regs;
priv->node = pdev->id;
priv->chanmask = chanmask;
- mci->dev = &pdev->dev;
+ mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_XDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_EC | EDAC_FLAG_SECDED;
@@ -213,7 +235,7 @@ static int __devinit cell_edac_probe(struct platform_device *pdev)
return 0;
}
-static int __devexit cell_edac_remove(struct platform_device *pdev)
+static int cell_edac_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
if (mci)
@@ -227,7 +249,7 @@ static struct platform_driver cell_edac_driver = {
.owner = THIS_MODULE,
},
.probe = cell_edac_probe,
- .remove = __devexit_p(cell_edac_remove),
+ .remove = cell_edac_remove,
};
static int __init cell_edac_init(void)
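
The cell_edac hunks above move the per-row attributes (memory type, EDAC mode, page count) onto each channel's struct dimm_info, dividing the row's pages evenly across channels. The filling loop, condensed from the patch:

#include "edac_core.h"

/* Split a csrow's pages across its channels' DIMMs, as
 * cell_edac_init_csrows() now does (nr_pages already computed
 * from the device-tree memory node).
 */
static void fill_row_dimms(struct csrow_info *csrow, u32 nr_pages)
{
	int j;

	for (j = 0; j < csrow->nr_channels; j++) {
		struct dimm_info *dimm = csrow->channels[j]->dimm;

		dimm->mtype = MEM_XDR;
		dimm->edac_mode = EDAC_SECDED;
		dimm->nr_pages = nr_pages / csrow->nr_channels;
	}
}
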
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index 1609a19df49..df6575f1430 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -30,7 +30,7 @@
#include "edac_core.h"
#include "edac_module.h"
-#define CPC925_EDAC_REVISION " Ver: 1.0.0 " __DATE__
+#define CPC925_EDAC_REVISION " Ver: 1.0.0"
#define CPC925_EDAC_MOD_STR "cpc925_edac"
#define cpc925_printk(level, fmt, arg...) \
@@ -90,6 +90,7 @@ enum apimask_bits {
ECC_MASK_ENABLE = (APIMASK_ECC_UE_H | APIMASK_ECC_CE_H |
APIMASK_ECC_UE_L | APIMASK_ECC_CE_L),
};
+#define APIMASK_ADI(n) CPC925_BIT(((n)+1))
/************************************************************
* Processor Interface Exception Register (APIEXCP)
@@ -315,22 +316,23 @@ static void get_total_mem(struct cpc925_mc_pdata *pdata)
reg += aw;
size = of_read_number(reg, sw);
reg += sw;
- debugf1("%s: start 0x%lx, size 0x%lx\n", __func__,
- start, size);
+ edac_dbg(1, "start 0x%lx, size 0x%lx\n", start, size);
pdata->total_mem += size;
} while (reg < reg_end);
of_node_put(np);
- debugf0("%s: total_mem 0x%lx\n", __func__, pdata->total_mem);
+ edac_dbg(0, "total_mem 0x%lx\n", pdata->total_mem);
}
static void cpc925_init_csrows(struct mem_ctl_info *mci)
{
struct cpc925_mc_pdata *pdata = mci->pvt_info;
struct csrow_info *csrow;
- int index;
- u32 mbmr, mbbar, bba;
- unsigned long row_size, last_nr_pages = 0;
+ struct dimm_info *dimm;
+ enum dev_type dtype;
+ int index, j;
+ u32 mbmr, mbbar, bba, grain;
+ unsigned long row_size, nr_pages, last_nr_pages = 0;
get_total_mem(pdata);
@@ -345,40 +347,44 @@ static void cpc925_init_csrows(struct mem_ctl_info *mci)
if (bba == 0)
continue; /* not populated */
- csrow = &mci->csrows[index];
+ csrow = mci->csrows[index];
row_size = bba * (1UL << 28); /* 256M */
csrow->first_page = last_nr_pages;
- csrow->nr_pages = row_size >> PAGE_SHIFT;
- csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
+ nr_pages = row_size >> PAGE_SHIFT;
+ csrow->last_page = csrow->first_page + nr_pages - 1;
last_nr_pages = csrow->last_page + 1;
- csrow->mtype = MEM_RDDR;
- csrow->edac_mode = EDAC_SECDED;
-
switch (csrow->nr_channels) {
case 1: /* Single channel */
- csrow->grain = 32; /* four-beat burst of 32 bytes */
+ grain = 32; /* four-beat burst of 32 bytes */
break;
case 2: /* Dual channel */
default:
- csrow->grain = 64; /* four-beat burst of 64 bytes */
+ grain = 64; /* four-beat burst of 64 bytes */
break;
}
-
switch ((mbmr & MBMR_MODE_MASK) >> MBMR_MODE_SHIFT) {
case 6: /* 0110, no way to differentiate X8 VS X16 */
case 5: /* 0101 */
case 8: /* 1000 */
- csrow->dtype = DEV_X16;
+ dtype = DEV_X16;
break;
case 7: /* 0111 */
case 9: /* 1001 */
- csrow->dtype = DEV_X8;
+ dtype = DEV_X8;
break;
default:
- csrow->dtype = DEV_UNKNOWN;
- break;
+ dtype = DEV_UNKNOWN;
+ break;
+ }
+ for (j = 0; j < csrow->nr_channels; j++) {
+ dimm = csrow->channels[j]->dimm;
+ dimm->nr_pages = nr_pages / csrow->nr_channels;
+ dimm->mtype = MEM_RDDR;
+ dimm->edac_mode = EDAC_SECDED;
+ dimm->grain = grain;
+ dimm->dtype = dtype;
}
}
}
@@ -456,7 +462,7 @@ static void cpc925_mc_get_pfn(struct mem_ctl_info *mci, u32 mear,
*csrow = rank;
#ifdef CONFIG_EDAC_DEBUG
- if (mci->csrows[rank].first_page == 0) {
+ if (mci->csrows[rank]->first_page == 0) {
cpc925_mc_printk(mci, KERN_ERR, "ECC occurs in a "
"non-populated csrow, broken hardware?\n");
return;
@@ -464,7 +470,7 @@ static void cpc925_mc_get_pfn(struct mem_ctl_info *mci, u32 mear,
#endif
/* Revert csrow number */
- pa = mci->csrows[rank].first_page << PAGE_SHIFT;
+ pa = mci->csrows[rank]->first_page << PAGE_SHIFT;
/* Revert column address */
col += bcnt;
@@ -505,7 +511,7 @@ static void cpc925_mc_get_pfn(struct mem_ctl_info *mci, u32 mear,
*offset = pa & (PAGE_SIZE - 1);
*pfn = pa >> PAGE_SHIFT;
- debugf0("%s: ECC physical address 0x%lx\n", __func__, pa);
+ edac_dbg(0, "ECC physical address 0x%lx\n", pa);
}
static int cpc925_mc_find_channel(struct mem_ctl_info *mci, u16 syndrome)
@@ -548,13 +554,18 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)
if (apiexcp & CECC_EXCP_DETECTED) {
cpc925_mc_printk(mci, KERN_INFO, "DRAM CECC Fault\n");
channel = cpc925_mc_find_channel(mci, syndrome);
- edac_mc_handle_ce(mci, pfn, offset, syndrome,
- csrow, channel, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ pfn, offset, syndrome,
+ csrow, channel, -1,
+ mci->ctl_name, "");
}
if (apiexcp & UECC_EXCP_DETECTED) {
cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n");
- edac_mc_handle_ue(mci, pfn, offset, csrow, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ pfn, offset, 0,
+ csrow, -1, -1,
+ mci->ctl_name, "");
}
cpc925_mc_printk(mci, KERN_INFO, "Dump registers:\n");
@@ -581,16 +592,73 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)
}
/******************** CPU err device********************************/
+static u32 cpc925_cpu_mask_disabled(void)
+{
+ struct device_node *cpus;
+ struct device_node *cpunode = NULL;
+ static u32 mask = 0;
+
+ /* use cached value if available */
+ if (mask != 0)
+ return mask;
+
+ mask = APIMASK_ADI0 | APIMASK_ADI1;
+
+ cpus = of_find_node_by_path("/cpus");
+ if (cpus == NULL) {
+ cpc925_printk(KERN_DEBUG, "No /cpus node!\n");
+ return 0;
+ }
+
+ while ((cpunode = of_get_next_child(cpus, cpunode)) != NULL) {
+ const u32 *reg = of_get_property(cpunode, "reg", NULL);
+
+ if (strcmp(cpunode->type, "cpu")) {
+ cpc925_printk(KERN_ERR, "Not a cpu node in /cpus: %s\n", cpunode->name);
+ continue;
+ }
+
+ if (reg == NULL || *reg > 2) {
+ cpc925_printk(KERN_ERR, "Bad reg value at %s\n", cpunode->full_name);
+ continue;
+ }
+
+ mask &= ~APIMASK_ADI(*reg);
+ }
+
+ if (mask != (APIMASK_ADI0 | APIMASK_ADI1)) {
+ /* We assume that each CPU sits on its own PI and that
+ * for present CPUs the reg property equals the PI
+ * interface id */
+ cpc925_printk(KERN_WARNING,
+ "Assuming PI id is equal to CPU MPIC id!\n");
+ }
+
+ of_node_put(cpunode);
+ of_node_put(cpus);
+
+ return mask;
+}
+
/* Enable CPU Errors detection */
static void cpc925_cpu_init(struct cpc925_dev_info *dev_info)
{
u32 apimask;
+ u32 cpumask;
apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET);
- if ((apimask & CPU_MASK_ENABLE) == 0) {
- apimask |= CPU_MASK_ENABLE;
- __raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET);
+
+ cpumask = cpc925_cpu_mask_disabled();
+ if (apimask & cpumask) {
+ cpc925_printk(KERN_WARNING, "CPU(s) not present, "
+ "but enabled in APIMASK, disabling\n");
+ apimask &= ~cpumask;
}
+
+ if ((apimask & CPU_MASK_ENABLE) == 0)
+ apimask |= CPU_MASK_ENABLE;
+
+ __raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET);
}
/* Disable CPU Errors detection */
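
cpc925_cpu_mask_disabled() above walks the /cpus device-tree nodes once, clears an APIMASK_ADI bit for every CPU it finds, and caches the result in a function-local static so the per-poll calls from cpc925_cpu_check() cost nothing. The compute-once shape in isolation; expensive_scan() is a hypothetical stand-in for the device-tree walk:

static u32 disabled_mask(void)
{
	static u32 mask;	/* zero means "not computed yet" */

	if (mask != 0)
		return mask;

	mask = expensive_scan();	/* hypothetical scan, done once */
	return mask;
}
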
@@ -622,6 +690,9 @@ static void cpc925_cpu_check(struct edac_device_ctl_info *edac_dev)
if ((apiexcp & CPU_EXCP_DETECTED) == 0)
return;
+ if ((apiexcp & ~cpc925_cpu_mask_disabled()) == 0)
+ return;
+
apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET);
cpc925_printk(KERN_INFO, "Processor Interface Fault\n"
"Processor Interface register dump:\n");
@@ -718,7 +789,7 @@ static struct cpc925_dev_info cpc925_devs[] = {
.exit = cpc925_htlink_exit,
.check = cpc925_htlink_check,
},
- {0}, /* Terminated by NULL */
+ { }
};
/*
@@ -780,8 +851,8 @@ static void cpc925_add_edac_devices(void __iomem *vbase)
goto err2;
}
- debugf0("%s: Successfully added edac device for %s\n",
- __func__, dev_info->ctl_name);
+ edac_dbg(0, "Successfully added edac device for %s\n",
+ dev_info->ctl_name);
continue;
@@ -812,31 +883,32 @@ static void cpc925_del_edac_devices(void)
if (dev_info->exit)
dev_info->exit(dev_info);
- debugf0("%s: Successfully deleted edac device for %s\n",
- __func__, dev_info->ctl_name);
+ edac_dbg(0, "Successfully deleted edac device for %s\n",
+ dev_info->ctl_name);
}
}
-/* Convert current back-ground scrub rate into byte/sec bandwith */
-static int cpc925_get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
+/* Convert current background scrub rate into byte/sec bandwidth */
+static int cpc925_get_sdram_scrub_rate(struct mem_ctl_info *mci)
{
struct cpc925_mc_pdata *pdata = mci->pvt_info;
+ int bw;
u32 mscr;
u8 si;
mscr = __raw_readl(pdata->vbase + REG_MSCR_OFFSET);
si = (mscr & MSCR_SI_MASK) >> MSCR_SI_SHIFT;
- debugf0("%s, Mem Scrub Ctrl Register 0x%x\n", __func__, mscr);
+ edac_dbg(0, "Mem Scrub Ctrl Register 0x%x\n", mscr);
if (((mscr & MSCR_SCRUB_MOD_MASK) != MSCR_BACKGR_SCRUB) ||
(si == 0)) {
cpc925_mc_printk(mci, KERN_INFO, "Scrub mode not enabled\n");
- *bw = 0;
+ bw = 0;
} else
- *bw = CPC925_SCRUB_BLOCK_SIZE * 0xFA67 / si;
+ bw = CPC925_SCRUB_BLOCK_SIZE * 0xFA67 / si;
- return 0;
+ return bw;
}
/* Return 0 for single channel; 1 for dual channel */
@@ -855,22 +927,22 @@ static int cpc925_mc_get_channels(void __iomem *vbase)
((mbcr & MBCR_64BITBUS_MASK) == 0))
dual = 1;
- debugf0("%s: %s channel\n", __func__,
- (dual > 0) ? "Dual" : "Single");
+ edac_dbg(0, "%s channel\n", (dual > 0) ? "Dual" : "Single");
return dual;
}
-static int __devinit cpc925_probe(struct platform_device *pdev)
+static int cpc925_probe(struct platform_device *pdev)
{
static int edac_mc_idx;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
void __iomem *vbase;
struct cpc925_mc_pdata *pdata;
struct resource *r;
int res = 0, nr_channels;
- debugf0("%s: %s platform device found!\n", __func__, pdev->name);
+ edac_dbg(0, "%s platform device found!\n", pdev->name);
if (!devres_open_group(&pdev->dev, cpc925_probe, GFP_KERNEL)) {
res = -ENOMEM;
@@ -900,9 +972,16 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
goto err2;
}
- nr_channels = cpc925_mc_get_channels(vbase);
- mci = edac_mc_alloc(sizeof(struct cpc925_mc_pdata),
- CPC925_NR_CSROWS, nr_channels + 1, edac_mc_idx);
+ nr_channels = cpc925_mc_get_channels(vbase) + 1;
+
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = CPC925_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = nr_channels;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
+ sizeof(struct cpc925_mc_pdata));
if (!mci) {
cpc925_printk(KERN_ERR, "No memory for mem_ctl_info\n");
res = -ENOMEM;
@@ -914,7 +993,7 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
pdata->edac_idx = edac_mc_idx++;
pdata->name = pdev->name;
- mci->dev = &pdev->dev;
+ mci->pdev = &pdev->dev;
platform_set_drvdata(pdev, mci);
mci->dev_name = dev_name(&pdev->dev);
mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
@@ -945,7 +1024,7 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
cpc925_add_edac_devices(vbase);
/* get this far and it's successful */
- debugf0("%s: success\n", __func__);
+ edac_dbg(0, "success\n");
res = 0;
goto out;
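
Note the contract change in cpc925_get_sdram_scrub_rate() above: it now returns the bandwidth in bytes/sec directly (0 when background scrubbing is off) rather than writing through a u32 pointer. Condensed to its essentials, with CPC925_SCRUB_BLOCK_SIZE and the MSCR_* masks defined elsewhere in the driver:

static int scrub_rate_bytes_per_sec(u32 mscr)
{
	u8 si = (mscr & MSCR_SI_MASK) >> MSCR_SI_SHIFT;

	/* only meaningful while background scrub mode is selected */
	if (((mscr & MSCR_SCRUB_MOD_MASK) != MSCR_BACKGR_SCRUB) || si == 0)
		return 0;

	return CPC925_SCRUB_BLOCK_SIZE * 0xFA67 / si;
}
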
diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index 073f5a06d23..b2d71388172 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -4,7 +4,11 @@
* This file may be distributed under the terms of the
* GNU General Public License.
*
- * See "enum e752x_chips" below for supported chipsets
+ * Implement support for the E7520, E7525, E7320 and i3100 memory controllers.
+ *
+ * Datasheets:
+ * http://www.intel.in/content/www/in/en/chipsets/e7525-memory-controller-hub-datasheet.html
+ * ftp://download.intel.com/design/intarch/datashts/31345803.pdf
*
* Written by Tom Zimmerman
*
@@ -13,8 +17,6 @@
* Wang Zhenyu at intel.com
* Dave Jiang at mvista.com
*
- * $Id: edac_e752x.c,v 1.5.2.11 2005/10/05 00:43:44 dsp_llnl Exp $
- *
*/
#include <linux/module.h>
@@ -24,7 +26,7 @@
#include <linux/edac.h>
#include "edac_core.h"
-#define E752X_REVISION " Ver: 2.0.2 " __DATE__
+#define E752X_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR "e752x_edac"
static int report_non_memory_errors;
@@ -187,8 +189,26 @@ enum e752x_chips {
I3100 = 3
};
+/*
+ * These chips support single-rank and dual-rank memories only.
+ *
+ * On e752x chips, the odd rows are present only on dual-rank memories.
+ * Dividing the rank by two will provide the dimm#
+ *
+ * i3100 MC has a different mapping: it supports only 4 ranks.
+ *
+ * The mapping is (from 1 to n):
+ * slot single-ranked double-ranked
+ * dimm #1 -> rank #4 NA
+ * dimm #2 -> rank #3 NA
+ * dimm #3 -> rank #2 Ranks 2 and 3
+ * dimm #4 -> rank #1 Ranks 1 and 4
+ *
+ * FIXME: The current mapping for i3100 assumes that it supports up to 8
+ * ranks/channel, but the datasheet says the MC supports only 4 ranks.
+ */
+
struct e752x_pvt {
- struct pci_dev *bridge_ck;
struct pci_dev *dev_d0f0;
struct pci_dev *dev_d0f1;
u32 tolm;
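
The mapping comment above can be read as a one-line helper: on e752x parts each DIMM slot owns a pair of ranks, so the DIMM index is the rank divided by two. A hypothetical illustration of that half of the mapping (the reversed four-rank i3100 layout is not covered):

/* Hypothetical helper for the e752x case described above:
 * ranks 0/1 belong to dimm 0, ranks 2/3 to dimm 1, and so on.
 */
static inline int e752x_rank_to_dimm(int rank)
{
	return rank >> 1;
}
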
@@ -288,7 +308,7 @@ static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci,
u32 remap;
struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
if (page < pvt->tolm)
return page;
@@ -314,7 +334,7 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one,
int i;
struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
/* convert the addr to 4k page */
page = sec1_add >> (PAGE_SHIFT - 4);
@@ -350,8 +370,10 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one,
channel = !(error_one & 1);
/* e752x mc reads 34:6 of the DRAM linear address */
- edac_mc_handle_ce(mci, page, offset_in_page(sec1_add << 4),
- sec1_syndrome, row, channel, "e752x CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ page, offset_in_page(sec1_add << 4), sec1_syndrome,
+ row, channel, -1,
+ "e752x CE", "");
}
static inline void process_ce(struct mem_ctl_info *mci, u16 error_one,
@@ -371,7 +393,7 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
int row;
struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
if (error_one & 0x0202) {
error_2b = ded_add;
@@ -385,9 +407,12 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
edac_mc_find_csrow_by_page(mci, block_page);
/* e752x mc reads 34:6 of the DRAM linear address */
- edac_mc_handle_ue(mci, block_page,
- offset_in_page(error_2b << 4),
- row, "e752x UE from Read");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ block_page,
+ offset_in_page(error_2b << 4), 0,
+ row, -1, -1,
+ "e752x UE from Read", "");
+
}
if (error_one & 0x0404) {
error_2b = scrb_add;
@@ -401,9 +426,11 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
edac_mc_find_csrow_by_page(mci, block_page);
/* e752x mc reads 34:6 of the DRAM linear address */
- edac_mc_handle_ue(mci, block_page,
- offset_in_page(error_2b << 4),
- row, "e752x UE from Scruber");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ block_page,
+ offset_in_page(error_2b << 4), 0,
+ row, -1, -1,
+ "e752x UE from Scruber", "");
}
}
@@ -425,8 +452,10 @@ static inline void process_ue_no_info_wr(struct mem_ctl_info *mci,
if (!handle_error)
return;
- debugf3("%s()\n", __func__);
- edac_mc_handle_ue_no_info(mci, "e752x UE log memory write");
+ edac_dbg(3, "\n");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1,
+ "e752x UE log memory write", "");
}
static void do_process_ded_retry(struct mem_ctl_info *mci, u16 error,
@@ -861,7 +890,7 @@ static void e752x_get_error_info(struct mem_ctl_info *mci,
info->buf_ferr);
if (info->dram_ferr)
- pci_write_bits16(pvt->bridge_ck, E752X_DRAM_FERR,
+ pci_write_bits16(pvt->dev_d0f1, E752X_DRAM_FERR,
info->dram_ferr, info->dram_ferr);
pci_write_config_dword(dev, E752X_FERR_GLOBAL,
@@ -906,7 +935,7 @@ static void e752x_get_error_info(struct mem_ctl_info *mci,
info->buf_nerr);
if (info->dram_nerr)
- pci_write_bits16(pvt->bridge_ck, E752X_DRAM_NERR,
+ pci_write_bits16(pvt->dev_d0f1, E752X_DRAM_NERR,
info->dram_nerr, info->dram_nerr);
pci_write_config_dword(dev, E752X_NERR_GLOBAL,
@@ -952,7 +981,7 @@ static void e752x_check(struct mem_ctl_info *mci)
{
struct e752x_error_info info;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
e752x_get_error_info(mci, &info);
e752x_process_error_info(mci, &info, 1);
}
@@ -983,11 +1012,11 @@ static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
pci_write_config_word(pdev, E752X_MCHSCRB, scrubrates[i].scrubval);
- return 0;
+ return scrubrates[i].bandwidth;
}
/* Convert current scrub rate value into byte/sec bandwidth */
-static int get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
+static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
{
const struct scrubrate *scrubrates;
struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info;
@@ -1013,10 +1042,8 @@ static int get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
"Invalid sdram scrub control value: 0x%x\n", scrubval);
return -1;
}
+ return scrubrates[i].bandwidth;
- *bw = scrubrates[i].bandwidth;
-
- return 0;
}
/* Return 1 if dual channel mode is active. Else return 0. */
@@ -1041,12 +1068,13 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
u16 ddrcsr)
{
struct csrow_info *csrow;
+ enum edac_type edac_mode;
unsigned long last_cumul_size;
int index, mem_dev, drc_chan;
int drc_drbg; /* DRB granularity 0=64mb, 1=128mb */
int drc_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */
u8 value;
- u32 dra, drc, cumul_size;
+ u32 dra, drc, cumul_size, i, nr_pages;
dra = 0;
for (index = 0; index < 4; index++) {
@@ -1055,7 +1083,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
dra |= dra_reg << (index * 8);
}
pci_read_config_dword(pdev, E752X_DRC, &drc);
- drc_chan = dual_channel_active(ddrcsr);
+ drc_chan = dual_channel_active(ddrcsr) ? 1 : 0;
drc_drbg = drc_chan + 1; /* 128 in dual mode, 64 in single */
drc_ddim = (drc >> 20) & 0x3;
@@ -1067,39 +1095,45 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
for (last_cumul_size = index = 0; index < mci->nr_csrows; index++) {
/* mem_dev 0=x8, 1=x4 */
mem_dev = (dra >> (index * 4 + 2)) & 0x3;
- csrow = &mci->csrows[remap_csrow_index(mci, index)];
+ csrow = mci->csrows[remap_csrow_index(mci, index)];
mem_dev = (mem_dev == 2);
pci_read_config_byte(pdev, E752X_DRB + index, &value);
/* convert a 128 or 64 MiB DRB to a page size. */
cumul_size = value << (25 + drc_drbg - PAGE_SHIFT);
- debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index,
- cumul_size);
+ edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
if (cumul_size == last_cumul_size)
continue; /* not populated */
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
+ nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = 1 << 12; /* 4KiB - resolution of CELOG */
- csrow->mtype = MEM_RDDR; /* only one type supported */
- csrow->dtype = mem_dev ? DEV_X4 : DEV_X8;
/*
- * if single channel or x8 devices then SECDED
- * if dual channel and x4 then S4ECD4ED
- */
+ * if single channel or x8 devices then SECDED
+ * if dual channel and x4 then S4ECD4ED
+ */
if (drc_ddim) {
if (drc_chan && mem_dev) {
- csrow->edac_mode = EDAC_S4ECD4ED;
+ edac_mode = EDAC_S4ECD4ED;
mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
} else {
- csrow->edac_mode = EDAC_SECDED;
+ edac_mode = EDAC_SECDED;
mci->edac_cap |= EDAC_FLAG_SECDED;
}
} else
- csrow->edac_mode = EDAC_NONE;
+ edac_mode = EDAC_NONE;
+ for (i = 0; i < csrow->nr_channels; i++) {
+ struct dimm_info *dimm = csrow->channels[i]->dimm;
+
+ edac_dbg(3, "Initializing rank at (%i,%i)\n", index, i);
+ dimm->nr_pages = nr_pages / csrow->nr_channels;
+ dimm->grain = 1 << 12; /* 4KiB - resolution of CELOG */
+ dimm->mtype = MEM_RDDR; /* only one type supported */
+ dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
+ dimm->edac_mode = edac_mode;
+ }
}
}
@@ -1142,36 +1176,33 @@ static void e752x_init_mem_map_table(struct pci_dev *pdev,
static int e752x_get_devs(struct pci_dev *pdev, int dev_idx,
struct e752x_pvt *pvt)
{
- struct pci_dev *dev;
-
- pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
- pvt->dev_info->err_dev, pvt->bridge_ck);
+ pvt->dev_d0f1 = pci_get_device(PCI_VENDOR_ID_INTEL,
+ pvt->dev_info->err_dev, NULL);
- if (pvt->bridge_ck == NULL)
- pvt->bridge_ck = pci_scan_single_device(pdev->bus,
+ if (pvt->dev_d0f1 == NULL) {
+ pvt->dev_d0f1 = pci_scan_single_device(pdev->bus,
PCI_DEVFN(0, 1));
+ pci_dev_get(pvt->dev_d0f1);
+ }
- if (pvt->bridge_ck == NULL) {
+ if (pvt->dev_d0f1 == NULL) {
e752x_printk(KERN_ERR, "error reporting device not found:"
"vendor %x device 0x%x (broken BIOS?)\n",
PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].err_dev);
return 1;
}
- dev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ pvt->dev_d0f0 = pci_get_device(PCI_VENDOR_ID_INTEL,
e752x_devs[dev_idx].ctl_dev,
NULL);
- if (dev == NULL)
+ if (pvt->dev_d0f0 == NULL)
goto fail;
- pvt->dev_d0f0 = dev;
- pvt->dev_d0f1 = pci_dev_get(pvt->bridge_ck);
-
return 0;
fail:
- pci_dev_put(pvt->bridge_ck);
+ pci_dev_put(pvt->dev_d0f1);
return 1;
}
@@ -1228,13 +1259,14 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
u16 pci_data;
u8 stat8;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct e752x_pvt *pvt;
u16 ddrcsr;
int drc_chan; /* Number of channels 0=1chan,1=2chan */
struct e752x_error_info discard;
- debugf0("%s(): mci\n", __func__);
- debugf0("Starting Probe1\n");
+ edac_dbg(0, "mci\n");
+ edac_dbg(0, "Starting Probe1\n");
/* check to see if device 0 function 1 is enabled; if it isn't, we
* assume the BIOS has reserved it for a reason and is expecting
@@ -1254,13 +1286,17 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
/* Dual channel = 1, Single channel = 0 */
drc_chan = dual_channel_active(ddrcsr);
- mci = edac_mc_alloc(sizeof(*pvt), E752X_NR_CSROWS, drc_chan + 1, 0);
-
- if (mci == NULL) {
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = E752X_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = drc_chan + 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+ if (mci == NULL)
return -ENOMEM;
- }
- debugf3("%s(): init mci\n", __func__);
+ edac_dbg(3, "init mci\n");
mci->mtype_cap = MEM_FLAG_RDDR;
/* 3100 IMCH supports SECDEC only */
mci->edac_ctl_cap = (dev_idx == I3100) ? EDAC_FLAG_SECDED :
@@ -1268,9 +1304,9 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
/* FIXME - what if different memory types are in different csrows? */
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = E752X_REVISION;
- mci->dev = &pdev->dev;
+ mci->pdev = &pdev->dev;
- debugf3("%s(): init pvt\n", __func__);
+ edac_dbg(3, "init pvt\n");
pvt = (struct e752x_pvt *)mci->pvt_info;
pvt->dev_info = &e752x_devs[dev_idx];
pvt->mc_symmetric = ((ddrcsr & 0x10) != 0);
@@ -1280,7 +1316,7 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
return -ENODEV;
}
- debugf3("%s(): more mci init\n", __func__);
+ edac_dbg(3, "more mci init\n");
mci->ctl_name = pvt->dev_info->ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = e752x_check;
@@ -1302,7 +1338,7 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
mci->edac_cap = EDAC_FLAG_SECDED; /* the only mode supported */
else
mci->edac_cap |= EDAC_FLAG_NONE;
- debugf3("%s(): tolm, remapbase, remaplimit\n", __func__);
+ edac_dbg(3, "tolm, remapbase, remaplimit\n");
/* load the top of low memory, remap base, and remap limit vars */
pci_read_config_word(pdev, E752X_TOLM, &pci_data);
@@ -1319,7 +1355,7 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
- debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail;
}
@@ -1337,23 +1373,21 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
}
/* get this far and it's successful */
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
return 0;
fail:
pci_dev_put(pvt->dev_d0f0);
pci_dev_put(pvt->dev_d0f1);
- pci_dev_put(pvt->bridge_ck);
edac_mc_free(mci);
return -ENODEV;
}
/* returns count (>= 0), or negative on error */
-static int __devinit e752x_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int e752x_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* wake up and enable device */
if (pci_enable_device(pdev) < 0)
@@ -1362,12 +1396,12 @@ static int __devinit e752x_init_one(struct pci_dev *pdev,
return e752x_probe1(pdev, ent->driver_data);
}
-static void __devexit e752x_remove_one(struct pci_dev *pdev)
+static void e752x_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct e752x_pvt *pvt;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
if (e752x_pci)
edac_pci_release_generic_ctl(e752x_pci);
@@ -1378,11 +1412,10 @@ static void __devexit e752x_remove_one(struct pci_dev *pdev)
pvt = (struct e752x_pvt *)mci->pvt_info;
pci_dev_put(pvt->dev_d0f0);
pci_dev_put(pvt->dev_d0f1);
- pci_dev_put(pvt->bridge_ck);
edac_mc_free(mci);
}
-static const struct pci_device_id e752x_pci_tbl[] __devinitdata = {
+static const struct pci_device_id e752x_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 7520_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
E7520},
@@ -1405,7 +1438,7 @@ MODULE_DEVICE_TABLE(pci, e752x_pci_tbl);
static struct pci_driver e752x_driver = {
.name = EDAC_MOD_STR,
.probe = e752x_init_one,
- .remove = __devexit_p(e752x_remove_one),
+ .remove = e752x_remove_one,
.id_table = e752x_pci_tbl,
};
@@ -1413,7 +1446,7 @@ static int __init e752x_init(void)
{
int pci_rc;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -1424,7 +1457,7 @@ static int __init e752x_init(void)
static void __exit e752x_exit(void)
{
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
pci_unregister_driver(&e752x_driver);
}
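
Besides retiring the redundant bridge_ck pointer, the e752x lookup above balances its reference counting: pci_get_device() hands back a referenced device, while pci_scan_single_device() does not, so the fallback path now takes an explicit reference before both outcomes funnel into the same pci_dev_put() on failure and removal. A sketch of that lookup:

#include <linux/pci.h>

/* Find the device-0 function-1 error device, returning exactly one
 * reference to the caller (or NULL); err_dev comes from the driver's
 * per-chipset table.
 */
static struct pci_dev *get_d0f1(struct pci_dev *pdev, u16 err_dev)
{
	struct pci_dev *d0f1;

	d0f1 = pci_get_device(PCI_VENDOR_ID_INTEL, err_dev, NULL);
	if (!d0f1) {
		/* hidden by the BIOS: scan it, then take our own ref */
		d0f1 = pci_scan_single_device(pdev->bus, PCI_DEVFN(0, 1));
		pci_dev_get(d0f1);	/* safe on NULL */
	}
	return d0f1;
}
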
diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c
index 1731d724581..3cda79bc8b0 100644
--- a/drivers/edac/e7xxx_edac.c
+++ b/drivers/edac/e7xxx_edac.c
@@ -10,6 +10,9 @@
* Based on work by Dan Hollis <goemon at anime dot net> and others.
* http://www.anime.net/~goemon/linux-ecc/
*
+ * Datasheet:
+ * http://www.intel.com/content/www/us/en/chipsets/e7501-chipset-memory-controller-hub-datasheet.html
+ *
* Contributors:
* Eric Biederman (Linux Networx)
* Tom Zimmerman (Linux Networx)
@@ -29,7 +32,7 @@
#include <linux/edac.h>
#include "edac_core.h"
-#define E7XXX_REVISION " Ver: 2.0.2 " __DATE__
+#define E7XXX_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR "e7xxx_edac"
#define e7xxx_printk(level, fmt, arg...) \
@@ -71,7 +74,7 @@
#endif /* PCI_DEVICE_ID_INTEL_7505_1_ERR */
#define E7XXX_NR_CSROWS 8 /* number of csrows */
-#define E7XXX_NR_DIMMS 8 /* FIXME - is this correct? */
+#define E7XXX_NR_DIMMS 8 /* 2 channels, 4 dimms/channel */
/* E7XXX register addresses - device 0 function 0 */
#define E7XXX_DRB 0x60 /* DRAM row boundary register (8b) */
@@ -163,7 +166,7 @@ static const struct e7xxx_dev_info e7xxx_devs[] = {
/* FIXME - is this valid for both SECDED and S4ECD4ED? */
static inline int e7xxx_find_channel(u16 syndrome)
{
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
if ((syndrome & 0xff00) == 0)
return 0;
@@ -183,7 +186,7 @@ static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci,
u32 remap;
struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
if ((page < pvt->tolm) ||
((page >= 0x100000) && (page < pvt->remapbase)))
@@ -205,7 +208,7 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
int row;
int channel;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
/* read the error address */
error_1b = info->dram_celog_add;
/* FIXME - should use PAGE_SHIFT */
@@ -216,13 +219,15 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
row = edac_mc_find_csrow_by_page(mci, page);
/* convert syndrome to channel */
channel = e7xxx_find_channel(syndrome);
- edac_mc_handle_ce(mci, page, 0, syndrome, row, channel, "e7xxx CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, page, 0, syndrome,
+ row, channel, -1, "e7xxx CE", "");
}
static void process_ce_no_info(struct mem_ctl_info *mci)
{
- debugf3("%s()\n", __func__);
- edac_mc_handle_ce_no_info(mci, "e7xxx CE log register overflow");
+ edac_dbg(3, "\n");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
+ "e7xxx CE log register overflow", "");
}
static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
@@ -230,19 +235,23 @@ static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
u32 error_2b, block_page;
int row;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
/* read the error address */
error_2b = info->dram_uelog_add;
/* FIXME - should use PAGE_SHIFT */
block_page = error_2b >> 6; /* convert to 4k address */
row = edac_mc_find_csrow_by_page(mci, block_page);
- edac_mc_handle_ue(mci, block_page, 0, row, "e7xxx UE");
+
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, block_page, 0, 0,
+ row, -1, -1, "e7xxx UE", "");
}
static void process_ue_no_info(struct mem_ctl_info *mci)
{
- debugf3("%s()\n", __func__);
- edac_mc_handle_ue_no_info(mci, "e7xxx UE log register overflow");
+ edac_dbg(3, "\n");
+
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
+ "e7xxx UE log register overflow", "");
}
static void e7xxx_get_error_info(struct mem_ctl_info *mci,
@@ -325,7 +334,7 @@ static void e7xxx_check(struct mem_ctl_info *mci)
{
struct e7xxx_error_info info;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
e7xxx_get_error_info(mci, &info);
e7xxx_process_error_info(mci, &info, 1);
}
@@ -347,11 +356,13 @@ static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
int dev_idx, u32 drc)
{
unsigned long last_cumul_size;
- int index;
+ int index, j;
u8 value;
- u32 dra, cumul_size;
+ u32 dra, cumul_size, nr_pages;
int drc_chan, drc_drbg, drc_ddim, mem_dev;
struct csrow_info *csrow;
+ struct dimm_info *dimm;
+ enum edac_type edac_mode;
pci_read_config_dword(pdev, E7XXX_DRA, &dra);
drc_chan = dual_channel_active(drc, dev_idx);
@@ -367,38 +378,44 @@ static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
for (index = 0; index < mci->nr_csrows; index++) {
/* mem_dev 0=x8, 1=x4 */
mem_dev = (dra >> (index * 4 + 3)) & 0x1;
- csrow = &mci->csrows[index];
+ csrow = mci->csrows[index];
pci_read_config_byte(pdev, E7XXX_DRB + index, &value);
/* convert a 64 or 32 MiB DRB to a page size. */
cumul_size = value << (25 + drc_drbg - PAGE_SHIFT);
- debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index,
- cumul_size);
+ edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
if (cumul_size == last_cumul_size)
continue; /* not populated */
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
+ nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = 1 << 12; /* 4KiB - resolution of CELOG */
- csrow->mtype = MEM_RDDR; /* only one type supported */
- csrow->dtype = mem_dev ? DEV_X4 : DEV_X8;
/*
- * if single channel or x8 devices then SECDED
- * if dual channel and x4 then S4ECD4ED
- */
+ * if single channel or x8 devices then SECDED
+ * if dual channel and x4 then S4ECD4ED
+ */
if (drc_ddim) {
if (drc_chan && mem_dev) {
- csrow->edac_mode = EDAC_S4ECD4ED;
+ edac_mode = EDAC_S4ECD4ED;
mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
} else {
- csrow->edac_mode = EDAC_SECDED;
+ edac_mode = EDAC_SECDED;
mci->edac_cap |= EDAC_FLAG_SECDED;
}
} else
- csrow->edac_mode = EDAC_NONE;
+ edac_mode = EDAC_NONE;
+
+ for (j = 0; j < drc_chan + 1; j++) {
+ dimm = csrow->channels[j]->dimm;
+
+ dimm->nr_pages = nr_pages / (drc_chan + 1);
+ dimm->grain = 1 << 12; /* 4KiB - resolution of CELOG */
+ dimm->mtype = MEM_RDDR; /* only one type supported */
+ dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
+ dimm->edac_mode = edac_mode;
+ }
}
}
@@ -406,30 +423,44 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx)
{
u16 pci_data;
struct mem_ctl_info *mci = NULL;
+ struct edac_mc_layer layers[2];
struct e7xxx_pvt *pvt = NULL;
u32 drc;
int drc_chan;
struct e7xxx_error_info discard;
- debugf0("%s(): mci\n", __func__);
+ edac_dbg(0, "mci\n");
pci_read_config_dword(pdev, E7XXX_DRC, &drc);
drc_chan = dual_channel_active(drc, dev_idx);
- mci = edac_mc_alloc(sizeof(*pvt), E7XXX_NR_CSROWS, drc_chan + 1, 0);
-
+ /*
+ * According to the datasheet, this device has a maximum of
+ * 4 DIMMs per channel, either single-rank or dual-rank. So the
+ * total number of DIMMs is 8 (E7XXX_NR_DIMMS).
+ * That means the DIMMs are mapped as csrows and the channel
+ * maps the rank, so an error on either channel should be
+ * attributed to the same DIMM.
+ */
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = E7XXX_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = drc_chan + 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (mci == NULL)
return -ENOMEM;
- debugf3("%s(): init mci\n", __func__);
+ edac_dbg(3, "init mci\n");
mci->mtype_cap = MEM_FLAG_RDDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED |
EDAC_FLAG_S4ECD4ED;
/* FIXME - what if different memory types are in different csrows? */
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = E7XXX_REVISION;
- mci->dev = &pdev->dev;
- debugf3("%s(): init pvt\n", __func__);
+ mci->pdev = &pdev->dev;
+ edac_dbg(3, "init pvt\n");
pvt = (struct e7xxx_pvt *)mci->pvt_info;
pvt->dev_info = &e7xxx_devs[dev_idx];
pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
@@ -442,14 +473,14 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx)
goto fail0;
}
- debugf3("%s(): more mci init\n", __func__);
+ edac_dbg(3, "more mci init\n");
mci->ctl_name = pvt->dev_info->ctl_name;
mci->dev_name = pci_name(pdev);
mci->edac_check = e7xxx_check;
mci->ctl_page_to_phys = ctl_page_to_phys;
e7xxx_init_csrows(mci, pdev, dev_idx, drc);
mci->edac_cap |= EDAC_FLAG_NONE;
- debugf3("%s(): tolm, remapbase, remaplimit\n", __func__);
+ edac_dbg(3, "tolm, remapbase, remaplimit\n");
/* load the top of low memory, remap base, and remap limit vars */
pci_read_config_word(pdev, E7XXX_TOLM, &pci_data);
pvt->tolm = ((u32) pci_data) << 4;
@@ -468,7 +499,7 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx)
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
- debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail1;
}
@@ -484,7 +515,7 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx)
}
/* get this far and it's successful */
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
return 0;
fail1:
@@ -497,22 +528,21 @@ fail0:
}
/* returns count (>= 0), or negative on error */
-static int __devinit e7xxx_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int e7xxx_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* wake up and enable device */
return pci_enable_device(pdev) ?
-EIO : e7xxx_probe1(pdev, ent->driver_data);
}
-static void __devexit e7xxx_remove_one(struct pci_dev *pdev)
+static void e7xxx_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct e7xxx_pvt *pvt;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
if (e7xxx_pci)
edac_pci_release_generic_ctl(e7xxx_pci);
@@ -525,7 +555,7 @@ static void __devexit e7xxx_remove_one(struct pci_dev *pdev)
edac_mc_free(mci);
}
-static const struct pci_device_id e7xxx_pci_tbl[] __devinitdata = {
+static const struct pci_device_id e7xxx_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
E7205},
@@ -548,7 +578,7 @@ MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl);
static struct pci_driver e7xxx_driver = {
.name = EDAC_MOD_STR,
.probe = e7xxx_init_one,
- .remove = __devexit_p(e7xxx_remove_one),
+ .remove = e7xxx_remove_one,
.id_table = e7xxx_pci_tbl,
};
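
e7xxx attributes a corrected error to a channel purely from the ECC syndrome; the branch visible above charges a syndrome with a clear high byte to channel 0 (the FIXME in the driver questions whether the heuristic holds for both SECDED and S4ECD4ED). A worked case for that visible branch, hedged to what the hunk shows:

/* A syndrome whose high byte is clear, e.g. 0x0041, decodes to
 * channel 0; that index then becomes the mid-layer argument of
 * edac_mc_handle_error() in process_ce() above.
 */
static int example_channel(void)
{
	return e7xxx_find_channel(0x0041);	/* -> 0 */
}
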
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index d7ca43a828b..3c2625e7980 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -32,19 +32,17 @@
#include <linux/completion.h>
#include <linux/kobject.h>
#include <linux/platform_device.h>
-#include <linux/sysdev.h>
#include <linux/workqueue.h>
+#include <linux/edac.h>
-#define EDAC_MC_LABEL_LEN 31
#define EDAC_DEVICE_NAME_LEN 31
#define EDAC_ATTRIB_VALUE_LEN 15
-#define MC_PROC_NAME_MAX_LEN 7
#if PAGE_SHIFT < 20
-#define PAGES_TO_MiB( pages ) ( ( pages ) >> ( 20 - PAGE_SHIFT ) )
-#define MiB_TO_PAGES(mb) ((mb) >> (20 - PAGE_SHIFT))
+#define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT))
+#define MiB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
#else /* PAGE_SHIFT > 20 */
-#define PAGES_TO_MiB( pages ) ( ( pages ) << ( PAGE_SHIFT - 20 ) )
+#define PAGES_TO_MiB(pages) ((pages) << (PAGE_SHIFT - 20))
#define MiB_TO_PAGES(mb) ((mb) >> (PAGE_SHIFT - 20))
#endif
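
The hunk above also fixes the direction of MiB_TO_PAGES(): when PAGE_SHIFT < 20, a MiB count must be shifted left to become a page count, whereas the old definition shifted right and silently truncated. With the common 4 KiB page size, for example:

/* PAGE_SHIFT == 12 (4 KiB pages):
 *   MiB_TO_PAGES(16)   == 16 << (20 - 12) == 4096 pages
 *   PAGES_TO_MiB(4096) == 4096 >> (20 - 12) == 16 MiB
 */
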
@@ -68,30 +66,26 @@
#define EDAC_PCI "PCI"
#define EDAC_DEBUG "DEBUG"
-#ifdef CONFIG_EDAC_DEBUG
-extern int edac_debug_level;
extern const char *edac_mem_types[];
-#define edac_debug_printk(level, fmt, arg...) \
- do { \
- if (level <= edac_debug_level) \
- edac_printk(KERN_DEBUG, EDAC_DEBUG, \
- "%s: " fmt, __func__, ##arg); \
- } while (0)
+#ifdef CONFIG_EDAC_DEBUG
+extern int edac_debug_level;
-#define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ )
-#define debugf1( ... ) edac_debug_printk(1, __VA_ARGS__ )
-#define debugf2( ... ) edac_debug_printk(2, __VA_ARGS__ )
-#define debugf3( ... ) edac_debug_printk(3, __VA_ARGS__ )
-#define debugf4( ... ) edac_debug_printk(4, __VA_ARGS__ )
+#define edac_dbg(level, fmt, ...) \
+do { \
+ if (level <= edac_debug_level) \
+ edac_printk(KERN_DEBUG, EDAC_DEBUG, \
+ "%s: " fmt, __func__, ##__VA_ARGS__); \
+} while (0)
#else /* !CONFIG_EDAC_DEBUG */
-#define debugf0( ... )
-#define debugf1( ... )
-#define debugf2( ... )
-#define debugf3( ... )
-#define debugf4( ... )
+#define edac_dbg(level, fmt, ...) \
+do { \
+ if (0) \
+ edac_printk(KERN_DEBUG, EDAC_DEBUG, \
+ "%s: " fmt, __func__, ##__VA_ARGS__); \
+} while (0)
#endif /* !CONFIG_EDAC_DEBUG */
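
Note the !CONFIG_EDAC_DEBUG branch above: instead of expanding to nothing, edac_dbg() wraps the call in if (0), so non-debug builds still type-check the format string and arguments while the compiler discards the dead call. The same idiom in isolation:

/* Compile-checked no-op logging: arguments are parsed and
 * format-checked, then eliminated as dead code.
 */
#define dbg_noop(fmt, ...)				\
do {							\
	if (0)						\
		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
} while (0)
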
@@ -100,357 +94,6 @@ extern const char *edac_mem_types[];
#define edac_dev_name(dev) (dev)->dev_name
-/* memory devices */
-enum dev_type {
- DEV_UNKNOWN = 0,
- DEV_X1,
- DEV_X2,
- DEV_X4,
- DEV_X8,
- DEV_X16,
- DEV_X32, /* Do these parts exist? */
- DEV_X64 /* Do these parts exist? */
-};
-
-#define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN)
-#define DEV_FLAG_X1 BIT(DEV_X1)
-#define DEV_FLAG_X2 BIT(DEV_X2)
-#define DEV_FLAG_X4 BIT(DEV_X4)
-#define DEV_FLAG_X8 BIT(DEV_X8)
-#define DEV_FLAG_X16 BIT(DEV_X16)
-#define DEV_FLAG_X32 BIT(DEV_X32)
-#define DEV_FLAG_X64 BIT(DEV_X64)
-
-/* memory types */
-enum mem_type {
- MEM_EMPTY = 0, /* Empty csrow */
- MEM_RESERVED, /* Reserved csrow type */
- MEM_UNKNOWN, /* Unknown csrow type */
- MEM_FPM, /* Fast page mode */
- MEM_EDO, /* Extended data out */
- MEM_BEDO, /* Burst Extended data out */
- MEM_SDR, /* Single data rate SDRAM */
- MEM_RDR, /* Registered single data rate SDRAM */
- MEM_DDR, /* Double data rate SDRAM */
- MEM_RDDR, /* Registered Double data rate SDRAM */
- MEM_RMBS, /* Rambus DRAM */
- MEM_DDR2, /* DDR2 RAM */
- MEM_FB_DDR2, /* fully buffered DDR2 */
- MEM_RDDR2, /* Registered DDR2 RAM */
- MEM_XDR, /* Rambus XDR */
- MEM_DDR3, /* DDR3 RAM */
- MEM_RDDR3, /* Registered DDR3 RAM */
-};
-
-#define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
-#define MEM_FLAG_RESERVED BIT(MEM_RESERVED)
-#define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN)
-#define MEM_FLAG_FPM BIT(MEM_FPM)
-#define MEM_FLAG_EDO BIT(MEM_EDO)
-#define MEM_FLAG_BEDO BIT(MEM_BEDO)
-#define MEM_FLAG_SDR BIT(MEM_SDR)
-#define MEM_FLAG_RDR BIT(MEM_RDR)
-#define MEM_FLAG_DDR BIT(MEM_DDR)
-#define MEM_FLAG_RDDR BIT(MEM_RDDR)
-#define MEM_FLAG_RMBS BIT(MEM_RMBS)
-#define MEM_FLAG_DDR2 BIT(MEM_DDR2)
-#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
-#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
-#define MEM_FLAG_XDR BIT(MEM_XDR)
-#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
-#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
-
-/* chipset Error Detection and Correction capabilities and mode */
-enum edac_type {
- EDAC_UNKNOWN = 0, /* Unknown if ECC is available */
- EDAC_NONE, /* Doesnt support ECC */
- EDAC_RESERVED, /* Reserved ECC type */
- EDAC_PARITY, /* Detects parity errors */
- EDAC_EC, /* Error Checking - no correction */
- EDAC_SECDED, /* Single bit error correction, Double detection */
- EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */
- EDAC_S4ECD4ED, /* Chipkill x4 devices */
- EDAC_S8ECD8ED, /* Chipkill x8 devices */
- EDAC_S16ECD16ED, /* Chipkill x16 devices */
-};
-
-#define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN)
-#define EDAC_FLAG_NONE BIT(EDAC_NONE)
-#define EDAC_FLAG_PARITY BIT(EDAC_PARITY)
-#define EDAC_FLAG_EC BIT(EDAC_EC)
-#define EDAC_FLAG_SECDED BIT(EDAC_SECDED)
-#define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED)
-#define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED)
-#define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED)
-#define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED)
-
-/* scrubbing capabilities */
-enum scrub_type {
- SCRUB_UNKNOWN = 0, /* Unknown if scrubber is available */
- SCRUB_NONE, /* No scrubber */
- SCRUB_SW_PROG, /* SW progressive (sequential) scrubbing */
- SCRUB_SW_SRC, /* Software scrub only errors */
- SCRUB_SW_PROG_SRC, /* Progressive software scrub from an error */
- SCRUB_SW_TUNABLE, /* Software scrub frequency is tunable */
- SCRUB_HW_PROG, /* HW progressive (sequential) scrubbing */
- SCRUB_HW_SRC, /* Hardware scrub only errors */
- SCRUB_HW_PROG_SRC, /* Progressive hardware scrub from an error */
- SCRUB_HW_TUNABLE /* Hardware scrub frequency is tunable */
-};
-
-#define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG)
-#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC)
-#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC)
-#define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE)
-#define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG)
-#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC)
-#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC)
-#define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE)
-
-/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
-
-/* EDAC internal operation states */
-#define OP_ALLOC 0x100
-#define OP_RUNNING_POLL 0x201
-#define OP_RUNNING_INTERRUPT 0x202
-#define OP_RUNNING_POLL_INTR 0x203
-#define OP_OFFLINE 0x300
-
-/*
- * There are several things to be aware of that aren't at all obvious:
- *
- *
- * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
- *
- * These are some of the many terms that are thrown about that don't always
- * mean what people think they mean (Inconceivable!). In the interest of
- * creating a common ground for discussion, terms and their definitions
- * will be established.
- *
- * Memory devices: The individual chip on a memory stick. These devices
- * commonly output 4 and 8 bits each. Grouping several
- * of these in parallel provides 64 bits which is common
- * for a memory stick.
- *
- * Memory Stick: A printed circuit board that agregates multiple
- * memory devices in parallel. This is the atomic
- * memory component that is purchaseable by Joe consumer
- * and loaded into a memory socket.
- *
- * Socket: A physical connector on the motherboard that accepts
- * a single memory stick.
- *
- * Channel: Set of memory devices on a memory stick that must be
- * grouped in parallel with one or more additional
- * channels from other memory sticks. This parallel
- * grouping of the output from multiple channels are
- * necessary for the smallest granularity of memory access.
- * Some memory controllers are capable of single channel -
- * which means that memory sticks can be loaded
- * individually. Other memory controllers are only
- * capable of dual channel - which means that memory
- * sticks must be loaded as pairs (see "socket set").
- *
- * Chip-select row: All of the memory devices that are selected together.
- * for a single, minimum grain of memory access.
- * This selects all of the parallel memory devices across
- * all of the parallel channels. Common chip-select rows
- * for single channel are 64 bits, for dual channel 128
- * bits.
- *
- * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memmory.
- * Motherboards commonly drive two chip-select pins to
- * a memory stick. A single-ranked stick, will occupy
- * only one of those rows. The other will be unused.
- *
- * Double-Ranked stick: A double-ranked stick has two chip-select rows which
- * access different sets of memory devices. The two
- * rows cannot be accessed concurrently.
- *
- * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
- * A double-sided stick has two chip-select rows which
- * access different sets of memory devices. The two
- * rows cannot be accessed concurrently. "Double-sided"
- * is irrespective of the memory devices being mounted
- * on both sides of the memory stick.
- *
- * Socket set: All of the memory sticks that are required for
- * a single memory access or all of the memory sticks
- * spanned by a chip-select row. A single socket set
- * has two chip-select rows and if double-sided sticks
- * are used these will occupy those chip-select rows.
- *
- * Bank: This term is avoided because it is unclear when
- * needing to distinguish between chip-select rows and
- * socket sets.
- *
- * Controller pages:
- *
- * Physical pages:
- *
- * Virtual pages:
- *
- *
- * STRUCTURE ORGANIZATION AND CHOICES
- *
- *
- *
- * PS - I enjoyed writing all that about as much as you enjoyed reading it.
- */
-
-struct channel_info {
- int chan_idx; /* channel index */
- u32 ce_count; /* Correctable Errors for this CHANNEL */
- char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
- struct csrow_info *csrow; /* the parent */
-};
-
-struct csrow_info {
- unsigned long first_page; /* first page number in dimm */
- unsigned long last_page; /* last page number in dimm */
- unsigned long page_mask; /* used for interleaving -
- * 0UL for non intlv
- */
- u32 nr_pages; /* number of pages in csrow */
- u32 grain; /* granularity of reported error in bytes */
- int csrow_idx; /* the chip-select row */
- enum dev_type dtype; /* memory device type */
- u32 ue_count; /* Uncorrectable Errors for this csrow */
- u32 ce_count; /* Correctable Errors for this csrow */
- enum mem_type mtype; /* memory csrow type */
- enum edac_type edac_mode; /* EDAC mode for this csrow */
- struct mem_ctl_info *mci; /* the parent */
-
- struct kobject kobj; /* sysfs kobject for this csrow */
-
- /* channel information for this csrow */
- u32 nr_channels;
- struct channel_info *channels;
-};
-
-struct mcidev_sysfs_group {
- const char *name; /* group name */
- const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */
-};
-
-struct mcidev_sysfs_group_kobj {
- struct list_head list; /* list for all instances within a mc */
-
- struct kobject kobj; /* kobj for the group */
-
- const struct mcidev_sysfs_group *grp; /* group description table */
- struct mem_ctl_info *mci; /* the parent */
-};
-
-/* mcidev_sysfs_attribute structure
- * used for driver sysfs attributes and in mem_ctl_info
- * sysfs top level entries
- */
-struct mcidev_sysfs_attribute {
- /* It should use either attr or grp */
- struct attribute attr;
- const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */
-
- /* Ops for show/store values at the attribute - not used on group */
- ssize_t (*show)(struct mem_ctl_info *,char *);
- ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
-};
-
-/* MEMORY controller information structure
- */
-struct mem_ctl_info {
- struct list_head link; /* for global list of mem_ctl_info structs */
-
- struct module *owner; /* Module owner of this control struct */
-
- unsigned long mtype_cap; /* memory types supported by mc */
- unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */
- unsigned long edac_cap; /* configuration capabilities - this is
- * closely related to edac_ctl_cap. The
- * difference is that the controller may be
- * capable of s4ecd4ed which would be listed
- * in edac_ctl_cap, but if channels aren't
- * capable of s4ecd4ed then the edac_cap would
- * not have that capability.
- */
- unsigned long scrub_cap; /* chipset scrub capabilities */
- enum scrub_type scrub_mode; /* current scrub mode */
-
- /* Translates sdram memory scrub rate given in bytes/sec to the
- internal representation and configures whatever else needs
- to be configured.
- */
- int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw);
-
- /* Get the current sdram memory scrub rate from the internal
- representation and converts it to the closest matching
- bandwith in bytes/sec.
- */
- int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 * bw);
-
-
- /* pointer to edac checking routine */
- void (*edac_check) (struct mem_ctl_info * mci);
-
- /*
- * Remaps memory pages: controller pages to physical pages.
- * For most MC's, this will be NULL.
- */
- /* FIXME - why not send the phys page to begin with? */
- unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
- unsigned long page);
- int mc_idx;
- int nr_csrows;
- struct csrow_info *csrows;
- /*
- * FIXME - what about controllers on other busses? - IDs must be
- * unique. dev pointer should be sufficiently unique, but
- * BUS:SLOT.FUNC numbers may not be unique.
- */
- struct device *dev;
- const char *mod_name;
- const char *mod_ver;
- const char *ctl_name;
- const char *dev_name;
- char proc_name[MC_PROC_NAME_MAX_LEN + 1];
- void *pvt_info;
- u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
- u32 ce_noinfo_count; /* Correctable Errors w/o info */
- u32 ue_count; /* Total Uncorrectable Errors for this MC */
- u32 ce_count; /* Total Correctable Errors for this MC */
- unsigned long start_time; /* mci load start time (in jiffies) */
-
- /* this stuff is for safe removal of mc devices from global list while
- * NMI handlers may be traversing list
- */
- struct rcu_head rcu;
- struct completion complete;
-
- /* edac sysfs device control */
- struct kobject edac_mci_kobj;
-
- /* list for all grp instances within a mc */
- struct list_head grp_kobj_list;
-
- /* Additional top controller level attributes, but specified
- * by the low level driver.
- *
- * Set by the low level driver to provide attributes at the
- * controller level, same level as 'ue_count' and 'ce_count' above.
- * An array of structures, NULL terminated
- *
- * If attributes are desired, then set to array of attributes
- * If no attributes are desired, leave NULL
- */
- const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;
-
- /* work struct for this MC */
- struct delayed_work work;
-
- /* the internal state of this controller instance */
- int op_state;
-};
-
/*
* The following are the structures to provide for a generic
* or abstract 'edac_device'. This set of structures and the
@@ -459,13 +102,13 @@ struct mem_ctl_info {
*
* CPU caches (L1 and L2)
* DMA engines
- * Core CPU swithces
+ * Core CPU switches
* Fabric switch units
* PCIe interface controllers
* other EDAC/ECC type devices that can be monitored for
* errors, etc.
*
- * It allows for a 2 level set of hiearchry. For example:
+ * It allows for a 2 level set of hierarchy. For example:
*
* cache could be composed of L1, L2 and L3 levels of cache.
* Each CPU core would have its own L1 cache, while sharing
@@ -594,8 +237,8 @@ struct edac_device_ctl_info {
*/
struct edac_dev_sysfs_attribute *sysfs_attributes;
- /* pointer to main 'edac' class in sysfs */
- struct sysdev_class *edac_class;
+ /* pointer to main 'edac' subsys in sysfs */
+ struct bus_type *edac_subsys;
/* the internal state of this controller instance */
int op_state;
@@ -619,10 +262,6 @@ struct edac_device_ctl_info {
unsigned long start_time; /* edac_device load start time (jiffies) */
- /* these are for safe removal of mc devices from global list while
- * NMI handlers may be traversing list
- */
- struct rcu_head rcu;
struct completion removal_complete;
/* sysfs top name under 'edac' directory
@@ -697,7 +336,7 @@ struct edac_pci_ctl_info {
int pci_idx;
- struct sysdev_class *edac_class; /* pointer to class */
+ struct bus_type *edac_subsys; /* pointer to subsystem */
/* the internal state of this controller instance */
int op_state;
@@ -721,10 +360,6 @@ struct edac_pci_ctl_info {
unsigned long start_time; /* edac_pci load start time (jiffies) */
- /* these are for safe removal of devices from global list while
- * NMI handlers may be traversing list
- */
- struct rcu_head rcu;
struct completion complete;
/* sysfs top name under 'edac' directory
@@ -807,8 +442,10 @@ static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
#endif /* CONFIG_PCI */
-extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
- unsigned nr_chans, int edac_index);
+struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
+ unsigned n_layers,
+ struct edac_mc_layer *layers,
+ unsigned sz_pvt);
extern int edac_mc_add_mc(struct mem_ctl_info *mci);
extern void edac_mc_free(struct mem_ctl_info *mci);
extern struct mem_ctl_info *edac_mc_find(int idx);
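The layer-based allocator replaces the old (sz_pvt, nr_csrows, nr_chans, edac_index) signature. As a minimal sketch, not taken from this patch, a driver with a classic csrow/channel topology would describe itself with two layers; struct my_pvt is a placeholder for the driver's private data:

	struct edac_mc_layer layers[2];
	struct mem_ctl_info *mci;

	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = nr_csrows;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = nr_channels;
	layers[1].is_virt_csrow = false;

	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
			    sizeof(struct my_pvt));
	if (!mci)
		return -ENOMEM;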
@@ -817,34 +454,21 @@ extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
unsigned long page);
-/*
- * The no info errors are used when error overflows are reported.
- * There are a limited number of error logging registers that can
- * be exausted. When all registers are exhausted and an additional
- * error occurs then an error overflow register records that an
- * error occured and the type of error, but doesn't have any
- * further information. The ce/ue versions make for cleaner
- * reporting logic and function interface - reduces conditional
- * statement clutter and extra function arguments.
- */
-extern void edac_mc_handle_ce(struct mem_ctl_info *mci,
- unsigned long page_frame_number,
- unsigned long offset_in_page,
- unsigned long syndrome, int row, int channel,
- const char *msg);
-extern void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
- const char *msg);
-extern void edac_mc_handle_ue(struct mem_ctl_info *mci,
- unsigned long page_frame_number,
- unsigned long offset_in_page, int row,
- const char *msg);
-extern void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
- const char *msg);
-extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, unsigned int csrow,
- unsigned int channel0, unsigned int channel1,
- char *msg);
-extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, unsigned int csrow,
- unsigned int channel, char *msg);
+void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
+ struct mem_ctl_info *mci,
+ struct edac_raw_error_desc *e);
+
+void edac_mc_handle_error(const enum hw_event_mc_err_type type,
+ struct mem_ctl_info *mci,
+ const u16 error_count,
+ const unsigned long page_frame_number,
+ const unsigned long offset_in_page,
+ const unsigned long syndrome,
+ const int top_layer,
+ const int mid_layer,
+ const int low_layer,
+ const char *msg,
+ const char *other_detail);
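The old per-error-type handlers collapse into this single entry point. A hedged sketch of how a two-layer (csrow/channel) driver might report one corrected error, assuming the HW_EVENT_ERR_CORRECTED value from enum hw_event_mc_err_type; the three layer arguments replace the old row/channel pair, with -1 marking an unused layer:

	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
			     1,				/* error count */
			     page_frame_number, offset_in_page, syndrome,
			     csrow, channel, -1,	/* top/mid/low layer */
			     "read error", "");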
/*
* edac_device APIs
@@ -856,6 +480,7 @@ extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
int inst_nr, int block_nr, const char *msg);
extern int edac_device_alloc_index(void);
+extern const char *edac_layer_name[];
/*
* edac_pci APIs
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index d5e13c94714..592af5f0cf3 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -23,7 +23,6 @@
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
-#include <linux/sysdev.h>
#include <linux/ctype.h>
#include <linux/workqueue.h>
#include <asm/uaccess.h>
@@ -41,12 +40,13 @@ static LIST_HEAD(edac_device_list);
#ifdef CONFIG_EDAC_DEBUG
static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
{
- debugf3("\tedac_dev = %p dev_idx=%d \n", edac_dev, edac_dev->dev_idx);
- debugf4("\tedac_dev->edac_check = %p\n", edac_dev->edac_check);
- debugf3("\tdev = %p\n", edac_dev->dev);
- debugf3("\tmod_name:ctl_name = %s:%s\n",
- edac_dev->mod_name, edac_dev->ctl_name);
- debugf3("\tpvt_info = %p\n\n", edac_dev->pvt_info);
+ edac_dbg(3, "\tedac_dev = %p dev_idx=%d\n",
+ edac_dev, edac_dev->dev_idx);
+ edac_dbg(4, "\tedac_dev->edac_check = %p\n", edac_dev->edac_check);
+ edac_dbg(3, "\tdev = %p\n", edac_dev->dev);
+ edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
+ edac_dev->mod_name, edac_dev->ctl_name);
+ edac_dbg(3, "\tpvt_info = %p\n\n", edac_dev->pvt_info);
}
#endif /* CONFIG_EDAC_DEBUG */
@@ -57,7 +57,7 @@ static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
*
* The control structure is allocated in complete chunk
* from the OS. It is in turn sub allocated to the
- * various objects that compose the struture
+ * various objects that compose the structure
*
* The structure has a 'nr_instance' array within itself.
* Each instance represents a major component
@@ -80,11 +80,10 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
unsigned total_size;
unsigned count;
unsigned instance, block, attr;
- void *pvt;
+ void *pvt, *p;
int err;
- debugf4("%s() instances=%d blocks=%d\n",
- __func__, nr_instances, nr_blocks);
+ edac_dbg(4, "instances=%d blocks=%d\n", nr_instances, nr_blocks);
/* Calculate the size of memory we need to allocate AND
* determine the offsets of the various item arrays
@@ -93,35 +92,30 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
* to be at least as stringent as what the compiler would
* provide if we could simply hardcode everything into a single struct.
*/
- dev_ctl = (struct edac_device_ctl_info *)NULL;
+ p = NULL;
+ dev_ctl = edac_align_ptr(&p, sizeof(*dev_ctl), 1);
/* Calc the 'end' offset past end of ONE ctl_info structure
* which will become the start of the 'instance' array
*/
- dev_inst = edac_align_ptr(&dev_ctl[1], sizeof(*dev_inst));
+ dev_inst = edac_align_ptr(&p, sizeof(*dev_inst), nr_instances);
/* Calc the 'end' offset past the instance array within the ctl_info
* which will become the start of the block array
*/
- dev_blk = edac_align_ptr(&dev_inst[nr_instances], sizeof(*dev_blk));
+ count = nr_instances * nr_blocks;
+ dev_blk = edac_align_ptr(&p, sizeof(*dev_blk), count);
/* Calc the 'end' offset past the dev_blk array
* which will become the start of the attrib array, if any.
*/
- count = nr_instances * nr_blocks;
- dev_attrib = edac_align_ptr(&dev_blk[count], sizeof(*dev_attrib));
-
- /* Check for case of when an attribute array is specified */
- if (nr_attrib > 0) {
- /* calc how many nr_attrib we need */
+ /* calc how many nr_attrib we need */
+ if (nr_attrib > 0)
count *= nr_attrib;
+ dev_attrib = edac_align_ptr(&p, sizeof(*dev_attrib), count);
- /* Calc the 'end' offset past the attributes array */
- pvt = edac_align_ptr(&dev_attrib[count], sz_private);
- } else {
- /* no attribute array specificed */
- pvt = edac_align_ptr(dev_attrib, sz_private);
- }
+ /* Calc the 'end' offset past the attributes array */
+ pvt = edac_align_ptr(&p, sz_private, 1);
/* 'pvt' now points to where the private data area is.
* At this point 'pvt' (like dev_inst,dev_blk and dev_attrib)
@@ -162,8 +156,8 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
/* Name of this edac device */
snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s",edac_device_name);
- debugf4("%s() edac_dev=%p next after end=%p\n",
- __func__, dev_ctl, pvt + sz_private );
+ edac_dbg(4, "edac_dev=%p next after end=%p\n",
+ dev_ctl, pvt + sz_private);
/* Initialize every Instance */
for (instance = 0; instance < nr_instances; instance++) {
@@ -184,10 +178,8 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
snprintf(blk->name, sizeof(blk->name),
"%s%d", edac_block_name, block+offset_value);
- debugf4("%s() instance=%d inst_p=%p block=#%d "
- "block_p=%p name='%s'\n",
- __func__, instance, inst, block,
- blk, blk->name);
+ edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n",
+ instance, inst, block, blk, blk->name);
/* if there are NO attributes OR no attribute pointer
* then continue on to next block iteration
@@ -200,8 +192,8 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
attrib_p = &dev_attrib[block*nr_instances*nr_attrib];
blk->block_attributes = attrib_p;
- debugf4("%s() THIS BLOCK_ATTRIB=%p\n",
- __func__, blk->block_attributes);
+ edac_dbg(4, "THIS BLOCK_ATTRIB=%p\n",
+ blk->block_attributes);
/* Initialize every user specified attribute in this
* block with the data the caller passed in
@@ -220,11 +212,10 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info(
attrib->block = blk; /* up link */
- debugf4("%s() alloc-attrib=%p attrib_name='%s' "
- "attrib-spec=%p spec-name=%s\n",
- __func__, attrib, attrib->attr.name,
- &attrib_spec[attr],
- attrib_spec[attr].attr.name
+ edac_dbg(4, "alloc-attrib=%p attrib_name='%s' attrib-spec=%p spec-name=%s\n",
+ attrib, attrib->attr.name,
+ &attrib_spec[attr],
+ attrib_spec[attr].attr.name
);
}
}
@@ -279,7 +270,7 @@ static struct edac_device_ctl_info *find_edac_device_by_dev(struct device *dev)
struct edac_device_ctl_info *edac_dev;
struct list_head *item;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
list_for_each(item, &edac_device_list) {
edac_dev = list_entry(item, struct edac_device_ctl_info, link);
@@ -346,30 +337,18 @@ fail1:
}
/*
- * complete_edac_device_list_del
- *
- * callback function when reference count is zero
- */
-static void complete_edac_device_list_del(struct rcu_head *head)
-{
- struct edac_device_ctl_info *edac_dev;
-
- edac_dev = container_of(head, struct edac_device_ctl_info, rcu);
- INIT_LIST_HEAD(&edac_dev->link);
-}
-
-/*
* del_edac_device_from_global_list
- *
- * remove the RCU, setup for a callback call,
- * then wait for the callback to occur
*/
static void del_edac_device_from_global_list(struct edac_device_ctl_info
*edac_device)
{
list_del_rcu(&edac_device->link);
- call_rcu(&edac_device->rcu, complete_edac_device_list_del);
- rcu_barrier();
+
+ /* these are for safe removal of devices from global list while
+ * NMI handlers may be traversing list
+ */
+ synchronize_rcu();
+ INIT_LIST_HEAD(&edac_device->link);
}
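Replacing call_rcu() plus rcu_barrier() with a single synchronize_rcu() keeps the same guarantee with less machinery: the writer blocks until every reader that may still see the old list linkage has finished, and only then reinitializes the link. Illustratively, the readers being waited for look something like:

	rcu_read_lock();
	list_for_each_entry_rcu(edac_dev, &edac_device_list, link) {
		/* edac_dev stays valid here even if removal races with us */
	}
	rcu_read_unlock();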
/*
@@ -407,7 +386,7 @@ static void edac_device_workq_function(struct work_struct *work_req)
/* Reschedule the workq for the next time period to start again
* if the number of msec is for 1 sec, then adjust to the next
- * whole one second to save timers fireing all over the period
+ * whole one second to save timers firing all over the period
* between integral seconds
*/
if (edac_dev->poll_msec == 1000)
@@ -426,7 +405,7 @@ static void edac_device_workq_function(struct work_struct *work_req)
void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
unsigned msec)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* take the arg 'msec' and set it into the control structure
* to be used in the time period calculation
@@ -458,6 +437,9 @@ void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev)
{
int status;
+ if (!edac_dev->edac_check)
+ return;
+
status = cancel_delayed_work(&edac_dev->work);
if (status == 0) {
/* workq instance might be running, wait for it */
@@ -514,7 +496,7 @@ EXPORT_SYMBOL_GPL(edac_device_alloc_index);
*/
int edac_device_add_device(struct edac_device_ctl_info *edac_dev)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
#ifdef CONFIG_EDAC_DEBUG
if (edac_debug_level >= 3)
@@ -551,12 +533,9 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev)
/* Report action taken */
edac_device_printk(edac_dev, KERN_INFO,
- "Giving out device to module '%s' controller "
- "'%s': DEV '%s' (%s)\n",
- edac_dev->mod_name,
- edac_dev->ctl_name,
- edac_dev_name(edac_dev),
- edac_op_state_to_string(edac_dev->op_state));
+ "Giving out device to module %s controller %s: DEV %s (%s)\n",
+ edac_dev->mod_name, edac_dev->ctl_name, edac_dev->dev_name,
+ edac_op_state_to_string(edac_dev->op_state));
mutex_unlock(&device_ctls_mutex);
return 0;
@@ -576,7 +555,7 @@ EXPORT_SYMBOL_GPL(edac_device_add_device);
* Remove sysfs entries for specified edac_device structure and
* then remove edac_device structure from global list
*
- * @pdev:
+ * @dev:
* Pointer to 'struct device' representing edac_device
* structure to remove.
*
@@ -588,7 +567,7 @@ struct edac_device_ctl_info *edac_device_del_device(struct device *dev)
{
struct edac_device_ctl_info *edac_dev;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
mutex_lock(&device_ctls_mutex);
@@ -672,7 +651,7 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
block->counters.ce_count++;
}
- /* Propogate the count up the 'totals' tree */
+ /* Propagate the count up the 'totals' tree */
instance->counters.ce_count++;
edac_dev->counters.ce_count++;
@@ -718,7 +697,7 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
block->counters.ue_count++;
}
- /* Propogate the count up the 'totals' tree */
+ /* Propagate the count up the 'totals' tree */
instance->counters.ue_count++;
edac_dev->counters.ue_count++;
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 400de071cab..fb68a06ad68 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -1,5 +1,5 @@
/*
- * file for managing the edac_device class of devices for EDAC
+ * file for managing the edac_device subsystem of devices for EDAC
*
* (C) 2007 SoftwareBitMaker
*
@@ -202,7 +202,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj)
{
struct edac_device_ctl_info *edac_dev = to_edacdev(kobj);
- debugf4("%s() control index=%d\n", __func__, edac_dev->dev_idx);
+ edac_dbg(4, "control index=%d\n", edac_dev->dev_idx);
/* decrement the EDAC CORE module ref count */
module_put(edac_dev->owner);
@@ -230,21 +230,21 @@ static struct kobj_type ktype_device_ctrl = {
*/
int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
{
- struct sysdev_class *edac_class;
+ struct bus_type *edac_subsys;
int err;
- debugf1("%s()\n", __func__);
+ edac_dbg(1, "\n");
/* get the /sys/devices/system/edac reference */
- edac_class = edac_get_sysfs_class();
- if (edac_class == NULL) {
- debugf1("%s() no edac_class error\n", __func__);
+ edac_subsys = edac_get_sysfs_subsys();
+ if (edac_subsys == NULL) {
+ edac_dbg(1, "no edac_subsys error\n");
err = -ENODEV;
goto err_out;
}
- /* Point to the 'edac_class' this instance 'reports' to */
- edac_dev->edac_class = edac_class;
+ /* Point to the 'edac_subsys' this instance 'reports' to */
+ edac_dev->edac_subsys = edac_subsys;
/* Init the devices's kobject */
memset(&edac_dev->kobj, 0, sizeof(struct kobject));
@@ -261,11 +261,11 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
/* register */
err = kobject_init_and_add(&edac_dev->kobj, &ktype_device_ctrl,
- &edac_class->kset.kobj,
+ &edac_subsys->dev_root->kobj,
"%s", edac_dev->name);
if (err) {
- debugf1("%s()Failed to register '.../edac/%s'\n",
- __func__, edac_dev->name);
+ edac_dbg(1, "Failed to register '.../edac/%s'\n",
+ edac_dev->name);
goto err_kobj_reg;
}
kobject_uevent(&edac_dev->kobj, KOBJ_ADD);
@@ -274,8 +274,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
* edac_device_unregister_sysfs_main_kobj() must be used
*/
- debugf4("%s() Registered '.../edac/%s' kobject\n",
- __func__, edac_dev->name);
+ edac_dbg(4, "Registered '.../edac/%s' kobject\n", edac_dev->name);
return 0;
@@ -284,7 +283,7 @@ err_kobj_reg:
module_put(edac_dev->owner);
err_mod_get:
- edac_put_sysfs_class();
+ edac_put_sysfs_subsys();
err_out:
return err;
@@ -296,9 +295,8 @@ err_out:
*/
void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev)
{
- debugf0("%s()\n", __func__);
- debugf4("%s() name of kobject is: %s\n",
- __func__, kobject_name(&dev->kobj));
+ edac_dbg(0, "\n");
+ edac_dbg(4, "name of kobject is: %s\n", kobject_name(&dev->kobj));
/*
* Unregister the edac device's kobject and
@@ -308,7 +306,7 @@ void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev)
* b) 'kfree' the memory
*/
kobject_put(&dev->kobj);
- edac_put_sysfs_class();
+ edac_put_sysfs_subsys();
}
/* edac_dev -> instance information */
@@ -336,7 +334,7 @@ static void edac_device_ctrl_instance_release(struct kobject *kobj)
{
struct edac_device_instance *instance;
- debugf1("%s()\n", __func__);
+ edac_dbg(1, "\n");
/* map from this kobj to the main control struct
* and then dec the main kobj count
@@ -442,7 +440,7 @@ static void edac_device_ctrl_block_release(struct kobject *kobj)
{
struct edac_device_block *block;
- debugf1("%s()\n", __func__);
+ edac_dbg(1, "\n");
/* get the container of the kobj */
block = to_block(kobj);
@@ -524,16 +522,16 @@ static int edac_device_create_block(struct edac_device_ctl_info *edac_dev,
struct edac_dev_sysfs_block_attribute *sysfs_attrib;
struct kobject *main_kobj;
- debugf4("%s() Instance '%s' inst_p=%p block '%s' block_p=%p\n",
- __func__, instance->name, instance, block->name, block);
- debugf4("%s() block kobj=%p block kobj->parent=%p\n",
- __func__, &block->kobj, &block->kobj.parent);
+ edac_dbg(4, "Instance '%s' inst_p=%p block '%s' block_p=%p\n",
+ instance->name, instance, block->name, block);
+ edac_dbg(4, "block kobj=%p block kobj->parent=%p\n",
+ &block->kobj, &block->kobj.parent);
/* init this block's kobject */
memset(&block->kobj, 0, sizeof(struct kobject));
/* bump the main kobject's reference count for this controller
- * and this instance is dependant on the main
+ * and this instance is dependent on the main
*/
main_kobj = kobject_get(&edac_dev->kobj);
if (!main_kobj) {
@@ -546,8 +544,7 @@ static int edac_device_create_block(struct edac_device_ctl_info *edac_dev,
&instance->kobj,
"%s", block->name);
if (err) {
- debugf1("%s() Failed to register instance '%s'\n",
- __func__, block->name);
+ edac_dbg(1, "Failed to register instance '%s'\n", block->name);
kobject_put(main_kobj);
err = -ENODEV;
goto err_out;
@@ -560,11 +557,9 @@ static int edac_device_create_block(struct edac_device_ctl_info *edac_dev,
if (sysfs_attrib && block->nr_attribs) {
for (i = 0; i < block->nr_attribs; i++, sysfs_attrib++) {
- debugf4("%s() creating block attrib='%s' "
- "attrib->%p to kobj=%p\n",
- __func__,
- sysfs_attrib->attr.name,
- sysfs_attrib, &block->kobj);
+ edac_dbg(4, "creating block attrib='%s' attrib->%p to kobj=%p\n",
+ sysfs_attrib->attr.name,
+ sysfs_attrib, &block->kobj);
/* Create each block_attribute file */
err = sysfs_create_file(&block->kobj,
@@ -635,7 +630,7 @@ static int edac_device_create_instance(struct edac_device_ctl_info *edac_dev,
instance->ctl = edac_dev;
/* bump the main kobject's reference count for this controller
- * and this instance is dependant on the main
+ * and this instance is dependent on the main
*/
main_kobj = kobject_get(&edac_dev->kobj);
if (!main_kobj) {
@@ -647,14 +642,14 @@ static int edac_device_create_instance(struct edac_device_ctl_info *edac_dev,
err = kobject_init_and_add(&instance->kobj, &ktype_instance_ctrl,
&edac_dev->kobj, "%s", instance->name);
if (err != 0) {
- debugf2("%s() Failed to register instance '%s'\n",
- __func__, instance->name);
+ edac_dbg(2, "Failed to register instance '%s'\n",
+ instance->name);
kobject_put(main_kobj);
goto err_out;
}
- debugf4("%s() now register '%d' blocks for instance %d\n",
- __func__, instance->nr_blocks, idx);
+ edac_dbg(4, "now register '%d' blocks for instance %d\n",
+ instance->nr_blocks, idx);
/* register all blocks of this instance */
for (i = 0; i < instance->nr_blocks; i++) {
@@ -670,8 +665,8 @@ static int edac_device_create_instance(struct edac_device_ctl_info *edac_dev,
}
kobject_uevent(&instance->kobj, KOBJ_ADD);
- debugf4("%s() Registered instance %d '%s' kobject\n",
- __func__, idx, instance->name);
+ edac_dbg(4, "Registered instance %d '%s' kobject\n",
+ idx, instance->name);
return 0;
@@ -715,7 +710,7 @@ static int edac_device_create_instances(struct edac_device_ctl_info *edac_dev)
int i, j;
int err;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* iterate over creation of the instances */
for (i = 0; i < edac_dev->nr_instances; i++) {
@@ -817,12 +812,12 @@ int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev)
int err;
struct kobject *edac_kobj = &edac_dev->kobj;
- debugf0("%s() idx=%d\n", __func__, edac_dev->dev_idx);
+ edac_dbg(0, "idx=%d\n", edac_dev->dev_idx);
/* go create any main attributes callers wants */
err = edac_device_add_main_sysfs_attributes(edac_dev);
if (err) {
- debugf0("%s() failed to add sysfs attribs\n", __func__);
+ edac_dbg(0, "failed to add sysfs attribs\n");
goto err_out;
}
@@ -832,8 +827,7 @@ int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev)
err = sysfs_create_link(edac_kobj,
&edac_dev->dev->kobj, EDAC_DEVICE_SYMLINK);
if (err) {
- debugf0("%s() sysfs_create_link() returned err= %d\n",
- __func__, err);
+ edac_dbg(0, "sysfs_create_link() returned err= %d\n", err);
goto err_remove_main_attribs;
}
@@ -843,14 +837,13 @@ int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev)
*/
err = edac_device_create_instances(edac_dev);
if (err) {
- debugf0("%s() edac_device_create_instances() "
- "returned err= %d\n", __func__, err);
+ edac_dbg(0, "edac_device_create_instances() returned err= %d\n",
+ err);
goto err_remove_link;
}
- debugf4("%s() create-instances done, idx=%d\n",
- __func__, edac_dev->dev_idx);
+ edac_dbg(4, "create-instances done, idx=%d\n", edac_dev->dev_idx);
return 0;
@@ -873,7 +866,7 @@ err_out:
*/
void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* remove any main attributes for this device */
edac_device_remove_main_sysfs_attributes(edac_dev);
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index ba6586a69cc..2c694b5297c 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -25,57 +25,109 @@
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
-#include <linux/sysdev.h>
#include <linux/ctype.h>
#include <linux/edac.h>
+#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"
+#define CREATE_TRACE_POINTS
+#define TRACE_INCLUDE_PATH ../../include/ras
+#include <ras/ras_event.h>
+
/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);
+/*
+ * Used to lock EDAC MC to just one module, avoiding two drivers (e.g.
+ * apei/ghes and i7core_edac) from being used at the same time.
+ */
+static void const *edac_mc_owner;
+
+static struct bus_type mc_bus[EDAC_MAX_MCS];
+
+unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
+ unsigned len)
+{
+ struct mem_ctl_info *mci = dimm->mci;
+ int i, n, count = 0;
+ char *p = buf;
+
+ for (i = 0; i < mci->n_layers; i++) {
+ n = snprintf(p, len, "%s %d ",
+ edac_layer_name[mci->layers[i].type],
+ dimm->location[i]);
+ p += n;
+ len -= n;
+ count += n;
+ if (!len)
+ break;
+ }
+
+ return count;
+}
+
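The helper emits one "<layer name> <position>" pair per layer, exactly as edac_mc_dump_dimm() below consumes it. A short usage sketch, with an illustrative result for a channel/slot hierarchy:

	char location[80];

	edac_dimm_info_location(dimm, location, sizeof(location));
	/* location now holds e.g. "channel 1 slot 0 " */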
#ifdef CONFIG_EDAC_DEBUG
-static void edac_mc_dump_channel(struct channel_info *chan)
+static void edac_mc_dump_channel(struct rank_info *chan)
{
- debugf4("\tchannel = %p\n", chan);
- debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
- debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
- debugf4("\tchannel->label = '%s'\n", chan->label);
- debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
+ edac_dbg(4, " channel->chan_idx = %d\n", chan->chan_idx);
+ edac_dbg(4, " channel = %p\n", chan);
+ edac_dbg(4, " channel->csrow = %p\n", chan->csrow);
+ edac_dbg(4, " channel->dimm = %p\n", chan->dimm);
+}
+
+static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
+{
+ char location[80];
+
+ edac_dimm_info_location(dimm, location, sizeof(location));
+
+ edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
+ dimm->mci->csbased ? "rank" : "dimm",
+ number, location, dimm->csrow, dimm->cschannel);
+ edac_dbg(4, " dimm = %p\n", dimm);
+ edac_dbg(4, " dimm->label = '%s'\n", dimm->label);
+ edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages);
+ edac_dbg(4, " dimm->grain = %d\n", dimm->grain);
+ edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages);
}
static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
- debugf4("\tcsrow = %p\n", csrow);
- debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
- debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
- debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
- debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
- debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
- debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
- debugf4("\tcsrow->channels = %p\n", csrow->channels);
- debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
+ edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
+ edac_dbg(4, " csrow = %p\n", csrow);
+ edac_dbg(4, " csrow->first_page = 0x%lx\n", csrow->first_page);
+ edac_dbg(4, " csrow->last_page = 0x%lx\n", csrow->last_page);
+ edac_dbg(4, " csrow->page_mask = 0x%lx\n", csrow->page_mask);
+ edac_dbg(4, " csrow->nr_channels = %d\n", csrow->nr_channels);
+ edac_dbg(4, " csrow->channels = %p\n", csrow->channels);
+ edac_dbg(4, " csrow->mci = %p\n", csrow->mci);
}
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
- debugf3("\tmci = %p\n", mci);
- debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
- debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
- debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
- debugf4("\tmci->edac_check = %p\n", mci->edac_check);
- debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
- mci->nr_csrows, mci->csrows);
- debugf3("\tdev = %p\n", mci->dev);
- debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
- debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
+ edac_dbg(3, "\tmci = %p\n", mci);
+ edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
+ edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
+ edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
+ edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
+ edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
+ mci->nr_csrows, mci->csrows);
+ edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
+ mci->tot_dimms, mci->dimms);
+ edac_dbg(3, "\tdev = %p\n", mci->pdev);
+ edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
+ mci->mod_name, mci->ctl_name);
+ edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}
+#endif /* CONFIG_EDAC_DEBUG */
+
/*
* keep those in sync with the enum mem_type
*/
@@ -100,20 +152,37 @@ const char *edac_mem_types[] = {
};
EXPORT_SYMBOL_GPL(edac_mem_types);
-#endif /* CONFIG_EDAC_DEBUG */
-
-/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
- * Adjust 'ptr' so that its alignment is at least as stringent as what the
- * compiler would provide for X and return the aligned result.
+/**
+ * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
+ * @p: pointer to a pointer with the memory offset to be used. At
+ * return, this will be incremented to point to the next offset
+ * @size: Size of the data structure to be reserved
+ * @n_elems: Number of elements that should be reserved
*
* If 'size' is a constant, the compiler will optimize this whole function
- * down to either a no-op or the addition of a constant to the value of 'ptr'.
+ * down to either a no-op or the addition of a constant to the value of '*p'.
+ *
+ * The 'p' pointer is needed so that successive calls keep advancing to
+ * the proper offsets in memory when allocating the struct along with its
+ * embedded structs, as edac_device_alloc_ctl_info() does, for example.
+ *
+ * At return, the pointer 'p' will be incremented to be used on a next call
+ * to this function.
*/
-void *edac_align_ptr(void *ptr, unsigned size)
+void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
unsigned align, r;
+ void *ptr = *p;
- /* Here we assume that the alignment of a "long long" is the most
+ *p += size * n_elems;
+
+ /*
+ * 'p' can possibly be an unaligned item X such that sizeof(X) is
+ * 'size'. Adjust 'p' so that its alignment is at least as
+ * stringent as what the compiler would provide for X and return
+ * the aligned result.
+ * Here we assume that the alignment of a "long long" is the most
* stringent alignment that the compiler will ever provide by default.
* As far as I know, this is a reasonable assumption.
*/
@@ -128,19 +197,53 @@ void *edac_align_ptr(void *ptr, unsigned size)
else
return (char *)ptr;
- r = size % align;
+ r = (unsigned long)p % align;
if (r == 0)
return (char *)ptr;
+ *p += align - r;
+
return (void *)(((unsigned long)ptr) + align - r);
}
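The rewritten helper is used in two passes: once against a NULL base to compute aligned offsets (the final value of the cursor is the total size to allocate), after which the offsets are rebased onto the real allocation, as edac_device_alloc_ctl_info() and edac_mc_alloc() both do. A condensed sketch of that pattern, with hypothetical struct names:

	void *p = NULL, *base;
	struct hdr *h;
	struct item *items;

	h = edac_align_ptr(&p, sizeof(*h), 1);		/* offset 0 */
	items = edac_align_ptr(&p, sizeof(*items), n);	/* aligned offset */

	base = kzalloc((unsigned long)p, GFP_KERNEL);	/* total size */
	if (!base)
		return NULL;

	h = (struct hdr *)((char *)base + (unsigned long)h);
	items = (struct item *)((char *)base + (unsigned long)items);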
+static void _edac_mc_free(struct mem_ctl_info *mci)
+{
+ int i, chn, row;
+ struct csrow_info *csr;
+ const unsigned int tot_dimms = mci->tot_dimms;
+ const unsigned int tot_channels = mci->num_cschannel;
+ const unsigned int tot_csrows = mci->nr_csrows;
+
+ if (mci->dimms) {
+ for (i = 0; i < tot_dimms; i++)
+ kfree(mci->dimms[i]);
+ kfree(mci->dimms);
+ }
+ if (mci->csrows) {
+ for (row = 0; row < tot_csrows; row++) {
+ csr = mci->csrows[row];
+ if (csr) {
+ if (csr->channels) {
+ for (chn = 0; chn < tot_channels; chn++)
+ kfree(csr->channels[chn]);
+ kfree(csr->channels);
+ }
+ kfree(csr);
+ }
+ }
+ kfree(mci->csrows);
+ }
+ kfree(mci);
+}
+
/**
- * edac_mc_alloc: Allocate a struct mem_ctl_info structure
- * @size_pvt: size of private storage needed
- * @nr_csrows: Number of CWROWS needed for this MC
- * @nr_chans: Number of channels for the MC
+ * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
+ * @mc_num: Memory controller number
+ * @n_layers: Number of MC hierarchy layers
+ * @layers: Describes each layer as seen by the Memory Controller
+ * @sz_pvt: size of private storage needed
+ *
* Everything is kmalloc'ed as one big chunk - more efficient.
* Only can be used if all structures have the same lifetime - otherwise
@@ -148,32 +251,75 @@ void *edac_align_ptr(void *ptr, unsigned size)
*
* Use edac_mc_free() to free mc structures allocated by this function.
*
+ * NOTE: drivers handle multi-rank memories in different ways: in some
+ * drivers, one multi-rank memory stick is mapped as one entry, while in
+ * others a single multi-rank memory stick is mapped into several entries.
+ * Currently, this function allocates multiple struct dimm_info in such
+ * scenarios, as grouping the multiple ranks would require driver changes.
+ *
* Returns:
- * NULL allocation failed
- * struct mem_ctl_info pointer
+ * On failure: NULL
+ * On success: struct mem_ctl_info pointer
*/
-struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
- unsigned nr_chans, int edac_index)
+struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
+ unsigned n_layers,
+ struct edac_mc_layer *layers,
+ unsigned sz_pvt)
{
struct mem_ctl_info *mci;
- struct csrow_info *csi, *csrow;
- struct channel_info *chi, *chp, *chan;
- void *pvt;
- unsigned size;
- int row, chn;
- int err;
+ struct edac_mc_layer *layer;
+ struct csrow_info *csr;
+ struct rank_info *chan;
+ struct dimm_info *dimm;
+ u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
+ unsigned pos[EDAC_MAX_LAYERS];
+ unsigned size, tot_dimms = 1, count = 1;
+ unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
+ void *pvt, *p, *ptr = NULL;
+ int i, j, row, chn, n, len, off;
+ bool per_rank = false;
+
+ BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
+ /*
+ * Calculate the total amount of dimms and csrows/cschannels while
+ * in the old API emulation mode
+ */
+ for (i = 0; i < n_layers; i++) {
+ tot_dimms *= layers[i].size;
+ if (layers[i].is_virt_csrow)
+ tot_csrows *= layers[i].size;
+ else
+ tot_channels *= layers[i].size;
+
+ if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
+ per_rank = true;
+ }
/* Figure out the offsets of the various items from the start of an mc
* structure. We want the alignment of each item to be at least as
* stringent as what the compiler would provide if we could simply
* hardcode everything into a single struct.
*/
- mci = (struct mem_ctl_info *)0;
- csi = edac_align_ptr(&mci[1], sizeof(*csi));
- chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
- pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
+ mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
+ layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
+ for (i = 0; i < n_layers; i++) {
+ count *= layers[i].size;
+ edac_dbg(4, "errcount layer %d size %d\n", i, count);
+ ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
+ ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
+ tot_errcount += 2 * count;
+ }
+
+ edac_dbg(4, "allocating %d error counters\n", tot_errcount);
+ pvt = edac_align_ptr(&ptr, sz_pvt, 1);
size = ((unsigned long)pvt) + sz_pvt;
+ edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
+ size,
+ tot_dimms,
+ per_rank ? "ranks" : "dimms",
+ tot_csrows * tot_channels);
+
mci = kzalloc(size, GFP_KERNEL);
if (mci == NULL)
return NULL;
@@ -181,50 +327,134 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
/* Adjust pointers so they point within the memory we just allocated
* rather than an imaginary chunk of memory located at address 0.
*/
- csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
- chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
+ layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
+ for (i = 0; i < n_layers; i++) {
+ mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
+ mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
+ }
pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
/* setup index and various internal pointers */
- mci->mc_idx = edac_index;
- mci->csrows = csi;
+ mci->mc_idx = mc_num;
+ mci->tot_dimms = tot_dimms;
mci->pvt_info = pvt;
- mci->nr_csrows = nr_csrows;
-
- for (row = 0; row < nr_csrows; row++) {
- csrow = &csi[row];
- csrow->csrow_idx = row;
- csrow->mci = mci;
- csrow->nr_channels = nr_chans;
- chp = &chi[row * nr_chans];
- csrow->channels = chp;
-
- for (chn = 0; chn < nr_chans; chn++) {
- chan = &chp[chn];
+ mci->n_layers = n_layers;
+ mci->layers = layer;
+ memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
+ mci->nr_csrows = tot_csrows;
+ mci->num_cschannel = tot_channels;
+ mci->csbased = per_rank;
+
+ /*
+ * Allocate and fill the csrow/channels structs
+ */
+ mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
+ if (!mci->csrows)
+ goto error;
+ for (row = 0; row < tot_csrows; row++) {
+ csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
+ if (!csr)
+ goto error;
+ mci->csrows[row] = csr;
+ csr->csrow_idx = row;
+ csr->mci = mci;
+ csr->nr_channels = tot_channels;
+ csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
+ GFP_KERNEL);
+ if (!csr->channels)
+ goto error;
+
+ for (chn = 0; chn < tot_channels; chn++) {
+ chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
+ if (!chan)
+ goto error;
+ csr->channels[chn] = chan;
chan->chan_idx = chn;
- chan->csrow = csrow;
+ chan->csrow = csr;
}
}
- mci->op_state = OP_ALLOC;
- INIT_LIST_HEAD(&mci->grp_kobj_list);
-
/*
- * Initialize the 'root' kobj for the edac_mc controller
+ * Allocate and fill the dimm structs
*/
- err = edac_mc_register_sysfs_main_kobj(mci);
- if (err) {
- kfree(mci);
- return NULL;
+ mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
+ if (!mci->dimms)
+ goto error;
+
+ memset(&pos, 0, sizeof(pos));
+ row = 0;
+ chn = 0;
+ for (i = 0; i < tot_dimms; i++) {
+ chan = mci->csrows[row]->channels[chn];
+ off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
+ if (off < 0 || off >= tot_dimms) {
+ edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
+ goto error;
+ }
+
+ dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
+ if (!dimm)
+ goto error;
+ mci->dimms[off] = dimm;
+ dimm->mci = mci;
+
+ /*
+ * Copy DIMM location and initialize it.
+ */
+ len = sizeof(dimm->label);
+ p = dimm->label;
+ n = snprintf(p, len, "mc#%u", mc_num);
+ p += n;
+ len -= n;
+ for (j = 0; j < n_layers; j++) {
+ n = snprintf(p, len, "%s#%u",
+ edac_layer_name[layers[j].type],
+ pos[j]);
+ p += n;
+ len -= n;
+ dimm->location[j] = pos[j];
+
+ if (len <= 0)
+ break;
+ }
+
+ /* Link it to the csrows old API data */
+ chan->dimm = dimm;
+ dimm->csrow = row;
+ dimm->cschannel = chn;
+
+ /* Increment csrow location */
+ if (layers[0].is_virt_csrow) {
+ chn++;
+ if (chn == tot_channels) {
+ chn = 0;
+ row++;
+ }
+ } else {
+ row++;
+ if (row == tot_csrows) {
+ row = 0;
+ chn++;
+ }
+ }
+
+ /* Increment dimm location */
+ for (j = n_layers - 1; j >= 0; j--) {
+ pos[j]++;
+ if (pos[j] < layers[j].size)
+ break;
+ pos[j] = 0;
+ }
}
- /* at this point, the root kobj is valid, and in order to
- * 'free' the object, then the function:
- * edac_mc_unregister_sysfs_main_kobj() must be called
- * which will perform kobj unregistration and the actual free
- * will occur during the kobject callback operation
- */
+ mci->op_state = OP_ALLOC;
+
return mci;
+
+error:
+ _edac_mc_free(mci);
+
+ return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
@@ -235,12 +465,18 @@ EXPORT_SYMBOL_GPL(edac_mc_alloc);
*/
void edac_mc_free(struct mem_ctl_info *mci)
{
- debugf1("%s()\n", __func__);
+ edac_dbg(1, "\n");
- edac_mc_unregister_sysfs_main_kobj(mci);
+ /* If we're not yet registered with sysfs, free only what was allocated
+ * in edac_mc_alloc().
+ */
+ if (!device_is_registered(&mci->dev)) {
+ _edac_mc_free(mci);
+ return;
+ }
- /* free the mci instance memory here */
- kfree(mci);
+ /* the mci instance is freed here, when the sysfs object is dropped */
+ edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
@@ -257,12 +493,12 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev)
struct mem_ctl_info *mci;
struct list_head *item;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
list_for_each(item, &mc_devices) {
mci = list_entry(item, struct mem_ctl_info, link);
- if (mci->dev == dev)
+ if (mci->pdev == dev)
return mci;
}
@@ -323,16 +559,19 @@ static void edac_mc_workq_function(struct work_struct *work_req)
*
* called with the mem_ctls_mutex held
*/
-static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
+static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec,
+ bool init)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* if this instance is not in the POLL state, then simply return */
if (mci->op_state != OP_RUNNING_POLL)
return;
- INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
- queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
+ if (init)
+ INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
+
+ mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}
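mod_delayed_work() queues the work if idle and otherwise just updates its timer, so the same helper now covers both call sites in this patch: the initial setup, which also performs the one-time INIT_DELAYED_WORK(), and later poll-period updates:

	/* at edac_mc_add_mc() time: init and arm the timer */
	edac_mc_workq_setup(mci, edac_mc_get_poll_msec(), true);

	/* from edac_mc_reset_delay_period(): just re-arm */
	edac_mc_workq_setup(mci, value, false);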
/*
@@ -352,8 +591,7 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
status = cancel_delayed_work(&mci->work);
if (status == 0) {
- debugf0("%s() not canceled, flush the queue\n",
- __func__);
+ edac_dbg(0, "not canceled, flush the queue\n");
/* workq instance might be running, wait for it */
flush_workqueue(edac_workqueue);
@@ -366,32 +604,17 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
* user space has updated our poll period value, need to
* reset our workq delays
*/
-void edac_mc_reset_delay_period(int value)
+void edac_mc_reset_delay_period(unsigned long value)
{
struct mem_ctl_info *mci;
struct list_head *item;
mutex_lock(&mem_ctls_mutex);
- /* scan the list and turn off all workq timers, doing so under lock
- */
list_for_each(item, &mc_devices) {
mci = list_entry(item, struct mem_ctl_info, link);
- if (mci->op_state == OP_RUNNING_POLL)
- cancel_delayed_work(&mci->work);
- }
-
- mutex_unlock(&mem_ctls_mutex);
-
-
- /* re-walk the list, and reset the poll delay */
- mutex_lock(&mem_ctls_mutex);
-
- list_for_each(item, &mc_devices) {
- mci = list_entry(item, struct mem_ctl_info, link);
-
- edac_mc_workq_setup(mci, (unsigned long) value);
+ edac_mc_workq_setup(mci, value, false);
}
mutex_unlock(&mem_ctls_mutex);
@@ -414,7 +637,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
insert_before = &mc_devices;
- p = find_mci_by_dev(mci->dev);
+ p = find_mci_by_dev(mci->pdev);
if (unlikely(p != NULL))
goto fail0;
@@ -436,7 +659,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
fail0:
edac_printk(KERN_WARNING, EDAC_MC,
- "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
+ "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
return 1;
@@ -447,20 +670,18 @@ fail1:
return 1;
}
-static void complete_mc_list_del(struct rcu_head *head)
+static int del_mc_from_global_list(struct mem_ctl_info *mci)
{
- struct mem_ctl_info *mci;
+ int handlers = atomic_dec_return(&edac_handlers);
+ list_del_rcu(&mci->link);
- mci = container_of(head, struct mem_ctl_info, rcu);
+ /* these are for safe removal of devices from global list while
+ * NMI handlers may be traversing list
+ */
+ synchronize_rcu();
INIT_LIST_HEAD(&mci->link);
-}
-static void del_mc_from_global_list(struct mem_ctl_info *mci)
-{
- atomic_dec(&edac_handlers);
- list_del_rcu(&mci->link);
- call_rcu(&mci->rcu, complete_mc_list_del);
- rcu_barrier();
+ return handlers;
}
/**
@@ -495,7 +716,6 @@ EXPORT_SYMBOL(edac_mc_find);
* edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
* create sysfs entries associated with mci structure
* @mci: pointer to the mci structure to be added to the list
- * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
*
* Return:
* 0 Success
@@ -505,7 +725,13 @@ EXPORT_SYMBOL(edac_mc_find);
/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
- debugf0("%s()\n", __func__);
+ int ret = -EINVAL;
+ edac_dbg(0, "\n");
+
+ if (mci->mc_idx >= EDAC_MAX_MCS) {
+ pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx);
+ return -ENODEV;
+ }
#ifdef CONFIG_EDAC_DEBUG
if (edac_debug_level >= 3)
@@ -515,23 +741,39 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
int i;
for (i = 0; i < mci->nr_csrows; i++) {
+ struct csrow_info *csrow = mci->csrows[i];
+ u32 nr_pages = 0;
int j;
- edac_mc_dump_csrow(&mci->csrows[i]);
- for (j = 0; j < mci->csrows[i].nr_channels; j++)
- edac_mc_dump_channel(&mci->csrows[i].
- channels[j]);
+ for (j = 0; j < csrow->nr_channels; j++)
+ nr_pages += csrow->channels[j]->dimm->nr_pages;
+ if (!nr_pages)
+ continue;
+ edac_mc_dump_csrow(csrow);
+ for (j = 0; j < csrow->nr_channels; j++)
+ if (csrow->channels[j]->dimm->nr_pages)
+ edac_mc_dump_channel(csrow->channels[j]);
}
+ for (i = 0; i < mci->tot_dimms; i++)
+ if (mci->dimms[i]->nr_pages)
+ edac_mc_dump_dimm(mci->dimms[i], i);
}
#endif
mutex_lock(&mem_ctls_mutex);
+ if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
+ ret = -EPERM;
+ goto fail0;
+ }
+
if (add_mc_to_global_list(mci))
goto fail0;
/* set load time so that error rate can be tracked */
mci->start_time = jiffies;
+ mci->bus = &mc_bus[mci->mc_idx];
+
if (edac_create_sysfs_mci_device(mci)) {
edac_mc_printk(mci, KERN_WARNING,
"failed to create sysfs device\n");
@@ -543,14 +785,18 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
/* This instance is NOW RUNNING */
mci->op_state = OP_RUNNING_POLL;
- edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
+ edac_mc_workq_setup(mci, edac_mc_get_poll_msec(), true);
} else {
mci->op_state = OP_RUNNING_INTERRUPT;
}
/* Report action taken */
- edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
- " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
+ edac_mc_printk(mci, KERN_INFO,
+ "Giving out device to module %s controller %s: DEV %s (%s)\n",
+ mci->mod_name, mci->ctl_name, mci->dev_name,
+ edac_op_state_to_string(mci->op_state));
+
+ edac_mc_owner = mci->mod_name;
mutex_unlock(&mem_ctls_mutex);
return 0;
@@ -560,7 +806,7 @@ fail1:
fail0:
mutex_unlock(&mem_ctls_mutex);
- return 1;
+ return ret;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
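Putting the pieces together, a probe path built on the new API might look like this sketch (error handling trimmed; the dimm field values are placeholders a real driver would read from the hardware):

	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
	if (!mci)
		return -ENOMEM;

	mci->pdev = &pdev->dev;
	mci->mod_name = "my_edac";

	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = mci->dimms[i];

		dimm->nr_pages = nr_pages;	/* placeholder value */
		dimm->grain = 8;
		dimm->mtype = MEM_DDR3;
		dimm->edac_mode = EDAC_SECDED;
	}

	if (edac_mc_add_mc(mci)) {
		edac_mc_free(mci);
		return -ENODEV;
	}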
@@ -575,7 +821,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
struct mem_ctl_info *mci;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
mutex_lock(&mem_ctls_mutex);
@@ -586,14 +832,17 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
return NULL;
}
- /* marking MCI offline */
- mci->op_state = OP_OFFLINE;
-
- del_mc_from_global_list(mci);
+ if (!del_mc_from_global_list(mci))
+ edac_mc_owner = NULL;
mutex_unlock(&mem_ctls_mutex);
- /* flush workq processes and remove sysfs */
+ /* flush workq processes */
edac_mc_workq_teardown(mci);
+
+ /* marking MCI offline */
+ mci->op_state = OP_OFFLINE;
+
+ /* remove from sysfs */
edac_remove_sysfs_mci_device(mci);
edac_printk(KERN_INFO, EDAC_MC,
@@ -611,7 +860,7 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
void *virt_addr;
unsigned long flags = 0;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
/* ECC error page was not in our memory. Ignore it. */
if (!pfn_valid(page))
@@ -623,13 +872,13 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
if (PageHighMem(pg))
local_irq_save(flags);
- virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
+ virt_addr = kmap_atomic(pg);
/* Perform architecture specific atomic scrub operation */
atomic_scrub(virt_addr + offset, size);
/* Unmap and complete */
- kunmap_atomic(virt_addr, KM_BOUNCE_READ);
+ kunmap_atomic(virt_addr);
if (PageHighMem(pg))
local_irq_restore(flags);
@@ -638,22 +887,26 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
- struct csrow_info *csrows = mci->csrows;
- int row, i;
+ struct csrow_info **csrows = mci->csrows;
+ int row, i, j, n;
- debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
+ edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
row = -1;
for (i = 0; i < mci->nr_csrows; i++) {
- struct csrow_info *csrow = &csrows[i];
-
- if (csrow->nr_pages == 0)
+ struct csrow_info *csrow = csrows[i];
+ n = 0;
+ for (j = 0; j < csrow->nr_channels; j++) {
+ struct dimm_info *dimm = csrow->channels[j]->dimm;
+ n += dimm->nr_pages;
+ }
+ if (n == 0)
continue;
- debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
- "mask(0x%lx)\n", mci->mc_idx, __func__,
- csrow->first_page, page, csrow->last_page,
- csrow->page_mask);
+ edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
+ mci->mc_idx,
+ csrow->first_page, page, csrow->last_page,
+ csrow->page_mask);
if ((page >= csrow->first_page) &&
(page <= csrow->last_page) &&
@@ -673,249 +926,381 @@ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
-/* FIXME - setable log (warning/emerg) levels */
-/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
-void edac_mc_handle_ce(struct mem_ctl_info *mci,
- unsigned long page_frame_number,
- unsigned long offset_in_page, unsigned long syndrome,
- int row, int channel, const char *msg)
+const char *edac_layer_name[] = {
+ [EDAC_MC_LAYER_BRANCH] = "branch",
+ [EDAC_MC_LAYER_CHANNEL] = "channel",
+ [EDAC_MC_LAYER_SLOT] = "slot",
+ [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
+ [EDAC_MC_LAYER_ALL_MEM] = "memory",
+};
+EXPORT_SYMBOL_GPL(edac_layer_name);
+
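These names are the building blocks for both the default DIMM labels filled in by edac_mc_alloc() above and the strings produced by edac_dimm_info_location(). For a {channel, slot} hierarchy on controller 0 with pos = {1, 0}, for instance, the generated label is:

	mc#0channel#1slot#0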
+static void edac_inc_ce_error(struct mem_ctl_info *mci,
+ bool enable_per_layer_report,
+ const int pos[EDAC_MAX_LAYERS],
+ const u16 count)
{
- unsigned long remapped_page;
+ int i, index = 0;
- debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
+ mci->ce_mc += count;
- /* FIXME - maybe make panic on INTERNAL ERROR an option */
- if (row >= mci->nr_csrows || row < 0) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: row out of range "
- "(%d >= %d)\n", row, mci->nr_csrows);
- edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
+ if (!enable_per_layer_report) {
+ mci->ce_noinfo_count += count;
return;
}
- if (channel >= mci->csrows[row].nr_channels || channel < 0) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: channel out of range "
- "(%d >= %d)\n", channel,
- mci->csrows[row].nr_channels);
- edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
+ for (i = 0; i < mci->n_layers; i++) {
+ if (pos[i] < 0)
+ break;
+ index += pos[i];
+ mci->ce_per_layer[i][index] += count;
+
+ if (i < mci->n_layers - 1)
+ index *= mci->layers[i + 1].size;
+ }
+}
+
+static void edac_inc_ue_error(struct mem_ctl_info *mci,
+ bool enable_per_layer_report,
+ const int pos[EDAC_MAX_LAYERS],
+ const u16 count)
+{
+ int i, index = 0;
+
+ mci->ue_mc += count;
+
+ if (!enable_per_layer_report) {
+ mci->ue_noinfo_count += count;
return;
}
- if (edac_mc_get_log_ce())
- /* FIXME - put in DIMM location */
- edac_mc_printk(mci, KERN_WARNING,
- "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
- "0x%lx, row %d, channel %d, label \"%s\": %s\n",
- page_frame_number, offset_in_page,
- mci->csrows[row].grain, syndrome, row, channel,
- mci->csrows[row].channels[channel].label, msg);
+ for (i = 0; i < mci->n_layers; i++) {
+ if (pos[i] < 0)
+ break;
+ index += pos[i];
+ mci->ue_per_layer[i][index] += count;
- mci->ce_count++;
- mci->csrows[row].ce_count++;
- mci->csrows[row].channels[channel].ce_count++;
+ if (i < mci->n_layers - 1)
+ index *= mci->layers[i + 1].size;
+ }
+}
- if (mci->scrub_mode & SCRUB_SW_SRC) {
+static void edac_ce_error(struct mem_ctl_info *mci,
+ const u16 error_count,
+ const int pos[EDAC_MAX_LAYERS],
+ const char *msg,
+ const char *location,
+ const char *label,
+ const char *detail,
+ const char *other_detail,
+ const bool enable_per_layer_report,
+ const unsigned long page_frame_number,
+ const unsigned long offset_in_page,
+ long grain)
+{
+ unsigned long remapped_page;
+ char *msg_aux = "";
+
+ if (*msg)
+ msg_aux = " ";
+
+ if (edac_mc_get_log_ce()) {
+ if (other_detail && *other_detail)
+ edac_mc_printk(mci, KERN_WARNING,
+ "%d CE %s%son %s (%s %s - %s)\n",
+ error_count, msg, msg_aux, label,
+ location, detail, other_detail);
+ else
+ edac_mc_printk(mci, KERN_WARNING,
+ "%d CE %s%son %s (%s %s)\n",
+ error_count, msg, msg_aux, label,
+ location, detail);
+ }
+ edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
+
+ if (mci->scrub_mode == SCRUB_SW_SRC) {
/*
- * Some MC's can remap memory so that it is still available
- * at a different address when PCI devices map into memory.
- * MC's that can't do this lose the memory where PCI devices
- * are mapped. This mapping is MC dependant and so we call
- * back into the MC driver for it to map the MC page to
- * a physical (CPU) page which can then be mapped to a virtual
- * page - which can then be scrubbed.
- */
+ * Some memory controllers (called MCs below) can remap
+ * memory so that it is still available at a different
+ * address when PCI devices map into memory.
+ * MCs that can't do this lose the memory where PCI
+ * devices are mapped. This mapping is MC-dependent
+ * and so we call back into the MC driver for it to
+ * map the MC page to a physical (CPU) page which can
+ * then be mapped to a virtual page - which can then
+ * be scrubbed.
+ */
remapped_page = mci->ctl_page_to_phys ?
mci->ctl_page_to_phys(mci, page_frame_number) :
page_frame_number;
- edac_mc_scrub_block(remapped_page, offset_in_page,
- mci->csrows[row].grain);
+ edac_mc_scrub_block(remapped_page,
+ offset_in_page, grain);
}
}
-EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
-void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
+static void edac_ue_error(struct mem_ctl_info *mci,
+ const u16 error_count,
+ const int pos[EDAC_MAX_LAYERS],
+ const char *msg,
+ const char *location,
+ const char *label,
+ const char *detail,
+ const char *other_detail,
+ const bool enable_per_layer_report)
{
- if (edac_mc_get_log_ce())
- edac_mc_printk(mci, KERN_WARNING,
- "CE - no information available: %s\n", msg);
-
- mci->ce_noinfo_count++;
- mci->ce_count++;
-}
-EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
-
-void edac_mc_handle_ue(struct mem_ctl_info *mci,
- unsigned long page_frame_number,
- unsigned long offset_in_page, int row, const char *msg)
-{
- int len = EDAC_MC_LABEL_LEN * 4;
- char labels[len + 1];
- char *pos = labels;
- int chan;
- int chars;
-
- debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
-
- /* FIXME - maybe make panic on INTERNAL ERROR an option */
- if (row >= mci->nr_csrows || row < 0) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: row out of range "
- "(%d >= %d)\n", row, mci->nr_csrows);
- edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
- return;
+ char *msg_aux = "";
+
+ if (*msg)
+ msg_aux = " ";
+
+ if (edac_mc_get_log_ue()) {
+ if (other_detail && *other_detail)
+ edac_mc_printk(mci, KERN_WARNING,
+ "%d UE %s%son %s (%s %s - %s)\n",
+ error_count, msg, msg_aux, label,
+ location, detail, other_detail);
+ else
+ edac_mc_printk(mci, KERN_WARNING,
+ "%d UE %s%son %s (%s %s)\n",
+ error_count, msg, msg_aux, label,
+ location, detail);
}
- chars = snprintf(pos, len + 1, "%s",
- mci->csrows[row].channels[0].label);
- len -= chars;
- pos += chars;
-
- for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
- chan++) {
- chars = snprintf(pos, len + 1, ":%s",
- mci->csrows[row].channels[chan].label);
- len -= chars;
- pos += chars;
+ if (edac_mc_get_panic_on_ue()) {
+ if (other_detail && *other_detail)
+ panic("UE %s%son %s (%s%s - %s)\n",
+ msg, msg_aux, label, location, detail, other_detail);
+ else
+ panic("UE %s%son %s (%s%s)\n",
+ msg, msg_aux, label, location, detail);
}
- if (edac_mc_get_log_ue())
- edac_mc_printk(mci, KERN_EMERG,
- "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
- "labels \"%s\": %s\n", page_frame_number,
- offset_in_page, mci->csrows[row].grain, row,
- labels, msg);
-
- if (edac_mc_get_panic_on_ue())
- panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
- "row %d, labels \"%s\": %s\n", mci->mc_idx,
- page_frame_number, offset_in_page,
- mci->csrows[row].grain, row, labels, msg);
-
- mci->ue_count++;
- mci->csrows[row].ue_count++;
+ edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}
-EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
-void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
+/**
+ * edac_raw_mc_handle_error - reports a memory event to userspace without doing
+ * anything to discover the error location
+ *
+ * @type: severity of the error (CE/UE/Fatal)
+ * @mci: a struct mem_ctl_info pointer
+ * @e: error description
+ *
+ * This raw function is used internally by edac_mc_handle_error(). It should
+ * only be called directly when the hardware error comes directly from the
+ * BIOS, as in the case of the APEI GHES driver.
+ */
+void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
+ struct mem_ctl_info *mci,
+ struct edac_raw_error_desc *e)
{
- if (edac_mc_get_panic_on_ue())
- panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
+ char detail[80];
+ int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+
+ /* Memory type dependent details about the error */
+ if (type == HW_EVENT_ERR_CORRECTED) {
+ snprintf(detail, sizeof(detail),
+ "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
+ e->page_frame_number, e->offset_in_page,
+ e->grain, e->syndrome);
+ edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
+ detail, e->other_detail, e->enable_per_layer_report,
+ e->page_frame_number, e->offset_in_page, e->grain);
+ } else {
+ snprintf(detail, sizeof(detail),
+ "page:0x%lx offset:0x%lx grain:%ld",
+ e->page_frame_number, e->offset_in_page, e->grain);
+
+ edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
+ detail, e->other_detail, e->enable_per_layer_report);
+ }
- if (edac_mc_get_log_ue())
- edac_mc_printk(mci, KERN_WARNING,
- "UE - no information available: %s\n", msg);
- mci->ue_noinfo_count++;
- mci->ue_count++;
}
-EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
+EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
-/*************************************************************
- * On Fully Buffered DIMM modules, this help function is
- * called to process UE events
+/**
+ * edac_mc_handle_error - reports a memory event to userspace
+ *
+ * @type: severity of the error (CE/UE/Fatal)
+ * @mci: a struct mem_ctl_info pointer
+ * @error_count: Number of errors of the same type
+ * @page_frame_number: mem page where the error occurred
+ * @offset_in_page: offset of the error inside the page
+ * @syndrome: ECC syndrome
+ * @top_layer: Memory layer[0] position
+ * @mid_layer: Memory layer[1] position
+ * @low_layer: Memory layer[2] position
+ * @msg: Message meaningful to the end users that
+ * explains the event
+ * @other_detail: Technical details about the event that
+ * may help hardware manufacturers and
+ * EDAC developers to analyse the event
*/
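+/*
+ * A hypothetical caller (sketch only - the layer arguments depend on how
+ * the driver maps its hardware into EDAC layers) would look like:
+ *
+ *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ *			     pfn, offset, syndrome,
+ *			     chan, slot, -1, "read error", "");
+ */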
-void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
- unsigned int csrow,
- unsigned int channela,
- unsigned int channelb, char *msg)
+void edac_mc_handle_error(const enum hw_event_mc_err_type type,
+ struct mem_ctl_info *mci,
+ const u16 error_count,
+ const unsigned long page_frame_number,
+ const unsigned long offset_in_page,
+ const unsigned long syndrome,
+ const int top_layer,
+ const int mid_layer,
+ const int low_layer,
+ const char *msg,
+ const char *other_detail)
{
- int len = EDAC_MC_LABEL_LEN * 4;
- char labels[len + 1];
- char *pos = labels;
- int chars;
+ char *p;
+ int row = -1, chan = -1;
+ int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
+ int i, n_labels = 0;
+ u8 grain_bits;
+ struct edac_raw_error_desc *e = &mci->error_desc;
+
+ edac_dbg(3, "MC%d\n", mci->mc_idx);
+
+ /* Fills the error report buffer */
+ memset(e, 0, sizeof(*e));
+ e->error_count = error_count;
+ e->top_layer = top_layer;
+ e->mid_layer = mid_layer;
+ e->low_layer = low_layer;
+ e->page_frame_number = page_frame_number;
+ e->offset_in_page = offset_in_page;
+ e->syndrome = syndrome;
+ e->msg = msg;
+ e->other_detail = other_detail;
- if (csrow >= mci->nr_csrows) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: row out of range (%d >= %d)\n",
- csrow, mci->nr_csrows);
- edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
- return;
+ /*
+ * Check if the event report is consistent and if the memory
+ * location is known. If it is known, enable_per_layer_report will be
+ * true, the DIMM(s) label info will be filled and the per-layer
+ * error counters will be incremented.
+ */
+ for (i = 0; i < mci->n_layers; i++) {
+ if (pos[i] >= (int)mci->layers[i].size) {
+
+ edac_mc_printk(mci, KERN_ERR,
+ "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
+ edac_layer_name[mci->layers[i].type],
+ pos[i], mci->layers[i].size);
+ /*
+ * Instead of just returning it, let's use what's
+ * known about the error. The increment routines and
+ * the DIMM filter logic will do the right thing by
+ * pointing the likely damaged DIMMs.
+ */
+ pos[i] = -1;
+ }
+ if (pos[i] >= 0)
+ e->enable_per_layer_report = true;
}
- if (channela >= mci->csrows[csrow].nr_channels) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: channel-a out of range "
- "(%d >= %d)\n",
- channela, mci->csrows[csrow].nr_channels);
- edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
- return;
- }
+ /*
+ * Get the dimm label/grain that applies to the match criteria.
+ * As the error algorithm may not be able to point to just one memory
+ * stick, the logic here will get all possible labels that could
+ * potentially be affected by the error.
+ * On FB-DIMM memory controllers, for uncorrected errors, it is common
+ * to have only the MC channel and the MC dimm (also called "branch")
+ * but the channel is not known, as the memory is arranged in pairs,
+ * where each DIMM belongs to a separate channel within the same
+ * branch.
+ */
+ p = e->label;
+ *p = '\0';
- if (channelb >= mci->csrows[csrow].nr_channels) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: channel-b out of range "
- "(%d >= %d)\n",
- channelb, mci->csrows[csrow].nr_channels);
- edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
- return;
- }
+ for (i = 0; i < mci->tot_dimms; i++) {
+ struct dimm_info *dimm = mci->dimms[i];
- mci->ue_count++;
- mci->csrows[csrow].ue_count++;
+ if (top_layer >= 0 && top_layer != dimm->location[0])
+ continue;
+ if (mid_layer >= 0 && mid_layer != dimm->location[1])
+ continue;
+ if (low_layer >= 0 && low_layer != dimm->location[2])
+ continue;
- /* Generate the DIMM labels from the specified channels */
- chars = snprintf(pos, len + 1, "%s",
- mci->csrows[csrow].channels[channela].label);
- len -= chars;
- pos += chars;
- chars = snprintf(pos, len + 1, "-%s",
- mci->csrows[csrow].channels[channelb].label);
+ /* get the max grain, over the error match range */
+ if (dimm->grain > e->grain)
+ e->grain = dimm->grain;
- if (edac_mc_get_log_ue())
- edac_mc_printk(mci, KERN_EMERG,
- "UE row %d, channel-a= %d channel-b= %d "
- "labels \"%s\": %s\n", csrow, channela, channelb,
- labels, msg);
+ /*
+ * If the error is memory-controller wide, there's no need to
+ * search for the affected DIMMs because the whole
+ * channel/memory controller/... may be affected.
+ * Also, don't show errors for empty DIMM slots.
+ */
+ if (e->enable_per_layer_report && dimm->nr_pages) {
+ if (n_labels >= EDAC_MAX_LABELS) {
+ e->enable_per_layer_report = false;
+ break;
+ }
+ n_labels++;
+ if (p != e->label) {
+ strcpy(p, OTHER_LABEL);
+ p += strlen(OTHER_LABEL);
+ }
+ strcpy(p, dimm->label);
+ p += strlen(p);
+ *p = '\0';
+
+ /*
+ * get csrow/channel of the DIMM, in order to allow
+ * incrementing the compat API counters
+ */
+ edac_dbg(4, "%s csrows map: (%d,%d)\n",
+ mci->csbased ? "rank" : "dimm",
+ dimm->csrow, dimm->cschannel);
+ if (row == -1)
+ row = dimm->csrow;
+ else if (row >= 0 && row != dimm->csrow)
+ row = -2;
+
+ if (chan == -1)
+ chan = dimm->cschannel;
+ else if (chan >= 0 && chan != dimm->cschannel)
+ chan = -2;
+ }
+ }
- if (edac_mc_get_panic_on_ue())
- panic("UE row %d, channel-a= %d channel-b= %d "
- "labels \"%s\": %s\n", csrow, channela,
- channelb, labels, msg);
-}
-EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
+ if (!e->enable_per_layer_report) {
+ strcpy(e->label, "any memory");
+ } else {
+ edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
+ if (p == e->label)
+ strcpy(e->label, "unknown memory");
+ if (type == HW_EVENT_ERR_CORRECTED) {
+ if (row >= 0) {
+ mci->csrows[row]->ce_count += error_count;
+ if (chan >= 0)
+ mci->csrows[row]->channels[chan]->ce_count += error_count;
+ }
+ } else
+ if (row >= 0)
+ mci->csrows[row]->ue_count += error_count;
+ }
-/*************************************************************
- * On Fully Buffered DIMM modules, this help function is
- * called to process CE events
- */
-void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
- unsigned int csrow, unsigned int channel, char *msg)
-{
+ /* Fill the RAM location data */
+ p = e->location;
- /* Ensure boundary values */
- if (csrow >= mci->nr_csrows) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: row out of range (%d >= %d)\n",
- csrow, mci->nr_csrows);
- edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
- return;
- }
- if (channel >= mci->csrows[csrow].nr_channels) {
- /* something is wrong */
- edac_mc_printk(mci, KERN_ERR,
- "INTERNAL ERROR: channel out of range (%d >= %d)\n",
- channel, mci->csrows[csrow].nr_channels);
- edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
- return;
+ for (i = 0; i < mci->n_layers; i++) {
+ if (pos[i] < 0)
+ continue;
+
+ p += sprintf(p, "%s:%d ",
+ edac_layer_name[mci->layers[i].type],
+ pos[i]);
}
+ if (p > e->location)
+ *(p - 1) = '\0';
- if (edac_mc_get_log_ce())
- /* FIXME - put in DIMM location */
- edac_mc_printk(mci, KERN_WARNING,
- "CE row %d, channel %d, label \"%s\": %s\n",
- csrow, channel,
- mci->csrows[csrow].channels[channel].label, msg);
+ /* Report the error via the trace interface */
+ grain_bits = fls_long(e->grain) + 1;
+ trace_mc_event(type, e->msg, e->label, e->error_count,
+ mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
+ PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
+ grain_bits, e->syndrome, e->other_detail);
- mci->ce_count++;
- mci->csrows[csrow].ce_count++;
- mci->csrows[csrow].channels[channel].ce_count++;
+ edac_raw_mc_handle_error(type, mci, e);
}
-EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
+EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index dce61f7ba38..01fae8289cf 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -7,17 +7,21 @@
*
* Written by Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com
*
+ * (c) 2012-2013 - Mauro Carvalho Chehab
+ * The entire API was rewritten and ported to use struct device
+ *
*/
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/edac.h>
#include <linux/bug.h>
+#include <linux/pm_runtime.h>
+#include <linux/uaccess.h>
#include "edac_core.h"
#include "edac_module.h"
-
/* MC EDAC Controls, settable by module parameter and sysfs */
static int edac_mc_log_ue = 1;
static int edac_mc_log_ce = 1;
@@ -48,16 +52,20 @@ int edac_mc_get_poll_msec(void)
static int edac_set_poll_msec(const char *val, struct kernel_param *kp)
{
- long l;
+ unsigned long l;
int ret;
if (!val)
return -EINVAL;
- ret = strict_strtol(val, 0, &l);
- if (ret == -EINVAL || ((int)l != l))
+ ret = kstrtoul(val, 0, &l);
+ if (ret)
+ return ret;
+
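+ /* do not accept a poll period shorter than one second */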
+ if (l < 1000)
return -EINVAL;
- *((int *)kp->arg) = l;
+
+ *((unsigned long *)kp->arg) = l;
/* notify edac_mc engine to reset the poll period */
edac_mc_reset_delay_period(l);
@@ -78,10 +86,12 @@ module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_int,
&edac_mc_poll_msec, 0644);
MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds");
+static struct device *mci_pdev;
+
/*
* various constants for Memory Controllers
*/
-static const char *mem_types[] = {
+static const char * const mem_types[] = {
[MEM_EMPTY] = "Empty",
[MEM_RESERVED] = "Reserved",
[MEM_UNKNOWN] = "Unknown",
@@ -101,7 +111,7 @@ static const char *mem_types[] = {
[MEM_RDDR3] = "Registered-DDR3"
};
-static const char *dev_types[] = {
+static const char * const dev_types[] = {
[DEV_UNKNOWN] = "Unknown",
[DEV_X1] = "x1",
[DEV_X2] = "x2",
@@ -112,7 +122,7 @@ static const char *dev_types[] = {
[DEV_X64] = "x64"
};
-static const char *edac_caps[] = {
+static const char * const edac_caps[] = {
[EDAC_UNKNOWN] = "Unknown",
[EDAC_NONE] = "None",
[EDAC_RESERVED] = "Reserved",
@@ -125,772 +135,833 @@ static const char *edac_caps[] = {
[EDAC_S16ECD16ED] = "S16ECD16ED"
};
-/* EDAC sysfs CSROW data structures and methods
+#ifdef CONFIG_EDAC_LEGACY_SYSFS
+/*
+ * EDAC sysfs CSROW data structures and methods
+ */
+
+#define to_csrow(k) container_of(k, struct csrow_info, dev)
+
+/*
+ * We need it to avoid namespace conflicts between the legacy API
+ * and the per-dimm/per-rank one
*/
+#define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \
+ static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store)
+
+struct dev_ch_attribute {
+ struct device_attribute attr;
+ int channel;
+};
+
+#define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \
+ struct dev_ch_attribute dev_attr_legacy_##_name = \
+ { __ATTR(_name, _mode, _show, _store), (_var) }
+
+#define to_channel(k) (container_of(k, struct dev_ch_attribute, attr)->channel)
/* Set of more default csrow<id> attribute show/store functions */
-static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data,
- int private)
+static ssize_t csrow_ue_count_show(struct device *dev,
+ struct device_attribute *mattr, char *data)
{
+ struct csrow_info *csrow = to_csrow(dev);
+
return sprintf(data, "%u\n", csrow->ue_count);
}
-static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data,
- int private)
+static ssize_t csrow_ce_count_show(struct device *dev,
+ struct device_attribute *mattr, char *data)
{
+ struct csrow_info *csrow = to_csrow(dev);
+
return sprintf(data, "%u\n", csrow->ce_count);
}
-static ssize_t csrow_size_show(struct csrow_info *csrow, char *data,
- int private)
+static ssize_t csrow_size_show(struct device *dev,
+ struct device_attribute *mattr, char *data)
{
- return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages));
+ struct csrow_info *csrow = to_csrow(dev);
+ int i;
+ u32 nr_pages = 0;
+
+ for (i = 0; i < csrow->nr_channels; i++)
+ nr_pages += csrow->channels[i]->dimm->nr_pages;
+ return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages));
}
-static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data,
- int private)
+static ssize_t csrow_mem_type_show(struct device *dev,
+ struct device_attribute *mattr, char *data)
{
- return sprintf(data, "%s\n", mem_types[csrow->mtype]);
+ struct csrow_info *csrow = to_csrow(dev);
+
+ return sprintf(data, "%s\n", mem_types[csrow->channels[0]->dimm->mtype]);
}
-static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data,
- int private)
+static ssize_t csrow_dev_type_show(struct device *dev,
+ struct device_attribute *mattr, char *data)
{
- return sprintf(data, "%s\n", dev_types[csrow->dtype]);
+ struct csrow_info *csrow = to_csrow(dev);
+
+ return sprintf(data, "%s\n", dev_types[csrow->channels[0]->dimm->dtype]);
}
-static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data,
- int private)
+static ssize_t csrow_edac_mode_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
- return sprintf(data, "%s\n", edac_caps[csrow->edac_mode]);
+ struct csrow_info *csrow = to_csrow(dev);
+
+ return sprintf(data, "%s\n", edac_caps[csrow->channels[0]->dimm->edac_mode]);
}
/* show/store functions for DIMM Label attributes */
-static ssize_t channel_dimm_label_show(struct csrow_info *csrow,
- char *data, int channel)
+static ssize_t channel_dimm_label_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
+ struct csrow_info *csrow = to_csrow(dev);
+ unsigned chan = to_channel(mattr);
+ struct rank_info *rank = csrow->channels[chan];
+
/* if field has not been initialized, there is nothing to send */
- if (!csrow->channels[channel].label[0])
+ if (!rank->dimm->label[0])
return 0;
return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
- csrow->channels[channel].label);
+ rank->dimm->label);
}
-static ssize_t channel_dimm_label_store(struct csrow_info *csrow,
- const char *data,
- size_t count, int channel)
+static ssize_t channel_dimm_label_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
{
+ struct csrow_info *csrow = to_csrow(dev);
+ unsigned chan = to_channel(mattr);
+ struct rank_info *rank = csrow->channels[chan];
+
ssize_t max_size = 0;
max_size = min((ssize_t) count, (ssize_t) EDAC_MC_LABEL_LEN - 1);
- strncpy(csrow->channels[channel].label, data, max_size);
- csrow->channels[channel].label[max_size] = '\0';
+ strncpy(rank->dimm->label, data, max_size);
+ rank->dimm->label[max_size] = '\0';
return max_size;
}
/* show function for dynamic chX_ce_count attribute */
-static ssize_t channel_ce_count_show(struct csrow_info *csrow,
- char *data, int channel)
+static ssize_t channel_ce_count_show(struct device *dev,
+ struct device_attribute *mattr, char *data)
{
- return sprintf(data, "%u\n", csrow->channels[channel].ce_count);
+ struct csrow_info *csrow = to_csrow(dev);
+ unsigned chan = to_channel(mattr);
+ struct rank_info *rank = csrow->channels[chan];
+
+ return sprintf(data, "%u\n", rank->ce_count);
}
-/* csrow specific attribute structure */
-struct csrowdev_attribute {
- struct attribute attr;
- ssize_t(*show) (struct csrow_info *, char *, int);
- ssize_t(*store) (struct csrow_info *, const char *, size_t, int);
- int private;
-};
+/* csrow<id> attribute files */
+DEVICE_ATTR_LEGACY(size_mb, S_IRUGO, csrow_size_show, NULL);
+DEVICE_ATTR_LEGACY(dev_type, S_IRUGO, csrow_dev_type_show, NULL);
+DEVICE_ATTR_LEGACY(mem_type, S_IRUGO, csrow_mem_type_show, NULL);
+DEVICE_ATTR_LEGACY(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL);
+DEVICE_ATTR_LEGACY(ue_count, S_IRUGO, csrow_ue_count_show, NULL);
+DEVICE_ATTR_LEGACY(ce_count, S_IRUGO, csrow_ce_count_show, NULL);
-#define to_csrow(k) container_of(k, struct csrow_info, kobj)
-#define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr)
+/* default attributes of the CSROW<id> object */
+static struct attribute *csrow_attrs[] = {
+ &dev_attr_legacy_dev_type.attr,
+ &dev_attr_legacy_mem_type.attr,
+ &dev_attr_legacy_edac_mode.attr,
+ &dev_attr_legacy_size_mb.attr,
+ &dev_attr_legacy_ue_count.attr,
+ &dev_attr_legacy_ce_count.attr,
+ NULL,
+};
-/* Set of show/store higher level functions for default csrow attributes */
-static ssize_t csrowdev_show(struct kobject *kobj,
- struct attribute *attr, char *buffer)
-{
- struct csrow_info *csrow = to_csrow(kobj);
- struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr);
+static struct attribute_group csrow_attr_grp = {
+ .attrs = csrow_attrs,
+};
- if (csrowdev_attr->show)
- return csrowdev_attr->show(csrow,
- buffer, csrowdev_attr->private);
- return -EIO;
-}
+static const struct attribute_group *csrow_attr_groups[] = {
+ &csrow_attr_grp,
+ NULL
+};
-static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
+static void csrow_attr_release(struct device *dev)
{
- struct csrow_info *csrow = to_csrow(kobj);
- struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr);
-
- if (csrowdev_attr->store)
- return csrowdev_attr->store(csrow,
- buffer,
- count, csrowdev_attr->private);
- return -EIO;
-}
+ struct csrow_info *csrow = container_of(dev, struct csrow_info, dev);
-static const struct sysfs_ops csrowfs_ops = {
- .show = csrowdev_show,
- .store = csrowdev_store
-};
+ edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev));
+ kfree(csrow);
+}
-#define CSROWDEV_ATTR(_name,_mode,_show,_store,_private) \
-static struct csrowdev_attribute attr_##_name = { \
- .attr = {.name = __stringify(_name), .mode = _mode }, \
- .show = _show, \
- .store = _store, \
- .private = _private, \
+static struct device_type csrow_attr_type = {
+ .groups = csrow_attr_groups,
+ .release = csrow_attr_release,
};
-/* default cwrow<id>/attribute files */
-CSROWDEV_ATTR(size_mb, S_IRUGO, csrow_size_show, NULL, 0);
-CSROWDEV_ATTR(dev_type, S_IRUGO, csrow_dev_type_show, NULL, 0);
-CSROWDEV_ATTR(mem_type, S_IRUGO, csrow_mem_type_show, NULL, 0);
-CSROWDEV_ATTR(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL, 0);
-CSROWDEV_ATTR(ue_count, S_IRUGO, csrow_ue_count_show, NULL, 0);
-CSROWDEV_ATTR(ce_count, S_IRUGO, csrow_ce_count_show, NULL, 0);
+/*
+ * possible dynamic channel DIMM Label attribute files
+ */
-/* default attributes of the CSROW<id> object */
-static struct csrowdev_attribute *default_csrow_attr[] = {
- &attr_dev_type,
- &attr_mem_type,
- &attr_edac_mode,
- &attr_size_mb,
- &attr_ue_count,
- &attr_ce_count,
- NULL,
-};
+#define EDAC_NR_CHANNELS 6
-/* possible dynamic channel DIMM Label attribute files */
-CSROWDEV_ATTR(ch0_dimm_label, S_IRUGO | S_IWUSR,
+DEVICE_CHANNEL(ch0_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 0);
-CSROWDEV_ATTR(ch1_dimm_label, S_IRUGO | S_IWUSR,
+DEVICE_CHANNEL(ch1_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 1);
-CSROWDEV_ATTR(ch2_dimm_label, S_IRUGO | S_IWUSR,
+DEVICE_CHANNEL(ch2_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 2);
-CSROWDEV_ATTR(ch3_dimm_label, S_IRUGO | S_IWUSR,
+DEVICE_CHANNEL(ch3_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 3);
-CSROWDEV_ATTR(ch4_dimm_label, S_IRUGO | S_IWUSR,
+DEVICE_CHANNEL(ch4_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 4);
-CSROWDEV_ATTR(ch5_dimm_label, S_IRUGO | S_IWUSR,
+DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 5);
/* Total possible dynamic DIMM Label attribute file table */
-static struct csrowdev_attribute *dynamic_csrow_dimm_attr[] = {
- &attr_ch0_dimm_label,
- &attr_ch1_dimm_label,
- &attr_ch2_dimm_label,
- &attr_ch3_dimm_label,
- &attr_ch4_dimm_label,
- &attr_ch5_dimm_label
+static struct device_attribute *dynamic_csrow_dimm_attr[] = {
+ &dev_attr_legacy_ch0_dimm_label.attr,
+ &dev_attr_legacy_ch1_dimm_label.attr,
+ &dev_attr_legacy_ch2_dimm_label.attr,
+ &dev_attr_legacy_ch3_dimm_label.attr,
+ &dev_attr_legacy_ch4_dimm_label.attr,
+ &dev_attr_legacy_ch5_dimm_label.attr
};
/* possible dynamic channel ce_count attribute files */
-CSROWDEV_ATTR(ch0_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 0);
-CSROWDEV_ATTR(ch1_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 1);
-CSROWDEV_ATTR(ch2_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 2);
-CSROWDEV_ATTR(ch3_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 3);
-CSROWDEV_ATTR(ch4_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 4);
-CSROWDEV_ATTR(ch5_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 5);
+DEVICE_CHANNEL(ch0_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 0);
+DEVICE_CHANNEL(ch1_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 1);
+DEVICE_CHANNEL(ch2_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 2);
+DEVICE_CHANNEL(ch3_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 3);
+DEVICE_CHANNEL(ch4_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 4);
+DEVICE_CHANNEL(ch5_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 5);
/* Total possible dynamic ce_count attribute file table */
-static struct csrowdev_attribute *dynamic_csrow_ce_count_attr[] = {
- &attr_ch0_ce_count,
- &attr_ch1_ce_count,
- &attr_ch2_ce_count,
- &attr_ch3_ce_count,
- &attr_ch4_ce_count,
- &attr_ch5_ce_count
+static struct device_attribute *dynamic_csrow_ce_count_attr[] = {
+ &dev_attr_legacy_ch0_ce_count.attr,
+ &dev_attr_legacy_ch1_ce_count.attr,
+ &dev_attr_legacy_ch2_ce_count.attr,
+ &dev_attr_legacy_ch3_ce_count.attr,
+ &dev_attr_legacy_ch4_ce_count.attr,
+ &dev_attr_legacy_ch5_ce_count.attr
};
-#define EDAC_NR_CHANNELS 6
-
-/* Create dynamic CHANNEL files, indexed by 'chan', under specifed CSROW */
-static int edac_create_channel_files(struct kobject *kobj, int chan)
+static inline int nr_pages_per_csrow(struct csrow_info *csrow)
{
- int err = -ENODEV;
+ int chan, nr_pages = 0;
- if (chan >= EDAC_NR_CHANNELS)
- return err;
-
- /* create the DIMM label attribute file */
- err = sysfs_create_file(kobj,
- (struct attribute *)
- dynamic_csrow_dimm_attr[chan]);
-
- if (!err) {
- /* create the CE Count attribute file */
- err = sysfs_create_file(kobj,
- (struct attribute *)
- dynamic_csrow_ce_count_attr[chan]);
- } else {
- debugf1("%s() dimm labels and ce_count files created",
- __func__);
- }
+ for (chan = 0; chan < csrow->nr_channels; chan++)
+ nr_pages += csrow->channels[chan]->dimm->nr_pages;
- return err;
+ return nr_pages;
}
-/* No memory to release for this kobj */
-static void edac_csrow_instance_release(struct kobject *kobj)
-{
- struct mem_ctl_info *mci;
- struct csrow_info *cs;
-
- debugf1("%s()\n", __func__);
-
- cs = container_of(kobj, struct csrow_info, kobj);
- mci = cs->mci;
-
- kobject_put(&mci->edac_mci_kobj);
-}
-
-/* the kobj_type instance for a CSROW */
-static struct kobj_type ktype_csrow = {
- .release = edac_csrow_instance_release,
- .sysfs_ops = &csrowfs_ops,
- .default_attrs = (struct attribute **)default_csrow_attr,
-};
-
/* Create a CSROW object under the specified edac_mc_device */
static int edac_create_csrow_object(struct mem_ctl_info *mci,
- struct csrow_info *csrow, int index)
+ struct csrow_info *csrow, int index)
{
- struct kobject *kobj_mci = &mci->edac_mci_kobj;
- struct kobject *kobj;
- int chan;
- int err;
+ int err, chan;
- /* generate ..../edac/mc/mc<id>/csrow<index> */
- memset(&csrow->kobj, 0, sizeof(csrow->kobj));
- csrow->mci = mci; /* include container up link */
+ if (csrow->nr_channels >= EDAC_NR_CHANNELS)
+ return -ENODEV;
- /* bump the mci instance's kobject's ref count */
- kobj = kobject_get(&mci->edac_mci_kobj);
- if (!kobj) {
- err = -ENODEV;
- goto err_out;
- }
+ csrow->dev.type = &csrow_attr_type;
+ csrow->dev.bus = mci->bus;
+ device_initialize(&csrow->dev);
+ csrow->dev.parent = &mci->dev;
+ csrow->mci = mci;
+ dev_set_name(&csrow->dev, "csrow%d", index);
+ dev_set_drvdata(&csrow->dev, csrow);
- /* Instanstiate the csrow object */
- err = kobject_init_and_add(&csrow->kobj, &ktype_csrow, kobj_mci,
- "csrow%d", index);
- if (err)
- goto err_release_top_kobj;
+ edac_dbg(0, "creating (virtual) csrow node %s\n",
+ dev_name(&csrow->dev));
- /* At this point, to release a csrow kobj, one must
- * call the kobject_put and allow that tear down
- * to work the releasing
- */
+ err = device_add(&csrow->dev);
+ if (err < 0)
+ return err;
- /* Create the dyanmic attribute files on this csrow,
- * namely, the DIMM labels and the channel ce_count
- */
for (chan = 0; chan < csrow->nr_channels; chan++) {
- err = edac_create_channel_files(&csrow->kobj, chan);
- if (err) {
- /* special case the unregister here */
- kobject_put(&csrow->kobj);
- goto err_out;
+ /* Only expose populated DIMMs */
+ if (!csrow->channels[chan]->dimm->nr_pages)
+ continue;
+ err = device_create_file(&csrow->dev,
+ dynamic_csrow_dimm_attr[chan]);
+ if (err < 0)
+ goto error;
+ err = device_create_file(&csrow->dev,
+ dynamic_csrow_ce_count_attr[chan]);
+ if (err < 0) {
+ device_remove_file(&csrow->dev,
+ dynamic_csrow_dimm_attr[chan]);
+ goto error;
}
}
- kobject_uevent(&csrow->kobj, KOBJ_ADD);
+
return 0;
- /* error unwind stack */
-err_release_top_kobj:
- kobject_put(&mci->edac_mci_kobj);
+error:
+ for (--chan; chan >= 0; chan--) {
+ device_remove_file(&csrow->dev,
+ dynamic_csrow_dimm_attr[chan]);
+ device_remove_file(&csrow->dev,
+ dynamic_csrow_ce_count_attr[chan]);
+ }
+ put_device(&csrow->dev);
-err_out:
return err;
}
-/* default sysfs methods and data structures for the main MCI kobject */
-
-static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci,
- const char *data, size_t count)
+/* Create all CSROW objects under the specified edac_mc_device */
+static int edac_create_csrow_objects(struct mem_ctl_info *mci)
{
- int row, chan;
-
- mci->ue_noinfo_count = 0;
- mci->ce_noinfo_count = 0;
- mci->ue_count = 0;
- mci->ce_count = 0;
-
- for (row = 0; row < mci->nr_csrows; row++) {
- struct csrow_info *ri = &mci->csrows[row];
+ int err, i, chan;
+ struct csrow_info *csrow;
- ri->ue_count = 0;
- ri->ce_count = 0;
+ for (i = 0; i < mci->nr_csrows; i++) {
+ csrow = mci->csrows[i];
+ if (!nr_pages_per_csrow(csrow))
+ continue;
+ err = edac_create_csrow_object(mci, mci->csrows[i], i);
+ if (err < 0) {
+ edac_dbg(1,
+ "failure: create csrow objects for csrow %d\n",
+ i);
+ goto error;
+ }
+ }
+ return 0;
- for (chan = 0; chan < ri->nr_channels; chan++)
- ri->channels[chan].ce_count = 0;
+error:
+ for (--i; i >= 0; i--) {
+ csrow = mci->csrows[i];
+ if (!nr_pages_per_csrow(csrow))
+ continue;
+ for (chan = csrow->nr_channels - 1; chan >= 0; chan--) {
+ if (!csrow->channels[chan]->dimm->nr_pages)
+ continue;
+ device_remove_file(&csrow->dev,
+ dynamic_csrow_dimm_attr[chan]);
+ device_remove_file(&csrow->dev,
+ dynamic_csrow_ce_count_attr[chan]);
+ }
+ put_device(&mci->csrows[i]->dev);
}
- mci->start_time = jiffies;
- return count;
+ return err;
}
-/* memory scrubbing */
-static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci,
- const char *data, size_t count)
+static void edac_delete_csrow_objects(struct mem_ctl_info *mci)
{
- unsigned long bandwidth = 0;
- int err;
+ int i, chan;
+ struct csrow_info *csrow;
- if (!mci->set_sdram_scrub_rate) {
- edac_printk(KERN_WARNING, EDAC_MC,
- "Memory scrub rate setting not implemented!\n");
- return -EINVAL;
+ for (i = mci->nr_csrows - 1; i >= 0; i--) {
+ csrow = mci->csrows[i];
+ if (!nr_pages_per_csrow(csrow))
+ continue;
+ for (chan = csrow->nr_channels - 1; chan >= 0; chan--) {
+ if (!csrow->channels[chan]->dimm->nr_pages)
+ continue;
+ edac_dbg(1, "Removing csrow %d channel %d sysfs nodes\n",
+ i, chan);
+ device_remove_file(&csrow->dev,
+ dynamic_csrow_dimm_attr[chan]);
+ device_remove_file(&csrow->dev,
+ dynamic_csrow_ce_count_attr[chan]);
+ }
+ device_unregister(&mci->csrows[i]->dev);
}
+}
+#endif
- if (strict_strtoul(data, 10, &bandwidth) < 0)
- return -EINVAL;
+/*
+ * Per-dimm (or per-rank) devices
+ */
- err = mci->set_sdram_scrub_rate(mci, (u32)bandwidth);
- if (err) {
- edac_printk(KERN_DEBUG, EDAC_MC,
- "Failed setting scrub rate to %lu\n", bandwidth);
- return -EINVAL;
- }
- else {
- edac_printk(KERN_DEBUG, EDAC_MC,
- "Scrub rate set to: %lu\n", bandwidth);
- return count;
- }
-}
+#define to_dimm(k) container_of(k, struct dimm_info, dev)
-static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data)
+/* show/store functions for DIMM Label attributes */
+static ssize_t dimmdev_location_show(struct device *dev,
+ struct device_attribute *mattr, char *data)
{
- u32 bandwidth = 0;
- int err;
+ struct dimm_info *dimm = to_dimm(dev);
- if (!mci->get_sdram_scrub_rate) {
- edac_printk(KERN_WARNING, EDAC_MC,
- "Memory scrub rate reading not implemented\n");
- return -EINVAL;
- }
-
- err = mci->get_sdram_scrub_rate(mci, &bandwidth);
- if (err) {
- edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n");
- return err;
- }
- else {
- edac_printk(KERN_DEBUG, EDAC_MC,
- "Read scrub rate: %d\n", bandwidth);
- return sprintf(data, "%d\n", bandwidth);
- }
+ return edac_dimm_info_location(dimm, data, PAGE_SIZE);
}
-/* default attribute files for the MCI object */
-static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data)
+static ssize_t dimmdev_label_show(struct device *dev,
+ struct device_attribute *mattr, char *data)
{
- return sprintf(data, "%d\n", mci->ue_count);
+ struct dimm_info *dimm = to_dimm(dev);
+
+ /* if field has not been initialized, there is nothing to send */
+ if (!dimm->label[0])
+ return 0;
+
+ return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n", dimm->label);
}
-static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data)
+static ssize_t dimmdev_label_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data,
+ size_t count)
{
- return sprintf(data, "%d\n", mci->ce_count);
+ struct dimm_info *dimm = to_dimm(dev);
+
+ ssize_t max_size = 0;
+
+ max_size = min((ssize_t) count, (ssize_t) EDAC_MC_LABEL_LEN - 1);
+ strncpy(dimm->label, data, max_size);
+ dimm->label[max_size] = '\0';
+
+ return max_size;
}
-static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data)
+static ssize_t dimmdev_size_show(struct device *dev,
+ struct device_attribute *mattr, char *data)
{
- return sprintf(data, "%d\n", mci->ce_noinfo_count);
+ struct dimm_info *dimm = to_dimm(dev);
+
+ return sprintf(data, "%u\n", PAGES_TO_MiB(dimm->nr_pages));
}
-static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data)
+static ssize_t dimmdev_mem_type_show(struct device *dev,
+ struct device_attribute *mattr, char *data)
{
- return sprintf(data, "%d\n", mci->ue_noinfo_count);
+ struct dimm_info *dimm = to_dimm(dev);
+
+ return sprintf(data, "%s\n", mem_types[dimm->mtype]);
}
-static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data)
+static ssize_t dimmdev_dev_type_show(struct device *dev,
+ struct device_attribute *mattr, char *data)
{
- return sprintf(data, "%ld\n", (jiffies - mci->start_time) / HZ);
+ struct dimm_info *dimm = to_dimm(dev);
+
+ return sprintf(data, "%s\n", dev_types[dimm->dtype]);
}
-static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data)
+static ssize_t dimmdev_edac_mode_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
- return sprintf(data, "%s\n", mci->ctl_name);
+ struct dimm_info *dimm = to_dimm(dev);
+
+ return sprintf(data, "%s\n", edac_caps[dimm->edac_mode]);
}
-static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data)
-{
- int total_pages, csrow_idx;
+/* dimm/rank attribute files */
+static DEVICE_ATTR(dimm_label, S_IRUGO | S_IWUSR,
+ dimmdev_label_show, dimmdev_label_store);
+static DEVICE_ATTR(dimm_location, S_IRUGO, dimmdev_location_show, NULL);
+static DEVICE_ATTR(size, S_IRUGO, dimmdev_size_show, NULL);
+static DEVICE_ATTR(dimm_mem_type, S_IRUGO, dimmdev_mem_type_show, NULL);
+static DEVICE_ATTR(dimm_dev_type, S_IRUGO, dimmdev_dev_type_show, NULL);
+static DEVICE_ATTR(dimm_edac_mode, S_IRUGO, dimmdev_edac_mode_show, NULL);
+
+/* attributes of the dimm<id>/rank<id> object */
+static struct attribute *dimm_attrs[] = {
+ &dev_attr_dimm_label.attr,
+ &dev_attr_dimm_location.attr,
+ &dev_attr_size.attr,
+ &dev_attr_dimm_mem_type.attr,
+ &dev_attr_dimm_dev_type.attr,
+ &dev_attr_dimm_edac_mode.attr,
+ NULL,
+};
- for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows;
- csrow_idx++) {
- struct csrow_info *csrow = &mci->csrows[csrow_idx];
+static struct attribute_group dimm_attr_grp = {
+ .attrs = dimm_attrs,
+};
- if (!csrow->nr_pages)
- continue;
+static const struct attribute_group *dimm_attr_groups[] = {
+ &dimm_attr_grp,
+ NULL
+};
- total_pages += csrow->nr_pages;
- }
+static void dimm_attr_release(struct device *dev)
+{
+ struct dimm_info *dimm = container_of(dev, struct dimm_info, dev);
- return sprintf(data, "%u\n", PAGES_TO_MiB(total_pages));
+ edac_dbg(1, "Releasing dimm device %s\n", dev_name(dev));
+ kfree(dimm);
}
-#define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj)
-#define to_mcidev_attr(a) container_of(a,struct mcidev_sysfs_attribute,attr)
+static struct device_type dimm_attr_type = {
+ .groups = dimm_attr_groups,
+ .release = dimm_attr_release,
+};
-/* MCI show/store functions for top most object */
-static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr,
- char *buffer)
+/* Create a DIMM object under specifed memory controller device */
+static int edac_create_dimm_object(struct mem_ctl_info *mci,
+ struct dimm_info *dimm,
+ int index)
{
- struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
- struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr);
+ int err;
+ dimm->mci = mci;
- debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info);
+ dimm->dev.type = &dimm_attr_type;
+ dimm->dev.bus = mci->bus;
+ device_initialize(&dimm->dev);
- if (mcidev_attr->show)
- return mcidev_attr->show(mem_ctl_info, buffer);
+ dimm->dev.parent = &mci->dev;
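+ /* csrow-based controllers expose ranks; the others expose whole DIMMs */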
+ if (mci->csbased)
+ dev_set_name(&dimm->dev, "rank%d", index);
+ else
+ dev_set_name(&dimm->dev, "dimm%d", index);
+ dev_set_drvdata(&dimm->dev, dimm);
+ pm_runtime_forbid(&mci->dev);
- return -EIO;
-}
+ err = device_add(&dimm->dev);
-static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
- struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr);
+ edac_dbg(0, "creating rank/dimm device %s\n", dev_name(&dimm->dev));
- debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info);
+ return err;
+}
- if (mcidev_attr->store)
- return mcidev_attr->store(mem_ctl_info, buffer, count);
+/*
+ * Memory controller device
+ */
- return -EIO;
-}
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
-/* Intermediate show/store table */
-static const struct sysfs_ops mci_ops = {
- .show = mcidev_show,
- .store = mcidev_store
-};
+static ssize_t mci_reset_counters_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ int cnt, row, chan, i;
+ mci->ue_mc = 0;
+ mci->ce_mc = 0;
+ mci->ue_noinfo_count = 0;
+ mci->ce_noinfo_count = 0;
-#define MCIDEV_ATTR(_name,_mode,_show,_store) \
-static struct mcidev_sysfs_attribute mci_attr_##_name = { \
- .attr = {.name = __stringify(_name), .mode = _mode }, \
- .show = _show, \
- .store = _store, \
-};
+ for (row = 0; row < mci->nr_csrows; row++) {
+ struct csrow_info *ri = mci->csrows[row];
-/* default Control file */
-MCIDEV_ATTR(reset_counters, S_IWUSR, NULL, mci_reset_counters_store);
+ ri->ue_count = 0;
+ ri->ce_count = 0;
-/* default Attribute files */
-MCIDEV_ATTR(mc_name, S_IRUGO, mci_ctl_name_show, NULL);
-MCIDEV_ATTR(size_mb, S_IRUGO, mci_size_mb_show, NULL);
-MCIDEV_ATTR(seconds_since_reset, S_IRUGO, mci_seconds_show, NULL);
-MCIDEV_ATTR(ue_noinfo_count, S_IRUGO, mci_ue_noinfo_show, NULL);
-MCIDEV_ATTR(ce_noinfo_count, S_IRUGO, mci_ce_noinfo_show, NULL);
-MCIDEV_ATTR(ue_count, S_IRUGO, mci_ue_count_show, NULL);
-MCIDEV_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL);
+ for (chan = 0; chan < ri->nr_channels; chan++)
+ ri->channels[chan]->ce_count = 0;
+ }
-/* memory scrubber attribute file */
-MCIDEV_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show,
- mci_sdram_scrub_rate_store);
-
-static struct mcidev_sysfs_attribute *mci_attr[] = {
- &mci_attr_reset_counters,
- &mci_attr_mc_name,
- &mci_attr_size_mb,
- &mci_attr_seconds_since_reset,
- &mci_attr_ue_noinfo_count,
- &mci_attr_ce_noinfo_count,
- &mci_attr_ue_count,
- &mci_attr_ce_count,
- &mci_attr_sdram_scrub_rate,
- NULL
-};
+ cnt = 1;
+ for (i = 0; i < mci->n_layers; i++) {
+ cnt *= mci->layers[i].size;
+ memset(mci->ce_per_layer[i], 0, cnt * sizeof(u32));
+ memset(mci->ue_per_layer[i], 0, cnt * sizeof(u32));
+ }
+ mci->start_time = jiffies;
+ return count;
+}
-/*
- * Release of a MC controlling instance
+/* Memory scrubbing interface:
*
- * each MC control instance has the following resources upon entry:
- * a) a ref count on the top memctl kobj
- * b) a ref count on this module
+ * An MC driver can limit the scrubbing bandwidth based on the CPU type.
+ * Therefore, ->set_sdram_scrub_rate should be made to return the actual
+ * bandwidth that is accepted or 0 when scrubbing is to be disabled.
*
- * this function must decrement those ref counts and then
- * issue a free on the instance's memory
+ * A negative return value still means that an error occurred while
+ * setting the scrub rate.
*/
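+/*
+ * A minimal, hypothetical implementation honoring that contract (the
+ * MY_MIN_BW/MY_MAX_BW limits and the programming step are driver-specific
+ * assumptions):
+ *
+ *	static int my_set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 bw)
+ *	{
+ *		u32 actual = clamp_val(bw, MY_MIN_BW, MY_MAX_BW);
+ *
+ *		... program the hardware to 'actual' ...
+ *		return actual;	(or 0 if scrubbing was disabled)
+ *	}
+ */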
-static void edac_mci_control_release(struct kobject *kobj)
+static ssize_t mci_sdram_scrub_rate_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
{
- struct mem_ctl_info *mci;
+ struct mem_ctl_info *mci = to_mci(dev);
+ unsigned long bandwidth = 0;
+ int new_bw = 0;
- mci = to_mci(kobj);
+ if (kstrtoul(data, 10, &bandwidth) < 0)
+ return -EINVAL;
- debugf0("%s() mci instance idx=%d releasing\n", __func__, mci->mc_idx);
+ new_bw = mci->set_sdram_scrub_rate(mci, bandwidth);
+ if (new_bw < 0) {
+ edac_printk(KERN_WARNING, EDAC_MC,
+ "Error setting scrub rate to: %lu\n", bandwidth);
+ return -EINVAL;
+ }
- /* decrement the module ref count */
- module_put(mci->owner);
+ return count;
}
-static struct kobj_type ktype_mci = {
- .release = edac_mci_control_release,
- .sysfs_ops = &mci_ops,
- .default_attrs = (struct attribute **)mci_attr,
-};
-
-/* EDAC memory controller sysfs kset:
- * /sys/devices/system/edac/mc
- */
-static struct kset *mc_kset;
-
/*
- * edac_mc_register_sysfs_main_kobj
- *
- * setups and registers the main kobject for each mci
+ * ->get_sdram_scrub_rate() return value semantics same as above.
*/
-int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci)
+static ssize_t mci_sdram_scrub_rate_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
- struct kobject *kobj_mci;
- int err;
-
- debugf1("%s()\n", __func__);
-
- kobj_mci = &mci->edac_mci_kobj;
+ struct mem_ctl_info *mci = to_mci(dev);
+ int bandwidth = 0;
- /* Init the mci's kobject */
- memset(kobj_mci, 0, sizeof(*kobj_mci));
-
- /* Record which module 'owns' this control structure
- * and bump the ref count of the module
- */
- mci->owner = THIS_MODULE;
-
- /* bump ref count on this module */
- if (!try_module_get(mci->owner)) {
- err = -ENODEV;
- goto fail_out;
+ bandwidth = mci->get_sdram_scrub_rate(mci);
+ if (bandwidth < 0) {
+ edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n");
+ return bandwidth;
}
- /* this instance become part of the mc_kset */
- kobj_mci->kset = mc_kset;
+ return sprintf(data, "%d\n", bandwidth);
+}
- /* register the mc<id> kobject to the mc_kset */
- err = kobject_init_and_add(kobj_mci, &ktype_mci, NULL,
- "mc%d", mci->mc_idx);
- if (err) {
- debugf1("%s()Failed to register '.../edac/mc%d'\n",
- __func__, mci->mc_idx);
- goto kobj_reg_fail;
- }
- kobject_uevent(kobj_mci, KOBJ_ADD);
+/* default attribute files for the MCI object */
+static ssize_t mci_ue_count_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
- /* At this point, to 'free' the control struct,
- * edac_mc_unregister_sysfs_main_kobj() must be used
- */
+ return sprintf(data, "%d\n", mci->ue_mc);
+}
- debugf1("%s() Registered '.../edac/mc%d' kobject\n",
- __func__, mci->mc_idx);
+static ssize_t mci_ce_count_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
- return 0;
+ return sprintf(data, "%d\n", mci->ce_mc);
+}
- /* Error exit stack */
+static ssize_t mci_ce_noinfo_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
-kobj_reg_fail:
- module_put(mci->owner);
+ return sprintf(data, "%d\n", mci->ce_noinfo_count);
+}
-fail_out:
- return err;
+static ssize_t mci_ue_noinfo_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+
+ return sprintf(data, "%d\n", mci->ue_noinfo_count);
}
-/*
- * edac_mc_register_sysfs_main_kobj
- *
- * tears down and the main mci kobject from the mc_kset
- */
-void edac_mc_unregister_sysfs_main_kobj(struct mem_ctl_info *mci)
+static ssize_t mci_seconds_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
- debugf1("%s()\n", __func__);
+ struct mem_ctl_info *mci = to_mci(dev);
- /* delete the kobj from the mc_kset */
- kobject_put(&mci->edac_mci_kobj);
+ return sprintf(data, "%ld\n", (jiffies - mci->start_time) / HZ);
}
-#define EDAC_DEVICE_SYMLINK "device"
+static ssize_t mci_ctl_name_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
-#define grp_to_mci(k) (container_of(k, struct mcidev_sysfs_group_kobj, kobj)->mci)
+ return sprintf(data, "%s\n", mci->ctl_name);
+}
-/* MCI show/store functions for top most object */
-static ssize_t inst_grp_show(struct kobject *kobj, struct attribute *attr,
- char *buffer)
+static ssize_t mci_size_mb_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
- struct mem_ctl_info *mem_ctl_info = grp_to_mci(kobj);
- struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr);
+ struct mem_ctl_info *mci = to_mci(dev);
+ int total_pages = 0, csrow_idx, j;
+
+ for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) {
+ struct csrow_info *csrow = mci->csrows[csrow_idx];
- debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info);
+ for (j = 0; j < csrow->nr_channels; j++) {
+ struct dimm_info *dimm = csrow->channels[j]->dimm;
- if (mcidev_attr->show)
- return mcidev_attr->show(mem_ctl_info, buffer);
+ total_pages += dimm->nr_pages;
+ }
+ }
- return -EIO;
+ return sprintf(data, "%u\n", PAGES_TO_MiB(total_pages));
}
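+/*
+ * e.g. a controller with 2 channels and 4 slots per channel would print
+ * "channel 1 slot 3" here.
+ */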
-static ssize_t inst_grp_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
+static ssize_t mci_max_location_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
- struct mem_ctl_info *mem_ctl_info = grp_to_mci(kobj);
- struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr);
-
- debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info);
+ struct mem_ctl_info *mci = to_mci(dev);
+ int i;
+ char *p = data;
- if (mcidev_attr->store)
- return mcidev_attr->store(mem_ctl_info, buffer, count);
+ for (i = 0; i < mci->n_layers; i++) {
+ p += sprintf(p, "%s %d ",
+ edac_layer_name[mci->layers[i].type],
+ mci->layers[i].size - 1);
+ }
- return -EIO;
+ return p - data;
}
-/* No memory to release for this kobj */
-static void edac_inst_grp_release(struct kobject *kobj)
+#ifdef CONFIG_EDAC_DEBUG
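+/*
+ * Fake error injection, for testing the core handling only: write the
+ * target position to the fake_inject_<layer> debugfs files created below,
+ * optionally set fake_inject_ue and fake_inject_count, then write anything
+ * to the fake_inject file to trigger the report.
+ */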
+static ssize_t edac_fake_inject_write(struct file *file,
+ const char __user *data,
+ size_t count, loff_t *ppos)
{
- struct mcidev_sysfs_group_kobj *grp;
- struct mem_ctl_info *mci;
-
- debugf1("%s()\n", __func__);
+ struct device *dev = file->private_data;
+ struct mem_ctl_info *mci = to_mci(dev);
+ static enum hw_event_mc_err_type type;
+ u16 errcount = mci->fake_inject_count;
+
+ if (!errcount)
+ errcount = 1;
+
+ type = mci->fake_inject_ue ? HW_EVENT_ERR_UNCORRECTED
+ : HW_EVENT_ERR_CORRECTED;
+
+ printk(KERN_DEBUG
+ "Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n",
+ errcount,
+ (type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE",
+ errcount > 1 ? "s" : "",
+ mci->fake_inject_layer[0],
+ mci->fake_inject_layer[1],
+ mci->fake_inject_layer[2]
+ );
+ edac_mc_handle_error(type, mci, errcount, 0, 0, 0,
+ mci->fake_inject_layer[0],
+ mci->fake_inject_layer[1],
+ mci->fake_inject_layer[2],
+ "FAKE ERROR", "for EDAC testing only");
- grp = container_of(kobj, struct mcidev_sysfs_group_kobj, kobj);
- mci = grp->mci;
+ return count;
}
-/* Intermediate show/store table */
-static struct sysfs_ops inst_grp_ops = {
- .show = inst_grp_show,
- .store = inst_grp_store
+static const struct file_operations debug_fake_inject_fops = {
+ .open = simple_open,
+ .write = edac_fake_inject_write,
+ .llseek = generic_file_llseek,
};
+#endif
+
+/* default Control file */
+DEVICE_ATTR(reset_counters, S_IWUSR, NULL, mci_reset_counters_store);
-/* the kobj_type instance for a instance group */
-static struct kobj_type ktype_inst_grp = {
- .release = edac_inst_grp_release,
- .sysfs_ops = &inst_grp_ops,
+/* default Attribute files */
+DEVICE_ATTR(mc_name, S_IRUGO, mci_ctl_name_show, NULL);
+DEVICE_ATTR(size_mb, S_IRUGO, mci_size_mb_show, NULL);
+DEVICE_ATTR(seconds_since_reset, S_IRUGO, mci_seconds_show, NULL);
+DEVICE_ATTR(ue_noinfo_count, S_IRUGO, mci_ue_noinfo_show, NULL);
+DEVICE_ATTR(ce_noinfo_count, S_IRUGO, mci_ce_noinfo_show, NULL);
+DEVICE_ATTR(ue_count, S_IRUGO, mci_ue_count_show, NULL);
+DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL);
+DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL);
+
+/* memory scrubber attribute file */
+DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL);
+
+static struct attribute *mci_attrs[] = {
+ &dev_attr_reset_counters.attr,
+ &dev_attr_mc_name.attr,
+ &dev_attr_size_mb.attr,
+ &dev_attr_seconds_since_reset.attr,
+ &dev_attr_ue_noinfo_count.attr,
+ &dev_attr_ce_noinfo_count.attr,
+ &dev_attr_ue_count.attr,
+ &dev_attr_ce_count.attr,
+ &dev_attr_max_location.attr,
+ NULL
};
+static struct attribute_group mci_attr_grp = {
+ .attrs = mci_attrs,
+};
-/*
- * edac_create_mci_instance_attributes
- * create MC driver specific attributes bellow an specified kobj
- * This routine calls itself recursively, in order to create an entire
- * object tree.
- */
-static int edac_create_mci_instance_attributes(struct mem_ctl_info *mci,
- const struct mcidev_sysfs_attribute *sysfs_attrib,
- struct kobject *kobj)
+static const struct attribute_group *mci_attr_groups[] = {
+ &mci_attr_grp,
+ NULL
+};
+
+static void mci_attr_release(struct device *dev)
{
- int err;
+ struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
- debugf1("%s()\n", __func__);
-
- while (sysfs_attrib) {
- debugf1("%s() sysfs_attrib = %p\n",__func__, sysfs_attrib);
- if (sysfs_attrib->grp) {
- struct mcidev_sysfs_group_kobj *grp_kobj;
-
- grp_kobj = kzalloc(sizeof(*grp_kobj), GFP_KERNEL);
- if (!grp_kobj)
- return -ENOMEM;
-
- grp_kobj->grp = sysfs_attrib->grp;
- grp_kobj->mci = mci;
- list_add_tail(&grp_kobj->list, &mci->grp_kobj_list);
-
- debugf0("%s() grp %s, mci %p\n", __func__,
- sysfs_attrib->grp->name, mci);
-
- err = kobject_init_and_add(&grp_kobj->kobj,
- &ktype_inst_grp,
- &mci->edac_mci_kobj,
- sysfs_attrib->grp->name);
- if (err < 0) {
- printk(KERN_ERR "kobject_init_and_add failed: %d\n", err);
- return err;
- }
- err = edac_create_mci_instance_attributes(mci,
- grp_kobj->grp->mcidev_attr,
- &grp_kobj->kobj);
-
- if (err < 0)
- return err;
- } else if (sysfs_attrib->attr.name) {
- debugf0("%s() file %s\n", __func__,
- sysfs_attrib->attr.name);
-
- err = sysfs_create_file(kobj, &sysfs_attrib->attr);
- if (err < 0) {
- printk(KERN_ERR "sysfs_create_file failed: %d\n", err);
- return err;
- }
- } else
- break;
-
- sysfs_attrib++;
- }
+ edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev));
+ kfree(mci);
+}
+static struct device_type mci_attr_type = {
+ .groups = mci_attr_groups,
+ .release = mci_attr_release,
+};
+
+#ifdef CONFIG_EDAC_DEBUG
+static struct dentry *edac_debugfs;
+
+int __init edac_debugfs_init(void)
+{
+ edac_debugfs = debugfs_create_dir("edac", NULL);
+ if (IS_ERR(edac_debugfs)) {
+ edac_debugfs = NULL;
+ return -ENOMEM;
+ }
return 0;
}
-/*
- * edac_remove_mci_instance_attributes
- * remove MC driver specific attributes at the topmost level
- * directory of this mci instance.
- */
-static void edac_remove_mci_instance_attributes(struct mem_ctl_info *mci,
- const struct mcidev_sysfs_attribute *sysfs_attrib,
- struct kobject *kobj, int count)
+void __exit edac_debugfs_exit(void)
{
- struct mcidev_sysfs_group_kobj *grp_kobj, *tmp;
+ debugfs_remove(edac_debugfs);
+}
- debugf1("%s()\n", __func__);
+static int edac_create_debug_nodes(struct mem_ctl_info *mci)
+{
+ struct dentry *d, *parent;
+ char name[80];
+ int i;
- /*
- * loop if there are attributes and until we hit a NULL entry
- * Remove first all the atributes
- */
- while (sysfs_attrib) {
- debugf1("%s() sysfs_attrib = %p\n",__func__, sysfs_attrib);
- if (sysfs_attrib->grp) {
- debugf1("%s() seeking for group %s\n",
- __func__, sysfs_attrib->grp->name);
- list_for_each_entry(grp_kobj,
- &mci->grp_kobj_list, list) {
- debugf1("%s() grp_kobj->grp = %p\n",__func__, grp_kobj->grp);
- if (grp_kobj->grp == sysfs_attrib->grp) {
- edac_remove_mci_instance_attributes(mci,
- grp_kobj->grp->mcidev_attr,
- &grp_kobj->kobj, count + 1);
- debugf0("%s() group %s\n", __func__,
- sysfs_attrib->grp->name);
- kobject_put(&grp_kobj->kobj);
- }
- }
- debugf1("%s() end of seeking for group %s\n",
- __func__, sysfs_attrib->grp->name);
- } else if (sysfs_attrib->attr.name) {
- debugf0("%s() file %s\n", __func__,
- sysfs_attrib->attr.name);
- sysfs_remove_file(kobj, &sysfs_attrib->attr);
- } else
- break;
- sysfs_attrib++;
+ if (!edac_debugfs)
+ return -ENODEV;
+
+ d = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs);
+ if (!d)
+ return -ENOMEM;
+ parent = d;
+
+ for (i = 0; i < mci->n_layers; i++) {
+ sprintf(name, "fake_inject_%s",
+ edac_layer_name[mci->layers[i].type]);
+ d = debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent,
+ &mci->fake_inject_layer[i]);
+ if (!d)
+ goto nomem;
}
- /* Remove the group objects */
- if (count)
- return;
- list_for_each_entry_safe(grp_kobj, tmp,
- &mci->grp_kobj_list, list) {
- list_del(&grp_kobj->list);
- kfree(grp_kobj);
- }
-}
+ d = debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent,
+ &mci->fake_inject_ue);
+ if (!d)
+ goto nomem;
+
+ d = debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent,
+ &mci->fake_inject_count);
+ if (!d)
+ goto nomem;
+ d = debugfs_create_file("fake_inject", S_IWUSR, parent,
+ &mci->dev,
+ &debug_fake_inject_fops);
+ if (!d)
+ goto nomem;
+
+ mci->debugfs = parent;
+ return 0;
+nomem:
+ debugfs_remove(mci->debugfs);
+ return -ENOMEM;
+}
+#endif
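
The debugfs helpers above follow the usual pattern: one directory per controller, typed knob files created with debugfs_create_u8()/_u16()/_bool(), and the whole tree torn down on exit. A minimal sketch of that pattern as the API looked in this era (the my_* names are hypothetical, for illustration only):

    #include <linux/debugfs.h>
    #include <linux/module.h>

    static struct dentry *my_dir;
    static u8 my_threshold;

    static int __init my_debugfs_init(void)
    {
            my_dir = debugfs_create_dir("my_driver", NULL);
            if (!my_dir)
                    return -ENOMEM;

            /* shows up as /sys/kernel/debug/my_driver/threshold */
            if (!debugfs_create_u8("threshold", S_IRUGO | S_IWUSR,
                                   my_dir, &my_threshold)) {
                    debugfs_remove_recursive(my_dir);
                    return -ENOMEM;
            }
            return 0;
    }

    static void __exit my_debugfs_exit(void)
    {
            /* one call removes the directory and every file in it */
            debugfs_remove_recursive(my_dir);
    }

    module_init(my_debugfs_init);
    module_exit(my_debugfs_exit);
    MODULE_LICENSE("GPL");
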
/*
* Create a new Memory Controller kobject instance,
@@ -902,71 +973,105 @@ static void edac_remove_mci_instance_attributes(struct mem_ctl_info *mci,
*/
int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
{
- int i;
- int err;
- struct csrow_info *csrow;
- struct kobject *kobj_mci = &mci->edac_mci_kobj;
+ int i, err;
- debugf0("%s() idx=%d\n", __func__, mci->mc_idx);
+ /*
+ * The memory controller needs its own bus, in order to avoid
+ * namespace conflicts at /sys/bus/edac.
+ */
+ mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx);
+ if (!mci->bus->name)
+ return -ENOMEM;
- INIT_LIST_HEAD(&mci->grp_kobj_list);
+ edac_dbg(0, "creating bus %s\n", mci->bus->name);
+
+ err = bus_register(mci->bus);
+ if (err < 0)
+ return err;
- /* create a symlink for the device */
- err = sysfs_create_link(kobj_mci, &mci->dev->kobj,
- EDAC_DEVICE_SYMLINK);
- if (err) {
- debugf1("%s() failure to create symlink\n", __func__);
- goto fail0;
+ /* get the /sys/devices/system/edac subsys reference */
+ mci->dev.type = &mci_attr_type;
+ device_initialize(&mci->dev);
+
+ mci->dev.parent = mci_pdev;
+ mci->dev.bus = mci->bus;
+ dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
+ dev_set_drvdata(&mci->dev, mci);
+ pm_runtime_forbid(&mci->dev);
+
+ edac_dbg(0, "creating device %s\n", dev_name(&mci->dev));
+ err = device_add(&mci->dev);
+ if (err < 0) {
+ edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
+ bus_unregister(mci->bus);
+ kfree(mci->bus->name);
+ return err;
}
- /* If the low level driver desires some attributes,
- * then create them now for the driver.
- */
- if (mci->mc_driver_sysfs_attributes) {
- err = edac_create_mci_instance_attributes(mci,
- mci->mc_driver_sysfs_attributes,
- &mci->edac_mci_kobj);
+ if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) {
+ if (mci->get_sdram_scrub_rate) {
+ dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO;
+ dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show;
+ }
+ if (mci->set_sdram_scrub_rate) {
+ dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR;
+ dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store;
+ }
+ err = device_create_file(&mci->dev,
+ &dev_attr_sdram_scrub_rate);
if (err) {
- debugf1("%s() failure to create mci attributes\n",
- __func__);
- goto fail0;
+ edac_dbg(1, "failure: create sdram_scrub_rate\n");
+ goto fail2;
}
}
-
- /* Make directories for each CSROW object under the mc<id> kobject
+ /*
+ * Create the dimm/rank devices
*/
- for (i = 0; i < mci->nr_csrows; i++) {
- csrow = &mci->csrows[i];
-
- /* Only expose populated CSROWs */
- if (csrow->nr_pages > 0) {
- err = edac_create_csrow_object(mci, csrow, i);
- if (err) {
- debugf1("%s() failure: create csrow %d obj\n",
- __func__, i);
- goto fail1;
- }
+ for (i = 0; i < mci->tot_dimms; i++) {
+ struct dimm_info *dimm = mci->dimms[i];
+ /* Only expose populated DIMMs */
+ if (dimm->nr_pages == 0)
+ continue;
+#ifdef CONFIG_EDAC_DEBUG
+ edac_dbg(1, "creating dimm%d, located at ", i);
+ if (edac_debug_level >= 1) {
+ int lay;
+ for (lay = 0; lay < mci->n_layers; lay++)
+ printk(KERN_CONT "%s %d ",
+ edac_layer_name[mci->layers[lay].type],
+ dimm->location[lay]);
+ printk(KERN_CONT "\n");
+ }
+#endif
+ err = edac_create_dimm_object(mci, dimm, i);
+ if (err) {
+ edac_dbg(1, "failure: create dimm %d obj\n", i);
+ goto fail;
}
}
+#ifdef CONFIG_EDAC_LEGACY_SYSFS
+ err = edac_create_csrow_objects(mci);
+ if (err < 0)
+ goto fail;
+#endif
+
+#ifdef CONFIG_EDAC_DEBUG
+ edac_create_debug_nodes(mci);
+#endif
return 0;
- /* CSROW error: backout what has already been registered, */
-fail1:
+fail:
for (i--; i >= 0; i--) {
- if (csrow->nr_pages > 0) {
- kobject_put(&mci->csrows[i].kobj);
- }
+ struct dimm_info *dimm = mci->dimms[i];
+ if (dimm->nr_pages == 0)
+ continue;
+ device_unregister(&dimm->dev);
}
-
- /* remove the mci instance's attributes, if any */
- edac_remove_mci_instance_attributes(mci,
- mci->mc_driver_sysfs_attributes, &mci->edac_mci_kobj, 0);
-
- /* remove the symlink */
- sysfs_remove_link(kobj_mci, EDAC_DEVICE_SYMLINK);
-
-fail0:
+fail2:
+ device_unregister(&mci->dev);
+ bus_unregister(mci->bus);
+ kfree(mci->bus->name);
return err;
}
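
edac_create_sysfs_mci_device() above is an instance of the two-step device-model idiom: device_initialize() followed by device_add(), with the device_type's release() callback, not the error path, owning the final kfree(). A reduced sketch of the same idiom (struct my_ctl and the names around it are hypothetical):

    #include <linux/device.h>
    #include <linux/slab.h>

    struct my_ctl {
            struct device dev;
            int idx;
    };

    static void my_release(struct device *dev)
    {
            /* called when the last reference drops, not at device_del() */
            kfree(container_of(dev, struct my_ctl, dev));
    }

    static struct device_type my_type = {
            .release = my_release,
    };

    static int my_register(struct my_ctl *ctl, struct device *parent)
    {
            int err;

            ctl->dev.parent = parent;
            ctl->dev.type = &my_type;
            device_initialize(&ctl->dev);
            dev_set_name(&ctl->dev, "myctl%d", ctl->idx);

            err = device_add(&ctl->dev);
            if (err)
                    put_device(&ctl->dev);  /* my_release() frees ctl */
            return err;
    }
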
@@ -977,90 +1082,91 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
{
int i;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
- /* remove all csrow kobjects */
- debugf0("%s() unregister this mci kobj\n", __func__);
- for (i = 0; i < mci->nr_csrows; i++) {
- if (mci->csrows[i].nr_pages > 0) {
- debugf0("%s() unreg csrow-%d\n", __func__, i);
- kobject_put(&mci->csrows[i].kobj);
- }
- }
+#ifdef CONFIG_EDAC_DEBUG
+ debugfs_remove(mci->debugfs);
+#endif
+#ifdef CONFIG_EDAC_LEGACY_SYSFS
+ edac_delete_csrow_objects(mci);
+#endif
- /* remove this mci instance's attribtes */
- if (mci->mc_driver_sysfs_attributes) {
- debugf0("%s() unregister mci private attributes\n", __func__);
- edac_remove_mci_instance_attributes(mci,
- mci->mc_driver_sysfs_attributes,
- &mci->edac_mci_kobj, 0);
+ for (i = 0; i < mci->tot_dimms; i++) {
+ struct dimm_info *dimm = mci->dimms[i];
+ if (dimm->nr_pages == 0)
+ continue;
+ edac_dbg(0, "removing device %s\n", dev_name(&dimm->dev));
+ device_unregister(&dimm->dev);
}
-
- /* remove the symlink */
- debugf0("%s() remove_link\n", __func__);
- sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK);
-
- /* unregister this instance's kobject */
- debugf0("%s() remove_mci_instance\n", __func__);
- kobject_put(&mci->edac_mci_kobj);
}
+void edac_unregister_sysfs(struct mem_ctl_info *mci)
+{
+ edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev));
+ device_unregister(&mci->dev);
+ bus_unregister(mci->bus);
+ kfree(mci->bus->name);
+}
+static void mc_attr_release(struct device *dev)
+{
+ /*
+ * There's no container structure here, as this is just the mci
+ * parent device, used to create the /sys/devices/system/edac/mc
+ * sysfs node. So, there are no attributes on it.
+ */
+ edac_dbg(1, "Releasing device %s\n", dev_name(dev));
+ kfree(dev);
+}
-
+static struct device_type mc_attr_type = {
+ .release = mc_attr_release,
+};
/*
- * edac_setup_sysfs_mc_kset(void)
- *
- * Initialize the mc_kset for the 'mc' entry
- * This requires creating the top 'mc' directory with a kset
- * and its controls/attributes.
- *
- * To this 'mc' kset, instance 'mci' will be grouped as children.
- *
- * Return: 0 SUCCESS
- * !0 FAILURE error code
+ * Init/exit code for the module. Basically, creates/removes the "mc"
+ * parent device under /sys/devices/system/edac
*/
-int edac_sysfs_setup_mc_kset(void)
+int __init edac_mc_sysfs_init(void)
{
- int err = -EINVAL;
- struct sysdev_class *edac_class;
-
- debugf1("%s()\n", __func__);
+ struct bus_type *edac_subsys;
+ int err;
- /* get the /sys/devices/system/edac class reference */
- edac_class = edac_get_sysfs_class();
- if (edac_class == NULL) {
- debugf1("%s() no edac_class error=%d\n", __func__, err);
- goto fail_out;
+ /* get the /sys/devices/system/edac subsys reference */
+ edac_subsys = edac_get_sysfs_subsys();
+ if (edac_subsys == NULL) {
+ edac_dbg(1, "no edac_subsys\n");
+ err = -EINVAL;
+ goto out;
}
- /* Init the MC's kobject */
- mc_kset = kset_create_and_add("mc", NULL, &edac_class->kset.kobj);
- if (!mc_kset) {
+ mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL);
+ if (!mci_pdev) {
err = -ENOMEM;
- debugf1("%s() Failed to register '.../edac/mc'\n", __func__);
- goto fail_kset;
+ goto out_put_sysfs;
}
- debugf1("%s() Registered '.../edac/mc' kobject\n", __func__);
+ mci_pdev->bus = edac_subsys;
+ mci_pdev->type = &mc_attr_type;
+ device_initialize(mci_pdev);
+ dev_set_name(mci_pdev, "mc");
- return 0;
+ err = device_add(mci_pdev);
+ if (err < 0)
+ goto out_dev_free;
-fail_kset:
- edac_put_sysfs_class();
+ edac_dbg(0, "device %s created\n", dev_name(mci_pdev));
+
+ return 0;
-fail_out:
+ out_dev_free:
+ kfree(mci_pdev);
+ out_put_sysfs:
+ edac_put_sysfs_subsys();
+ out:
return err;
}
-/*
- * edac_sysfs_teardown_mc_kset
- *
- * deconstruct the mc_ket for memory controllers
- */
-void edac_sysfs_teardown_mc_kset(void)
+void __exit edac_mc_sysfs_exit(void)
{
- kset_unregister(mc_kset);
- edac_put_sysfs_class();
+ device_unregister(mci_pdev);
+ edac_put_sysfs_subsys();
}
-
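
The init path above (out_dev_free / out_put_sysfs / out) is the conventional kernel unwind idiom: one label per acquired resource, jumped to in reverse order of acquisition so only what actually succeeded gets undone. Its skeleton, with hypothetical helpers standing in for the real acquisitions:

    static int acquire_a(void) { return 0; }    /* hypothetical */
    static void release_a(void) { }
    static int acquire_b(void) { return 0; }    /* hypothetical */

    static int my_init(void)
    {
            int err;

            err = acquire_a();
            if (err)
                    goto out;

            err = acquire_b();
            if (err)
                    goto out_release_a;     /* undo only what succeeded */

            return 0;

    out_release_a:
            release_a();
    out:
            return err;
    }
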
diff --git a/drivers/edac/edac_mce.c b/drivers/edac/edac_mce.c
deleted file mode 100644
index 9ccdc5b140e..00000000000
--- a/drivers/edac/edac_mce.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Provides edac interface to mcelog events
- *
- * This file may be distributed under the terms of the
- * GNU General Public License version 2.
- *
- * Copyright (c) 2009 by:
- * Mauro Carvalho Chehab <mchehab@redhat.com>
- *
- * Red Hat Inc. http://www.redhat.com
- */
-
-#include <linux/module.h>
-#include <linux/edac_mce.h>
-#include <asm/mce.h>
-
-int edac_mce_enabled;
-EXPORT_SYMBOL_GPL(edac_mce_enabled);
-
-
-/*
- * Extension interface
- */
-
-static LIST_HEAD(edac_mce_list);
-static DEFINE_MUTEX(edac_mce_lock);
-
-int edac_mce_register(struct edac_mce *edac_mce)
-{
- mutex_lock(&edac_mce_lock);
- list_add_tail(&edac_mce->list, &edac_mce_list);
- mutex_unlock(&edac_mce_lock);
- return 0;
-}
-EXPORT_SYMBOL(edac_mce_register);
-
-void edac_mce_unregister(struct edac_mce *edac_mce)
-{
- mutex_lock(&edac_mce_lock);
- list_del(&edac_mce->list);
- mutex_unlock(&edac_mce_lock);
-}
-EXPORT_SYMBOL(edac_mce_unregister);
-
-int edac_mce_parse(struct mce *mce)
-{
- struct edac_mce *edac_mce;
-
- list_for_each_entry(edac_mce, &edac_mce_list, list) {
- if (edac_mce->check_error(edac_mce->priv, mce))
- return 1;
- }
-
- /* Nobody queued the error */
- return 0;
-}
-EXPORT_SYMBOL_GPL(edac_mce_parse);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
-MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
-MODULE_DESCRIPTION("EDAC Driver for mcelog captured errors");
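
The file deleted above implemented a small mutex-protected callback registry: consumers registered a check_error() hook and edac_mce_parse() walked the list until one of them claimed the event (it was superseded by the x86 MCE decode notifier chain). The same registry pattern as a hedged generic sketch, with illustrative names:

    #include <linux/list.h>
    #include <linux/mutex.h>

    struct my_hook {
            struct list_head list;
            int (*check)(void *priv, const void *event);
            void *priv;
    };

    static LIST_HEAD(my_hooks);
    static DEFINE_MUTEX(my_hooks_lock);

    void my_hook_register(struct my_hook *h)
    {
            mutex_lock(&my_hooks_lock);
            list_add_tail(&h->list, &my_hooks);
            mutex_unlock(&my_hooks_lock);
    }

    void my_hook_unregister(struct my_hook *h)
    {
            mutex_lock(&my_hooks_lock);
            list_del(&h->list);
            mutex_unlock(&my_hooks_lock);
    }

    /* returns 1 as soon as one consumer claims the event, 0 otherwise;
     * like the original, the dispatch path itself takes no lock */
    int my_hook_dispatch(const void *event)
    {
            struct my_hook *h;

            list_for_each_entry(h, &my_hooks, list)
                    if (h->check(h->priv, event))
                            return 1;
            return 0;
    }
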
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index be4b075c309..a66941fea5a 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -15,12 +15,32 @@
#include "edac_core.h"
#include "edac_module.h"
-#define EDAC_VERSION "Ver: 2.1.0 " __DATE__
+#define EDAC_VERSION "Ver: 3.0.0"
#ifdef CONFIG_EDAC_DEBUG
+
+static int edac_set_debug_level(const char *buf, struct kernel_param *kp)
+{
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val > 4)
+ return -EINVAL;
+
+ return param_set_int(buf, kp);
+}
+
/* Values of 0 to 4 will generate output */
int edac_debug_level = 2;
EXPORT_SYMBOL_GPL(edac_debug_level);
+
+module_param_call(edac_debug_level, edac_set_debug_level, param_get_int,
+ &edac_debug_level, 0644);
+MODULE_PARM_DESC(edac_debug_level, "EDAC debug level: [0-4], default: 2");
#endif
/* scope is to module level only */
@@ -90,26 +110,21 @@ static int __init edac_init(void)
*/
edac_pci_clear_parity_errors();
- /*
- * now set up the mc_kset under the edac class object
- */
- err = edac_sysfs_setup_mc_kset();
+ err = edac_mc_sysfs_init();
if (err)
goto error;
+ edac_debugfs_init();
+
/* Setup/Initialize the workq for this core */
err = edac_workqueue_setup();
if (err) {
edac_printk(KERN_ERR, EDAC_MC, "init WorkQueue failure\n");
- goto workq_fail;
+ goto error;
}
return 0;
- /* Error teardown stack */
-workq_fail:
- edac_sysfs_teardown_mc_kset();
-
error:
return err;
}
@@ -120,26 +135,20 @@ error:
*/
static void __exit edac_exit(void)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* tear down the various subsystems */
edac_workqueue_teardown();
- edac_sysfs_teardown_mc_kset();
+ edac_mc_sysfs_exit();
+ edac_debugfs_exit();
}
/*
* Inform the kernel of our entry and exit points
*/
-module_init(edac_init);
+subsys_initcall(edac_init);
module_exit(edac_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al");
MODULE_DESCRIPTION("Core library routines for EDAC reporting");
-
-/* refer to *_sysfs.c files for parameters that are exported via sysfs */
-
-#ifdef CONFIG_EDAC_DEBUG
-module_param(edac_debug_level, int, 0644);
-MODULE_PARM_DESC(edac_debug_level, "Debug level");
-#endif
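
Note the switch from module_init() to subsys_initcall() above. When EDAC is built into the kernel this runs edac_init() at the earlier "subsys" initcall level, so the core is ready before the device-level initcalls that register memory-controller drivers; when built as a module the two macros are equivalent. A minimal sketch:

    #include <linux/init.h>
    #include <linux/module.h>

    static int __init my_core_init(void)
    {
            /* built-in: runs before module_init()-level initcalls;
             * as a module: same as module_init() */
            return 0;
    }

    static void __exit my_core_exit(void)
    {
    }

    subsys_initcall(my_core_init);
    module_exit(my_core_exit);
    MODULE_LICENSE("GPL");
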
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 17aabb7b90e..f2118bfcf8d 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -10,8 +10,6 @@
#ifndef __EDAC_MODULE_H__
#define __EDAC_MODULE_H__
-#include <linux/sysdev.h>
-
#include "edac_core.h"
/*
@@ -21,12 +19,12 @@
*
* edac_mc objects
*/
-extern int edac_sysfs_setup_mc_kset(void);
-extern void edac_sysfs_teardown_mc_kset(void);
-extern int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci);
-extern void edac_mc_unregister_sysfs_main_kobj(struct mem_ctl_info *mci);
+ /* on edac_mc_sysfs.c */
+int edac_mc_sysfs_init(void);
+void edac_mc_sysfs_exit(void);
extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci);
extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci);
+void edac_unregister_sysfs(struct mem_ctl_info *mci);
extern int edac_get_log_ue(void);
extern int edac_get_log_ce(void);
extern int edac_get_panic_on_ue(void);
@@ -36,6 +34,10 @@ extern int edac_mc_get_panic_on_ue(void);
extern int edac_get_poll_msec(void);
extern int edac_mc_get_poll_msec(void);
+unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
+ unsigned len);
+
+ /* on edac_device.c */
extern int edac_device_register_sysfs_main_kobj(
struct edac_device_ctl_info *edac_dev);
extern void edac_device_unregister_sysfs_main_kobj(
@@ -50,9 +52,23 @@ extern void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
extern void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev);
extern void edac_device_reset_delay_period(struct edac_device_ctl_info
*edac_dev, unsigned long value);
-extern void edac_mc_reset_delay_period(int value);
+extern void edac_mc_reset_delay_period(unsigned long value);
+
+extern void *edac_align_ptr(void **p, unsigned size, int n_elems);
-extern void *edac_align_ptr(void *ptr, unsigned size);
+/*
+ * EDAC debugfs functions
+ */
+#ifdef CONFIG_EDAC_DEBUG
+int edac_debugfs_init(void);
+void edac_debugfs_exit(void);
+#else
+static inline int edac_debugfs_init(void)
+{
+ return -ENODEV;
+}
+static inline void edac_debugfs_exit(void) {}
+#endif
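
The #ifdef block above is the standard way to make a config-gated API disappear: real prototypes when CONFIG_EDAC_DEBUG is set, static inline no-op stubs otherwise, so call sites never need #ifdefs of their own. The idiom in isolation (CONFIG_MY_FEATURE and the function names are placeholders):

    #include <linux/errno.h>

    #ifdef CONFIG_MY_FEATURE
    int my_feature_init(void);
    void my_feature_exit(void);
    #else
    /* stubs compile away entirely; callers stay #ifdef-free */
    static inline int my_feature_init(void)
    {
            return -ENODEV;
    }
    static inline void my_feature_exit(void) { }
    #endif
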
/*
* EDAC PCI functions
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index efb5d565078..2cf44b4db80 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -19,7 +19,6 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/list.h>
-#include <linux/sysdev.h>
#include <linux/ctype.h>
#include <linux/workqueue.h>
#include <asm/uaccess.h>
@@ -43,13 +42,13 @@ struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
const char *edac_pci_name)
{
struct edac_pci_ctl_info *pci;
- void *pvt;
+ void *p = NULL, *pvt;
unsigned int size;
- debugf1("%s()\n", __func__);
+ edac_dbg(1, "\n");
- pci = (struct edac_pci_ctl_info *)0;
- pvt = edac_align_ptr(&pci[1], sz_pvt);
+ pci = edac_align_ptr(&p, sizeof(*pci), 1);
+ pvt = edac_align_ptr(&p, 1, sz_pvt);
size = ((unsigned long)pvt) + sz_pvt;
/* Alloc the needed control struct memory */
@@ -81,7 +80,7 @@ EXPORT_SYMBOL_GPL(edac_pci_alloc_ctl_info);
*/
void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci)
{
- debugf1("%s()\n", __func__);
+ edac_dbg(1, "\n");
edac_pci_remove_sysfs(pci);
}
@@ -98,7 +97,7 @@ static struct edac_pci_ctl_info *find_edac_pci_by_dev(struct device *dev)
struct edac_pci_ctl_info *pci;
struct list_head *item;
- debugf1("%s()\n", __func__);
+ edac_dbg(1, "\n");
list_for_each(item, &edac_pci_list) {
pci = list_entry(item, struct edac_pci_ctl_info, link);
@@ -123,7 +122,7 @@ static int add_edac_pci_to_global_list(struct edac_pci_ctl_info *pci)
struct list_head *item, *insert_before;
struct edac_pci_ctl_info *rover;
- debugf1("%s()\n", __func__);
+ edac_dbg(1, "\n");
insert_before = &edac_pci_list;
@@ -164,19 +163,6 @@ fail1:
}
/*
- * complete_edac_pci_list_del
- *
- * RCU completion callback to indicate item is deleted
- */
-static void complete_edac_pci_list_del(struct rcu_head *head)
-{
- struct edac_pci_ctl_info *pci;
-
- pci = container_of(head, struct edac_pci_ctl_info, rcu);
- INIT_LIST_HEAD(&pci->link);
-}
-
-/*
* del_edac_pci_from_global_list
*
* remove the PCI control struct from the global list
@@ -184,8 +170,12 @@ static void complete_edac_pci_list_del(struct rcu_head *head)
static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci)
{
list_del_rcu(&pci->link);
- call_rcu(&pci->rcu, complete_edac_pci_list_del);
- rcu_barrier();
+
+ /* these are for safe removal of devices from the global list while
+ * NMI handlers may be traversing it
+ */
+ synchronize_rcu();
+ INIT_LIST_HEAD(&pci->link);
}
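
Replacing call_rcu() + rcu_barrier() with synchronize_rcu() above is the classic RCU removal shape: readers (here, possibly NMI-context parity checkers) traverse under RCU, while the writer unlinks with list_del_rcu() and then blocks until all pre-existing readers have finished before the node may be reused. In sketch form (the my_* names are illustrative):

    #include <linux/rculist.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct my_node {
            struct list_head link;
            int data;
    };

    static LIST_HEAD(my_list);
    static DEFINE_SPINLOCK(my_lock);

    /* reader: may run in contexts that cannot take my_lock */
    int my_find(int data)
    {
            struct my_node *n;
            int found = 0;

            rcu_read_lock();
            list_for_each_entry_rcu(n, &my_list, link)
                    if (n->data == data) {
                            found = 1;
                            break;
                    }
            rcu_read_unlock();
            return found;
    }

    /* writer: unlink, wait out all current readers, then free */
    void my_remove(struct my_node *n)
    {
            spin_lock(&my_lock);
            list_del_rcu(&n->link);
            spin_unlock(&my_lock);

            synchronize_rcu();      /* no reader can still see n now */
            kfree(n);
    }
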
#if 0
@@ -236,7 +226,7 @@ static void edac_pci_workq_function(struct work_struct *work_req)
int msec;
unsigned long delay;
- debugf3("%s() checking\n", __func__);
+ edac_dbg(3, "checking\n");
mutex_lock(&edac_pci_ctls_mutex);
@@ -271,7 +261,7 @@ static void edac_pci_workq_function(struct work_struct *work_req)
static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci,
unsigned int msec)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function);
queue_delayed_work(edac_workqueue, &pci->work,
@@ -286,7 +276,7 @@ static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci)
{
int status;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
status = cancel_delayed_work(&pci->work);
if (status == 0)
@@ -303,7 +293,7 @@ static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci)
void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
unsigned long value)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
edac_pci_workq_teardown(pci);
@@ -343,7 +333,7 @@ EXPORT_SYMBOL_GPL(edac_pci_alloc_index);
*/
int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
pci->pci_idx = edac_idx;
pci->start_time = jiffies;
@@ -368,11 +358,9 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
}
edac_pci_printk(pci, KERN_INFO,
- "Giving out device to module '%s' controller '%s':"
- " DEV '%s' (%s)\n",
- pci->mod_name,
- pci->ctl_name,
- edac_dev_name(pci), edac_op_state_to_string(pci->op_state));
+ "Giving out device to module %s controller %s: DEV %s (%s)\n",
+ pci->mod_name, pci->ctl_name, pci->dev_name,
+ edac_op_state_to_string(pci->op_state));
mutex_unlock(&edac_pci_ctls_mutex);
return 0;
@@ -403,7 +391,7 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev)
{
struct edac_pci_ctl_info *pci;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
mutex_lock(&edac_pci_ctls_mutex);
@@ -440,7 +428,7 @@ EXPORT_SYMBOL_GPL(edac_pci_del_device);
*/
static void edac_pci_generic_check(struct edac_pci_ctl_info *pci)
{
- debugf4("%s()\n", __func__);
+ edac_dbg(4, "\n");
edac_pci_do_parity_check();
}
@@ -480,12 +468,13 @@ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
pci->mod_name = mod_name;
pci->ctl_name = EDAC_PCI_GENCTL_NAME;
- pci->edac_check = edac_pci_generic_check;
+ if (edac_op_state == EDAC_OPSTATE_POLL)
+ pci->edac_check = edac_pci_generic_check;
pdata->edac_idx = edac_pci_idx++;
if (edac_pci_add_device(pci, pdata->edac_idx) > 0) {
- debugf3("%s(): failed edac_pci_add_device()\n", __func__);
+ edac_dbg(3, "failed edac_pci_add_device()\n");
edac_pci_free_ctl_info(pci);
return NULL;
}
@@ -501,7 +490,7 @@ EXPORT_SYMBOL_GPL(edac_pci_create_generic_ctl);
*/
void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci)
{
- debugf0("%s() pci mod=%s\n", __func__, pci->mod_name);
+ edac_dbg(0, "pci mod=%s\n", pci->mod_name);
edac_pci_del_device(pci->dev);
edac_pci_free_ctl_info(pci);
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 023b01cb517..e8658e45176 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -78,7 +78,7 @@ static void edac_pci_instance_release(struct kobject *kobj)
{
struct edac_pci_ctl_info *pci;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* Form pointer to containing struct, the pci control struct */
pci = to_instance(kobj);
@@ -161,7 +161,7 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
struct kobject *main_kobj;
int err;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* First bump the ref count on the top main kobj, which will
* track the number of PCI instances we have, and thus nest
@@ -177,14 +177,13 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
err = kobject_init_and_add(&pci->kobj, &ktype_pci_instance,
edac_pci_top_main_kobj, "pci%d", idx);
if (err != 0) {
- debugf2("%s() failed to register instance pci%d\n",
- __func__, idx);
+ edac_dbg(2, "failed to register instance pci%d\n", idx);
kobject_put(edac_pci_top_main_kobj);
goto error_out;
}
kobject_uevent(&pci->kobj, KOBJ_ADD);
- debugf1("%s() Register instance 'pci%d' kobject\n", __func__, idx);
+ edac_dbg(1, "Register instance 'pci%d' kobject\n", idx);
return 0;
@@ -201,7 +200,7 @@ error_out:
static void edac_pci_unregister_sysfs_instance_kobj(
struct edac_pci_ctl_info *pci)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* Unregister the instance kobject and allow its release
* function release the main reference count and then
@@ -257,7 +256,7 @@ static ssize_t edac_pci_dev_store(struct kobject *kobj,
struct edac_pci_dev_attribute *edac_pci_dev;
edac_pci_dev = (struct edac_pci_dev_attribute *)attr;
- if (edac_pci_dev->show)
+ if (edac_pci_dev->store)
return edac_pci_dev->store(edac_pci_dev->value, buffer, count);
return -EIO;
}
@@ -317,7 +316,7 @@ static struct edac_pci_dev_attribute *edac_pci_attr[] = {
*/
static void edac_pci_release_main_kobj(struct kobject *kobj)
{
- debugf0("%s() here to module_put(THIS_MODULE)\n", __func__);
+ edac_dbg(0, "here to module_put(THIS_MODULE)\n");
kfree(kobj);
@@ -338,25 +337,25 @@ static struct kobj_type ktype_edac_pci_main_kobj = {
* edac_pci_main_kobj_setup()
*
* setup the sysfs for EDAC PCI attributes
- * assumes edac_class has already been initialized
+ * assumes edac_subsys has already been initialized
*/
static int edac_pci_main_kobj_setup(void)
{
int err;
- struct sysdev_class *edac_class;
+ struct bus_type *edac_subsys;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* check and count if we have already created the main kobject */
if (atomic_inc_return(&edac_pci_sysfs_refcount) != 1)
return 0;
/* First time, so create the main kobject and its
- * controls and atributes
+ * controls and attributes
*/
- edac_class = edac_get_sysfs_class();
- if (edac_class == NULL) {
- debugf1("%s() no edac_class\n", __func__);
+ edac_subsys = edac_get_sysfs_subsys();
+ if (edac_subsys == NULL) {
+ edac_dbg(1, "no edac_subsys\n");
err = -ENODEV;
goto decrement_count_fail;
}
@@ -366,14 +365,14 @@ static int edac_pci_main_kobj_setup(void)
* level main kobj for EDAC PCI
*/
if (!try_module_get(THIS_MODULE)) {
- debugf1("%s() try_module_get() failed\n", __func__);
+ edac_dbg(1, "try_module_get() failed\n");
err = -ENODEV;
goto mod_get_fail;
}
edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
if (!edac_pci_top_main_kobj) {
- debugf1("Failed to allocate\n");
+ edac_dbg(1, "Failed to allocate\n");
err = -ENOMEM;
goto kzalloc_fail;
}
@@ -381,9 +380,9 @@ static int edac_pci_main_kobj_setup(void)
/* Instantiate the pci object */
err = kobject_init_and_add(edac_pci_top_main_kobj,
&ktype_edac_pci_main_kobj,
- &edac_class->kset.kobj, "pci");
+ &edac_subsys->dev_root->kobj, "pci");
if (err) {
- debugf1("Failed to register '.../edac/pci'\n");
+ edac_dbg(1, "Failed to register '.../edac/pci'\n");
goto kobject_init_and_add_fail;
}
@@ -392,7 +391,7 @@ static int edac_pci_main_kobj_setup(void)
* must be used, for resources to be cleaned up properly
*/
kobject_uevent(edac_pci_top_main_kobj, KOBJ_ADD);
- debugf1("Registered '.../edac/pci' kobject\n");
+ edac_dbg(1, "Registered '.../edac/pci' kobject\n");
return 0;
@@ -404,7 +403,7 @@ kzalloc_fail:
module_put(THIS_MODULE);
mod_get_fail:
- edac_put_sysfs_class();
+ edac_put_sysfs_subsys();
decrement_count_fail:
/* if are on this error exit, nothing to tear down */
@@ -421,18 +420,17 @@ decrement_count_fail:
*/
static void edac_pci_main_kobj_teardown(void)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* Decrement the count and only if no more controller instances
* are connected, perform the unregistration of the top level
* main kobj
*/
if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
- debugf0("%s() called kobject_put on main kobj\n",
- __func__);
+ edac_dbg(0, "called kobject_put on main kobj\n");
kobject_put(edac_pci_top_main_kobj);
+ edac_put_sysfs_subsys();
}
- edac_put_sysfs_class();
}
/*
@@ -446,7 +444,7 @@ int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci)
int err;
struct kobject *edac_kobj = &pci->kobj;
- debugf0("%s() idx=%d\n", __func__, pci->pci_idx);
+ edac_dbg(0, "idx=%d\n", pci->pci_idx);
/* create the top main EDAC PCI kobject, IF needed */
err = edac_pci_main_kobj_setup();
@@ -460,8 +458,7 @@ int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci)
err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK);
if (err) {
- debugf0("%s() sysfs_create_link() returned err= %d\n",
- __func__, err);
+ edac_dbg(0, "sysfs_create_link() returned err= %d\n", err);
goto symlink_fail;
}
@@ -484,7 +481,7 @@ unregister_cleanup:
*/
void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci)
{
- debugf0("%s() index=%d\n", __func__, pci->pci_idx);
+ edac_dbg(0, "index=%d\n", pci->pci_idx);
/* Remove the symlink */
sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK);
@@ -496,7 +493,7 @@ void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci)
* if this 'pci' is the last instance.
* If it is, the main kobject will be unregistered as a result
*/
- debugf0("%s() calling edac_pci_main_kobj_teardown()\n", __func__);
+ edac_dbg(0, "calling edac_pci_main_kobj_teardown()\n");
edac_pci_main_kobj_teardown();
}
@@ -551,7 +548,7 @@ static void edac_pci_dev_parity_clear(struct pci_dev *dev)
/*
* PCI Parity polling
*
- * Fucntion to retrieve the current parity status
+ * Function to retrieve the current parity status
* and decode it
*
*/
@@ -572,7 +569,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev)
local_irq_restore(flags);
- debugf4("PCI STATUS= 0x%04x %s\n", status, dev_name(&dev->dev));
+ edac_dbg(4, "PCI STATUS= 0x%04x %s\n", status, dev_name(&dev->dev));
/* check the status reg for errors on boards NOT marked as broken
* if broken, we cannot trust any of the status bits
@@ -603,13 +600,15 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev)
}
- debugf4("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev_name(&dev->dev));
+ edac_dbg(4, "PCI HEADER TYPE= 0x%02x %s\n",
+ header_type, dev_name(&dev->dev));
if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
/* On bridges, need to examine secondary status register */
status = get_pci_parity_status(dev, 1);
- debugf4("PCI SEC_STATUS= 0x%04x %s\n", status, dev_name(&dev->dev));
+ edac_dbg(4, "PCI SEC_STATUS= 0x%04x %s\n",
+ status, dev_name(&dev->dev));
/* check the secondary status reg for errors,
* on NOT broken boards
@@ -646,20 +645,16 @@ typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
/*
* pci_dev parity list iterator
- * Scan the PCI device list for one pass, looking for SERRORs
- * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
+ *
+ * Scan the PCI device list looking for SERRORs, Master Parity ERRORS or
+ * Parity ERRORs on primary or secondary devices.
*/
static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
{
struct pci_dev *dev = NULL;
- /* request for kernel access to the next PCI device, if any,
- * and while we are looking at it have its reference count
- * bumped until we are done with it
- */
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ for_each_pci_dev(dev)
fn(dev);
- }
}
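
for_each_pci_dev() expands to exactly the pci_get_device(PCI_ANY_ID, PCI_ANY_ID, ...) loop the deleted comment described, so it still takes and drops a reference on each device as it walks; the pointer must not be cached past an iteration without pci_dev_get(). A hedged usage sketch:

    #include <linux/pci.h>

    /* count PCI devices; dev's refcount is held only inside each pass */
    static int my_count_pci_devices(void)
    {
            struct pci_dev *dev = NULL;
            int count = 0;

            for_each_pci_dev(dev)
                    count++;

            return count;
    }
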
/*
@@ -671,7 +666,7 @@ void edac_pci_do_parity_check(void)
{
int before_count;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
/* if policy has PCI check off, leave now */
if (!check_pci_errors)
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index aab970760b7..9d9e18aefaa 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -5,7 +5,7 @@
*
* 2007 (c) MontaVista Software, Inc.
* 2010 (c) Advanced Micro Devices Inc.
- * Borislav Petkov <borislav.petkov@amd.com>
+ * Borislav Petkov <bp@alien8.de>
*
* This file is licensed under the terms of the GNU General Public
* License version 2. This program is licensed "as is" without any
@@ -14,7 +14,8 @@
*/
#include <linux/module.h>
#include <linux/edac.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
+#include <linux/device.h>
#include <asm/edac.h>
int edac_op_state = EDAC_OPSTATE_INVAL;
@@ -26,7 +27,26 @@ EXPORT_SYMBOL_GPL(edac_handlers);
int edac_err_assert = 0;
EXPORT_SYMBOL_GPL(edac_err_assert);
-static atomic_t edac_class_valid = ATOMIC_INIT(0);
+static atomic_t edac_subsys_valid = ATOMIC_INIT(0);
+
+int edac_report_status = EDAC_REPORTING_ENABLED;
+EXPORT_SYMBOL_GPL(edac_report_status);
+
+static int __init edac_report_setup(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strncmp(str, "on", 2))
+ set_edac_report_status(EDAC_REPORTING_ENABLED);
+ else if (!strncmp(str, "off", 3))
+ set_edac_report_status(EDAC_REPORTING_DISABLED);
+ else if (!strncmp(str, "force", 5))
+ set_edac_report_status(EDAC_REPORTING_FORCE);
+
+ return 0;
+}
+__setup("edac_report=", edac_report_setup);
/*
* called to determine if there is an EDAC driver interested in
@@ -54,36 +74,37 @@ EXPORT_SYMBOL_GPL(edac_atomic_assert_error);
* sysfs object: /sys/devices/system/edac
* need to export to other files
*/
-struct sysdev_class edac_class = {
+struct bus_type edac_subsys = {
.name = "edac",
+ .dev_name = "edac",
};
-EXPORT_SYMBOL_GPL(edac_class);
+EXPORT_SYMBOL_GPL(edac_subsys);
/* return pointer to the 'edac' node in sysfs */
-struct sysdev_class *edac_get_sysfs_class(void)
+struct bus_type *edac_get_sysfs_subsys(void)
{
int err = 0;
- if (atomic_read(&edac_class_valid))
+ if (atomic_read(&edac_subsys_valid))
goto out;
/* create the /sys/devices/system/edac directory */
- err = sysdev_class_register(&edac_class);
+ err = subsys_system_register(&edac_subsys, NULL);
if (err) {
printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n");
return NULL;
}
out:
- atomic_inc(&edac_class_valid);
- return &edac_class;
+ atomic_inc(&edac_subsys_valid);
+ return &edac_subsys;
}
-EXPORT_SYMBOL_GPL(edac_get_sysfs_class);
+EXPORT_SYMBOL_GPL(edac_get_sysfs_subsys);
-void edac_put_sysfs_class(void)
+void edac_put_sysfs_subsys(void)
{
/* last user unregisters it */
- if (atomic_dec_and_test(&edac_class_valid))
- sysdev_class_unregister(&edac_class);
+ if (atomic_dec_and_test(&edac_subsys_valid))
+ bus_unregister(&edac_subsys);
}
-EXPORT_SYMBOL_GPL(edac_put_sysfs_class);
+EXPORT_SYMBOL_GPL(edac_put_sysfs_subsys);
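
edac_get_sysfs_subsys()/edac_put_sysfs_subsys() implement first-user-registers, last-user-unregisters semantics around a single atomic counter, the same refcount idiom edac_pci_main_kobj_setup() uses. Reduced to a sketch, with hypothetical my_subsys_* helpers:

    #include <linux/atomic.h>

    static atomic_t my_subsys_users = ATOMIC_INIT(0);

    static int my_subsys_register(void) { return 0; }   /* hypothetical */
    static void my_subsys_unregister(void) { }

    /* first user performs the real registration; later users just count */
    int my_subsys_get(void)
    {
            if (atomic_inc_return(&my_subsys_users) == 1)
                    return my_subsys_register();
            return 0;
    }

    /* last user tears it down */
    void my_subsys_put(void)
    {
            if (atomic_dec_and_test(&my_subsys_users))
                    my_subsys_unregister();
    }
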
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
new file mode 100644
index 00000000000..8399b4e16fe
--- /dev/null
+++ b/drivers/edac/ghes_edac.c
@@ -0,0 +1,547 @@
+/*
+ * GHES/EDAC Linux driver
+ *
+ * This file may be distributed under the terms of the GNU General Public
+ * License version 2.
+ *
+ * Copyright (c) 2013 by Mauro Carvalho Chehab
+ *
+ * Red Hat Inc. http://www.redhat.com
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <acpi/ghes.h>
+#include <linux/edac.h>
+#include <linux/dmi.h>
+#include "edac_core.h"
+#include <ras/ras_event.h>
+
+#define GHES_EDAC_REVISION " Ver: 1.0.0"
+
+struct ghes_edac_pvt {
+ struct list_head list;
+ struct ghes *ghes;
+ struct mem_ctl_info *mci;
+
+ /* Buffers for the error handling routine */
+ char detail_location[240];
+ char other_detail[160];
+ char msg[80];
+};
+
+static LIST_HEAD(ghes_reglist);
+static DEFINE_MUTEX(ghes_edac_lock);
+static int ghes_edac_mc_num;
+
+
+/* Memory Device - Type 17 of SMBIOS spec */
+struct memdev_dmi_entry {
+ u8 type;
+ u8 length;
+ u16 handle;
+ u16 phys_mem_array_handle;
+ u16 mem_err_info_handle;
+ u16 total_width;
+ u16 data_width;
+ u16 size;
+ u8 form_factor;
+ u8 device_set;
+ u8 device_locator;
+ u8 bank_locator;
+ u8 memory_type;
+ u16 type_detail;
+ u16 speed;
+ u8 manufacturer;
+ u8 serial_number;
+ u8 asset_tag;
+ u8 part_number;
+ u8 attributes;
+ u32 extended_size;
+ u16 conf_mem_clk_speed;
+} __attribute__((__packed__));
+
+struct ghes_edac_dimm_fill {
+ struct mem_ctl_info *mci;
+ unsigned count;
+};
+
+char *memory_type[] = {
+ [MEM_EMPTY] = "EMPTY",
+ [MEM_RESERVED] = "RESERVED",
+ [MEM_UNKNOWN] = "UNKNOWN",
+ [MEM_FPM] = "FPM",
+ [MEM_EDO] = "EDO",
+ [MEM_BEDO] = "BEDO",
+ [MEM_SDR] = "SDR",
+ [MEM_RDR] = "RDR",
+ [MEM_DDR] = "DDR",
+ [MEM_RDDR] = "RDDR",
+ [MEM_RMBS] = "RMBS",
+ [MEM_DDR2] = "DDR2",
+ [MEM_FB_DDR2] = "FB_DDR2",
+ [MEM_RDDR2] = "RDDR2",
+ [MEM_XDR] = "XDR",
+ [MEM_DDR3] = "DDR3",
+ [MEM_RDDR3] = "RDDR3",
+};
+
+static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
+{
+ int *num_dimm = arg;
+
+ if (dh->type == DMI_ENTRY_MEM_DEVICE)
+ (*num_dimm)++;
+}
+
+static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
+{
+ struct ghes_edac_dimm_fill *dimm_fill = arg;
+ struct mem_ctl_info *mci = dimm_fill->mci;
+
+ if (dh->type == DMI_ENTRY_MEM_DEVICE) {
+ struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
+ struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+ mci->n_layers,
+ dimm_fill->count, 0, 0);
+
+ if (entry->size == 0xffff) {
+ pr_info("Can't get DIMM%i size\n",
+ dimm_fill->count);
+ dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
+ } else if (entry->size == 0x7fff) {
+ dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
+ } else {
+ if (entry->size & 1 << 15)
+ dimm->nr_pages = MiB_TO_PAGES((entry->size &
+ 0x7fff) << 10);
+ else
+ dimm->nr_pages = MiB_TO_PAGES(entry->size);
+ }
+
+ switch (entry->memory_type) {
+ case 0x12:
+ if (entry->type_detail & 1 << 13)
+ dimm->mtype = MEM_RDDR;
+ else
+ dimm->mtype = MEM_DDR;
+ break;
+ case 0x13:
+ if (entry->type_detail & 1 << 13)
+ dimm->mtype = MEM_RDDR2;
+ else
+ dimm->mtype = MEM_DDR2;
+ break;
+ case 0x14:
+ dimm->mtype = MEM_FB_DDR2;
+ break;
+ case 0x18:
+ if (entry->type_detail & 1 << 13)
+ dimm->mtype = MEM_RDDR3;
+ else
+ dimm->mtype = MEM_DDR3;
+ break;
+ default:
+ if (entry->type_detail & 1 << 6)
+ dimm->mtype = MEM_RMBS;
+ else if ((entry->type_detail & ((1 << 7) | (1 << 13)))
+ == ((1 << 7) | (1 << 13)))
+ dimm->mtype = MEM_RDR;
+ else if (entry->type_detail & 1 << 7)
+ dimm->mtype = MEM_SDR;
+ else if (entry->type_detail & 1 << 9)
+ dimm->mtype = MEM_EDO;
+ else
+ dimm->mtype = MEM_UNKNOWN;
+ }
+
+ /*
+ * Actually, we can only detect whether the memory has extra
+ * bits for checking (ECC) or not
+ */
+ if (entry->total_width == entry->data_width)
+ dimm->edac_mode = EDAC_NONE;
+ else
+ dimm->edac_mode = EDAC_SECDED;
+
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->grain = 128; /* Likely the worst case */
+
+ /*
+ * FIXME: It shouldn't be hard to also fill the DIMM labels
+ */
+
+ if (dimm->nr_pages) {
+ edac_dbg(1, "DIMM%i: %s size = %d MB%s\n",
+ dimm_fill->count, memory_type[dimm->mtype],
+ PAGES_TO_MiB(dimm->nr_pages),
+ (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
+ edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n",
+ entry->memory_type, entry->type_detail,
+ entry->total_width, entry->data_width);
+ }
+
+ dimm_fill->count++;
+ }
+}
+
+void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
+ struct cper_sec_mem_err *mem_err)
+{
+ enum hw_event_mc_err_type type;
+ struct edac_raw_error_desc *e;
+ struct mem_ctl_info *mci;
+ struct ghes_edac_pvt *pvt = NULL;
+ char *p;
+ u8 grain_bits;
+
+ list_for_each_entry(pvt, &ghes_reglist, list) {
+ if (ghes == pvt->ghes)
+ break;
+ }
+ if (!pvt) {
+ pr_err("Internal error: Can't find EDAC structure\n");
+ return;
+ }
+ mci = pvt->mci;
+ e = &mci->error_desc;
+
+ /* Cleans the error report buffer */
+ memset(e, 0, sizeof (*e));
+ e->error_count = 1;
+ strcpy(e->label, "unknown label");
+ e->msg = pvt->msg;
+ e->other_detail = pvt->other_detail;
+ e->top_layer = -1;
+ e->mid_layer = -1;
+ e->low_layer = -1;
+ *pvt->other_detail = '\0';
+ *pvt->msg = '\0';
+
+ switch (sev) {
+ case GHES_SEV_CORRECTED:
+ type = HW_EVENT_ERR_CORRECTED;
+ break;
+ case GHES_SEV_RECOVERABLE:
+ type = HW_EVENT_ERR_UNCORRECTED;
+ break;
+ case GHES_SEV_PANIC:
+ type = HW_EVENT_ERR_FATAL;
+ break;
+ default:
+ case GHES_SEV_NO:
+ type = HW_EVENT_ERR_INFO;
+ }
+
+ edac_dbg(1, "error validation_bits: 0x%08llx\n",
+ (long long)mem_err->validation_bits);
+
+ /* Error type, mapped on e->msg */
+ if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
+ p = pvt->msg;
+ switch (mem_err->error_type) {
+ case 0:
+ p += sprintf(p, "Unknown");
+ break;
+ case 1:
+ p += sprintf(p, "No error");
+ break;
+ case 2:
+ p += sprintf(p, "Single-bit ECC");
+ break;
+ case 3:
+ p += sprintf(p, "Multi-bit ECC");
+ break;
+ case 4:
+ p += sprintf(p, "Single-symbol ChipKill ECC");
+ break;
+ case 5:
+ p += sprintf(p, "Multi-symbol ChipKill ECC");
+ break;
+ case 6:
+ p += sprintf(p, "Master abort");
+ break;
+ case 7:
+ p += sprintf(p, "Target abort");
+ break;
+ case 8:
+ p += sprintf(p, "Parity Error");
+ break;
+ case 9:
+ p += sprintf(p, "Watchdog timeout");
+ break;
+ case 10:
+ p += sprintf(p, "Invalid address");
+ break;
+ case 11:
+ p += sprintf(p, "Mirror Broken");
+ break;
+ case 12:
+ p += sprintf(p, "Memory Sparing");
+ break;
+ case 13:
+ p += sprintf(p, "Scrub corrected error");
+ break;
+ case 14:
+ p += sprintf(p, "Scrub uncorrected error");
+ break;
+ case 15:
+ p += sprintf(p, "Physical Memory Map-out event");
+ break;
+ default:
+ p += sprintf(p, "reserved error (%d)",
+ mem_err->error_type);
+ }
+ } else {
+ strcpy(pvt->msg, "unknown error");
+ }
+
+ /* Error address */
+ if (mem_err->validation_bits & CPER_MEM_VALID_PA) {
+ e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
+ e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
+ }
+
+ /* Error grain */
+ if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK)
+ e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
+
+ /* Memory error location, mapped on e->location */
+ p = e->location;
+ if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
+ p += sprintf(p, "node:%d ", mem_err->node);
+ if (mem_err->validation_bits & CPER_MEM_VALID_CARD)
+ p += sprintf(p, "card:%d ", mem_err->card);
+ if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
+ p += sprintf(p, "module:%d ", mem_err->module);
+ if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
+ p += sprintf(p, "rank:%d ", mem_err->rank);
+ if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
+ p += sprintf(p, "bank:%d ", mem_err->bank);
+ if (mem_err->validation_bits & CPER_MEM_VALID_ROW)
+ p += sprintf(p, "row:%d ", mem_err->row);
+ if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
+ p += sprintf(p, "col:%d ", mem_err->column);
+ if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
+ p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
+ if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
+ const char *bank = NULL, *device = NULL;
+ dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
+ if (bank != NULL && device != NULL)
+ p += sprintf(p, "DIMM location:%s %s ", bank, device);
+ else
+ p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
+ mem_err->mem_dev_handle);
+ }
+ if (p > e->location)
+ *(p - 1) = '\0';
+
+ /* All other fields are mapped on e->other_detail */
+ p = pvt->other_detail;
+ if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
+ u64 status = mem_err->error_status;
+
+ p += sprintf(p, "status(0x%016llx): ", (long long)status);
+ switch ((status >> 8) & 0xff) {
+ case 1:
+ p += sprintf(p, "Error detected internal to the component ");
+ break;
+ case 16:
+ p += sprintf(p, "Error detected in the bus ");
+ break;
+ case 4:
+ p += sprintf(p, "Storage error in DRAM memory ");
+ break;
+ case 5:
+ p += sprintf(p, "Storage error in TLB ");
+ break;
+ case 6:
+ p += sprintf(p, "Storage error in cache ");
+ break;
+ case 7:
+ p += sprintf(p, "Error in one or more functional units ");
+ break;
+ case 8:
+ p += sprintf(p, "component failed self test ");
+ break;
+ case 9:
+ p += sprintf(p, "Overflow or undervalue of internal queue ");
+ break;
+ case 17:
+ p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
+ break;
+ case 18:
+ p += sprintf(p, "Improper access error ");
+ break;
+ case 19:
+ p += sprintf(p, "Access to a memory address which is not mapped to any component ");
+ break;
+ case 20:
+ p += sprintf(p, "Loss of Lockstep ");
+ break;
+ case 21:
+ p += sprintf(p, "Response not associated with a request ");
+ break;
+ case 22:
+ p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
+ break;
+ case 23:
+ p += sprintf(p, "Detection of a PATH_ERROR ");
+ break;
+ case 25:
+ p += sprintf(p, "Bus operation timeout ");
+ break;
+ case 26:
+ p += sprintf(p, "A read was issued to data that has been poisoned ");
+ break;
+ default:
+ p += sprintf(p, "reserved ");
+ break;
+ }
+ }
+ if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
+ p += sprintf(p, "requestorID: 0x%016llx ",
+ (long long)mem_err->requestor_id);
+ if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
+ p += sprintf(p, "responderID: 0x%016llx ",
+ (long long)mem_err->responder_id);
+ if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
+ p += sprintf(p, "targetID: 0x%016llx ",
+ (long long)mem_err->responder_id);
+ if (p > pvt->other_detail)
+ *(p - 1) = '\0';
+
+ /* Generate the trace event */
+ grain_bits = fls_long(e->grain);
+ sprintf(pvt->detail_location, "APEI location: %s %s",
+ e->location, e->other_detail);
+ trace_mc_event(type, e->msg, e->label, e->error_count,
+ mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
+ PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
+ grain_bits, e->syndrome, pvt->detail_location);
+
+ /* Report the error via EDAC API */
+ edac_raw_mc_handle_error(type, mci, e);
+}
+EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error);
+
+int ghes_edac_register(struct ghes *ghes, struct device *dev)
+{
+ bool fake = false;
+ int rc, num_dimm = 0;
+ struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[1];
+ struct ghes_edac_pvt *pvt;
+ struct ghes_edac_dimm_fill dimm_fill;
+
+ /* Get the number of DIMMs */
+ dmi_walk(ghes_edac_count_dimms, &num_dimm);
+
+ /* Check if we've got a bogus BIOS */
+ if (num_dimm == 0) {
+ fake = true;
+ num_dimm = 1;
+ }
+
+ layers[0].type = EDAC_MC_LAYER_ALL_MEM;
+ layers[0].size = num_dimm;
+ layers[0].is_virt_csrow = true;
+
+ /*
+ * We need to serialize edac_mc_alloc() and edac_mc_add_mc(),
+ * to avoid duplicated memory controller numbers
+ */
+ mutex_lock(&ghes_edac_lock);
+ mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers,
+ sizeof(*pvt));
+ if (!mci) {
+ pr_info("Can't allocate memory for EDAC data\n");
+ mutex_unlock(&ghes_edac_lock);
+ return -ENOMEM;
+ }
+
+ pvt = mci->pvt_info;
+ memset(pvt, 0, sizeof(*pvt));
+ list_add_tail(&pvt->list, &ghes_reglist);
+ pvt->ghes = ghes;
+ pvt->mci = mci;
+ mci->pdev = dev;
+
+ mci->mtype_cap = MEM_FLAG_EMPTY;
+ mci->edac_ctl_cap = EDAC_FLAG_NONE;
+ mci->edac_cap = EDAC_FLAG_NONE;
+ mci->mod_name = "ghes_edac.c";
+ mci->mod_ver = GHES_EDAC_REVISION;
+ mci->ctl_name = "ghes_edac";
+ mci->dev_name = "ghes";
+
+ if (!ghes_edac_mc_num) {
+ if (!fake) {
+ pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n");
+ pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n");
+ pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
+ pr_info("If you find incorrect reports, please contact your hardware vendor\n");
+ pr_info("to correct its BIOS.\n");
+ pr_info("This system has %d DIMM sockets.\n",
+ num_dimm);
+ } else {
+ pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n");
+ pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n");
+ pr_info("work on such system. Use this driver with caution\n");
+ }
+ }
+
+ if (!fake) {
+ /*
+ * Fill DIMM info from DMI for the memory controller #0
+ *
+ * Keep it blank for the other memory controllers, as
+ * there's no reliable way to properly credit each DIMM to
+ * its memory controller, since different BIOSes fill the
+ * DMI bank location fields in different ways
+ */
+ if (!ghes_edac_mc_num) {
+ dimm_fill.count = 0;
+ dimm_fill.mci = mci;
+ dmi_walk(ghes_edac_dmidecode, &dimm_fill);
+ }
+ } else {
+ struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+ mci->n_layers, 0, 0, 0);
+
+ dimm->nr_pages = 1;
+ dimm->grain = 128;
+ dimm->mtype = MEM_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = EDAC_SECDED;
+ }
+
+ rc = edac_mc_add_mc(mci);
+ if (rc < 0) {
+ pr_info("Can't register at EDAC core\n");
+ edac_mc_free(mci);
+ mutex_unlock(&ghes_edac_lock);
+ return -ENODEV;
+ }
+
+ ghes_edac_mc_num++;
+ mutex_unlock(&ghes_edac_lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ghes_edac_register);
+
+void ghes_edac_unregister(struct ghes *ghes)
+{
+ struct mem_ctl_info *mci;
+ struct ghes_edac_pvt *pvt, *tmp;
+
+ list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) {
+ if (ghes == pvt->ghes) {
+ mci = pvt->mci;
+ edac_mc_del_mc(mci->pdev);
+ edac_mc_free(mci);
+ list_del(&pvt->list);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(ghes_edac_unregister);
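
ghes_edac_register()/ghes_edac_unregister() are intended to be called by the APEI GHES driver as each hardware error source comes and goes; the mutex plus ghes_edac_mc_num hand each source a distinct mc number. A hedged sketch of a caller (my_ghes_probe is illustrative, not the real ghes_probe()):

    #include <acpi/ghes.h>
    #include <linux/platform_device.h>

    /* sketch only: a real probe also parses the HEST entry, maps the
     * error status block, arms the notification method, etc. */
    static int my_ghes_probe(struct platform_device *pdev, struct ghes *ghes)
    {
            int rc;

            rc = ghes_edac_register(ghes, &pdev->dev);
            if (rc < 0)
                    return rc;      /* no EDAC mc for this source */

            return 0;
    }

    static void my_ghes_remove(struct ghes *ghes)
    {
            ghes_edac_unregister(ghes);
    }
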
diff --git a/drivers/edac/highbank_l2_edac.c b/drivers/edac/highbank_l2_edac.c
new file mode 100644
index 00000000000..2f193668ebc
--- /dev/null
+++ b/drivers/edac/highbank_l2_edac.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright 2011-2012 Calxeda, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
+#define SR_CLR_SB_ECC_INTR 0x0
+#define SR_CLR_DB_ECC_INTR 0x4
+
+struct hb_l2_drvdata {
+ void __iomem *base;
+ int sb_irq;
+ int db_irq;
+};
+
+static irqreturn_t highbank_l2_err_handler(int irq, void *dev_id)
+{
+ struct edac_device_ctl_info *dci = dev_id;
+ struct hb_l2_drvdata *drvdata = dci->pvt_info;
+
+ if (irq == drvdata->sb_irq) {
+ writel(1, drvdata->base + SR_CLR_SB_ECC_INTR);
+ edac_device_handle_ce(dci, 0, 0, dci->ctl_name);
+ }
+ if (irq == drvdata->db_irq) {
+ writel(1, drvdata->base + SR_CLR_DB_ECC_INTR);
+ edac_device_handle_ue(dci, 0, 0, dci->ctl_name);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static const struct of_device_id hb_l2_err_of_match[] = {
+ { .compatible = "calxeda,hb-sregs-l2-ecc", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, hb_l2_err_of_match);
+
+static int highbank_l2_err_probe(struct platform_device *pdev)
+{
+ const struct of_device_id *id;
+ struct edac_device_ctl_info *dci;
+ struct hb_l2_drvdata *drvdata;
+ struct resource *r;
+ int res = 0;
+
+ dci = edac_device_alloc_ctl_info(sizeof(*drvdata), "cpu",
+ 1, "L", 1, 2, NULL, 0, 0);
+ if (!dci)
+ return -ENOMEM;
+
+ drvdata = dci->pvt_info;
+ dci->dev = &pdev->dev;
+ platform_set_drvdata(pdev, dci);
+
+ if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL))
+ return -ENOMEM;
+
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!r) {
+ dev_err(&pdev->dev, "Unable to get mem resource\n");
+ res = -ENODEV;
+ goto err;
+ }
+
+ if (!devm_request_mem_region(&pdev->dev, r->start,
+ resource_size(r), dev_name(&pdev->dev))) {
+ dev_err(&pdev->dev, "Error while requesting mem region\n");
+ res = -EBUSY;
+ goto err;
+ }
+
+ drvdata->base = devm_ioremap(&pdev->dev, r->start, resource_size(r));
+ if (!drvdata->base) {
+ dev_err(&pdev->dev, "Unable to map regs\n");
+ res = -ENOMEM;
+ goto err;
+ }
+
+ id = of_match_device(hb_l2_err_of_match, &pdev->dev);
+ dci->mod_name = pdev->dev.driver->name;
+ dci->ctl_name = id ? id->compatible : "unknown";
+ dci->dev_name = dev_name(&pdev->dev);
+
+ if (edac_device_add_device(dci))
+ goto err;
+
+ drvdata->db_irq = platform_get_irq(pdev, 0);
+ res = devm_request_irq(&pdev->dev, drvdata->db_irq,
+ highbank_l2_err_handler,
+ 0, dev_name(&pdev->dev), dci);
+ if (res < 0)
+ goto err2;
+
+ drvdata->sb_irq = platform_get_irq(pdev, 1);
+ res = devm_request_irq(&pdev->dev, drvdata->sb_irq,
+ highbank_l2_err_handler,
+ 0, dev_name(&pdev->dev), dci);
+ if (res < 0)
+ goto err2;
+
+ devres_close_group(&pdev->dev, NULL);
+ return 0;
+err2:
+ edac_device_del_device(&pdev->dev);
+err:
+ devres_release_group(&pdev->dev, NULL);
+ edac_device_free_ctl_info(dci);
+ return res;
+}
+
+static int highbank_l2_err_remove(struct platform_device *pdev)
+{
+ struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
+
+ edac_device_del_device(&pdev->dev);
+ edac_device_free_ctl_info(dci);
+ return 0;
+}
+
+static struct platform_driver highbank_l2_edac_driver = {
+ .probe = highbank_l2_err_probe,
+ .remove = highbank_l2_err_remove,
+ .driver = {
+ .name = "hb_l2_edac",
+ .of_match_table = hb_l2_err_of_match,
+ },
+};
+
+module_platform_driver(highbank_l2_edac_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Calxeda, Inc.");
+MODULE_DESCRIPTION("EDAC Driver for Calxeda Highbank L2 Cache");
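
The probe above leans on devres groups: devres_open_group() sets a checkpoint, every devm_* allocation after it attaches to that group, devres_close_group() keeps what was acquired, and a single devres_release_group() on the failure path frees everything since the checkpoint. The skeleton of that pattern (my_probe is hypothetical):

    #include <linux/device.h>
    #include <linux/io.h>
    #include <linux/platform_device.h>

    static int my_probe(struct platform_device *pdev)
    {
            void __iomem *base;
            struct resource *r;

            if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL))
                    return -ENOMEM;

            r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
            if (!r)
                    goto err;

            base = devm_ioremap(&pdev->dev, r->start, resource_size(r));
            if (!base)
                    goto err;

            devres_close_group(&pdev->dev, NULL);   /* keep what we got */
            return 0;

    err:
            /* releases every devm_* allocation made since open_group() */
            devres_release_group(&pdev->dev, NULL);
            return -ENODEV;
    }
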
diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c
new file mode 100644
index 00000000000..f784de1dc79
--- /dev/null
+++ b/drivers/edac/highbank_mc_edac.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright 2011-2012 Calxeda, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/uaccess.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
+/* DDR Ctrlr Error Registers */
+
+#define HB_DDR_ECC_ERR_BASE 0x128
+#define MW_DDR_ECC_ERR_BASE 0x1b4
+
+#define HB_DDR_ECC_OPT 0x00
+#define HB_DDR_ECC_U_ERR_ADDR 0x08
+#define HB_DDR_ECC_U_ERR_STAT 0x0c
+#define HB_DDR_ECC_U_ERR_DATAL 0x10
+#define HB_DDR_ECC_U_ERR_DATAH 0x14
+#define HB_DDR_ECC_C_ERR_ADDR 0x18
+#define HB_DDR_ECC_C_ERR_STAT 0x1c
+#define HB_DDR_ECC_C_ERR_DATAL 0x20
+#define HB_DDR_ECC_C_ERR_DATAH 0x24
+
+#define HB_DDR_ECC_OPT_MODE_MASK 0x3
+#define HB_DDR_ECC_OPT_FWC 0x100
+#define HB_DDR_ECC_OPT_XOR_SHIFT 16
+
+/* DDR Ctrlr Interrupt Registers */
+
+#define HB_DDR_ECC_INT_BASE 0x180
+#define MW_DDR_ECC_INT_BASE 0x218
+
+#define HB_DDR_ECC_INT_STATUS 0x00
+#define HB_DDR_ECC_INT_ACK 0x04
+
+#define HB_DDR_ECC_INT_STAT_CE 0x8
+#define HB_DDR_ECC_INT_STAT_DOUBLE_CE 0x10
+#define HB_DDR_ECC_INT_STAT_UE 0x20
+#define HB_DDR_ECC_INT_STAT_DOUBLE_UE 0x40
+
+struct hb_mc_drvdata {
+ void __iomem *mc_err_base;
+ void __iomem *mc_int_base;
+};
+
+static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id)
+{
+ struct mem_ctl_info *mci = dev_id;
+ struct hb_mc_drvdata *drvdata = mci->pvt_info;
+ u32 status, err_addr;
+
+ /* Read the interrupt status register */
+ status = readl(drvdata->mc_int_base + HB_DDR_ECC_INT_STATUS);
+
+ if (status & HB_DDR_ECC_INT_STAT_UE) {
+ err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_U_ERR_ADDR);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ err_addr >> PAGE_SHIFT,
+ err_addr & ~PAGE_MASK, 0,
+ 0, 0, -1,
+ mci->ctl_name, "");
+ }
+ if (status & HB_DDR_ECC_INT_STAT_CE) {
+ u32 syndrome = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_STAT);
+ syndrome = (syndrome >> 8) & 0xff;
+ err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_ADDR);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ err_addr >> PAGE_SHIFT,
+ err_addr & ~PAGE_MASK, syndrome,
+ 0, 0, -1,
+ mci->ctl_name, "");
+ }
+
+ /* clear the error, clears the interrupt */
+ writel(status, drvdata->mc_int_base + HB_DDR_ECC_INT_ACK);
+ return IRQ_HANDLED;
+}
+
+static void highbank_mc_err_inject(struct mem_ctl_info *mci, u8 synd)
+{
+ struct hb_mc_drvdata *pdata = mci->pvt_info;
+ u32 reg;
+
+ reg = readl(pdata->mc_err_base + HB_DDR_ECC_OPT);
+ reg &= HB_DDR_ECC_OPT_MODE_MASK;
+ reg |= (synd << HB_DDR_ECC_OPT_XOR_SHIFT) | HB_DDR_ECC_OPT_FWC;
+ writel(reg, pdata->mc_err_base + HB_DDR_ECC_OPT);
+}
+
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+
+static ssize_t highbank_mc_inject_ctrl(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ u8 synd;
+
+ if (kstrtou8(buf, 16, &synd))
+ return -EINVAL;
+
+ highbank_mc_err_inject(mci, synd);
+
+ return count;
+}
+
+static DEVICE_ATTR(inject_ctrl, S_IWUSR, NULL, highbank_mc_inject_ctrl);
+
+struct hb_mc_settings {
+ int err_offset;
+ int int_offset;
+};
+
+static struct hb_mc_settings hb_settings = {
+ .err_offset = HB_DDR_ECC_ERR_BASE,
+ .int_offset = HB_DDR_ECC_INT_BASE,
+};
+
+static struct hb_mc_settings mw_settings = {
+ .err_offset = MW_DDR_ECC_ERR_BASE,
+ .int_offset = MW_DDR_ECC_INT_BASE,
+};
+
+static struct of_device_id hb_ddr_ctrl_of_match[] = {
+ { .compatible = "calxeda,hb-ddr-ctrl", .data = &hb_settings },
+ { .compatible = "calxeda,ecx-2000-ddr-ctrl", .data = &mw_settings },
+ {},
+};
+MODULE_DEVICE_TABLE(of, hb_ddr_ctrl_of_match);
+
+static int highbank_mc_probe(struct platform_device *pdev)
+{
+ const struct of_device_id *id;
+ const struct hb_mc_settings *settings;
+ struct edac_mc_layer layers[2];
+ struct mem_ctl_info *mci;
+ struct hb_mc_drvdata *drvdata;
+ struct dimm_info *dimm;
+ struct resource *r;
+ void __iomem *base;
+ u32 control;
+ int irq;
+ int res = 0;
+
+ id = of_match_device(hb_ddr_ctrl_of_match, &pdev->dev);
+ if (!id)
+ return -ENODEV;
+
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = 1;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+ sizeof(struct hb_mc_drvdata));
+ if (!mci)
+ return -ENOMEM;
+
+ mci->pdev = &pdev->dev;
+ drvdata = mci->pvt_info;
+ platform_set_drvdata(pdev, mci);
+
+ if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL))
+ return -ENOMEM;
+
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!r) {
+ dev_err(&pdev->dev, "Unable to get mem resource\n");
+ res = -ENODEV;
+ goto err;
+ }
+
+ if (!devm_request_mem_region(&pdev->dev, r->start,
+ resource_size(r), dev_name(&pdev->dev))) {
+ dev_err(&pdev->dev, "Error while requesting mem region\n");
+ res = -EBUSY;
+ goto err;
+ }
+
+ base = devm_ioremap(&pdev->dev, r->start, resource_size(r));
+ if (!base) {
+ dev_err(&pdev->dev, "Unable to map regs\n");
+ res = -ENOMEM;
+ goto err;
+ }
+
+ settings = id->data;
+ drvdata->mc_err_base = base + settings->err_offset;
+ drvdata->mc_int_base = base + settings->int_offset;
+
+ control = readl(drvdata->mc_err_base + HB_DDR_ECC_OPT) & 0x3;
+ if (!control || (control == 0x2)) {
+ dev_err(&pdev->dev, "No ECC present, or ECC disabled\n");
+ res = -ENODEV;
+ goto err;
+ }
+
+ mci->mtype_cap = MEM_FLAG_DDR3;
+ mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+ mci->edac_cap = EDAC_FLAG_SECDED;
+ mci->mod_name = pdev->dev.driver->name;
+ mci->mod_ver = "1";
+ mci->ctl_name = id->compatible;
+ mci->dev_name = dev_name(&pdev->dev);
+ mci->scrub_mode = SCRUB_SW_SRC;
+
+ /* Only a single 4GB DIMM is supported */
+ dimm = *mci->dimms;
+ dimm->nr_pages = (~0UL >> PAGE_SHIFT) + 1;
+ dimm->grain = 8;
+ dimm->dtype = DEV_X8;
+ dimm->mtype = MEM_DDR3;
+ dimm->edac_mode = EDAC_SECDED;
+
+ res = edac_mc_add_mc(mci);
+ if (res < 0)
+ goto err;
+
+ irq = platform_get_irq(pdev, 0);
+ res = devm_request_irq(&pdev->dev, irq, highbank_mc_err_handler,
+ 0, dev_name(&pdev->dev), mci);
+ if (res < 0) {
+ dev_err(&pdev->dev, "Unable to request irq %d\n", irq);
+ goto err2;
+ }
+
+ device_create_file(&mci->dev, &dev_attr_inject_ctrl);
+
+ devres_close_group(&pdev->dev, NULL);
+ return 0;
+err2:
+ edac_mc_del_mc(&pdev->dev);
+err:
+ devres_release_group(&pdev->dev, NULL);
+ edac_mc_free(mci);
+ return res;
+}
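+
+/*
+ * Note on the cleanup pattern above: every devm_* allocation is
+ * bracketed by devres_open_group()/devres_close_group(), so the
+ * error path can undo all of them with a single
+ * devres_release_group() call. The mci structure itself is not
+ * device-managed, which is why both error paths still call
+ * edac_mc_free() explicitly.
+ */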
+
+static int highbank_mc_remove(struct platform_device *pdev)
+{
+ struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+ device_remove_file(&mci->dev, &dev_attr_inject_ctrl);
+ edac_mc_del_mc(&pdev->dev);
+ edac_mc_free(mci);
+ return 0;
+}
+
+static struct platform_driver highbank_mc_edac_driver = {
+ .probe = highbank_mc_probe,
+ .remove = highbank_mc_remove,
+ .driver = {
+ .name = "hb_mc_edac",
+ .of_match_table = hb_ddr_ctrl_of_match,
+ },
+};
+
+module_platform_driver(highbank_mc_edac_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Calxeda, Inc.");
+MODULE_DESCRIPTION("EDAC Driver for Calxeda Highbank");
diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c
index c0510b3d703..cd28b968e5c 100644
--- a/drivers/edac/i3000_edac.c
+++ b/drivers/edac/i3000_edac.c
@@ -194,7 +194,7 @@ static void i3000_get_error_info(struct mem_ctl_info *mci,
{
struct pci_dev *pdev;
- pdev = to_pci_dev(mci->dev);
+ pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
@@ -236,7 +236,7 @@ static int i3000_process_error_info(struct mem_ctl_info *mci,
int row, multi_chan, channel;
unsigned long pfn, offset;
- multi_chan = mci->csrows[0].nr_channels - 1;
+ multi_chan = mci->csrows[0]->nr_channels - 1;
if (!(info->errsts & I3000_ERRSTS_BITS))
return 0;
@@ -245,7 +245,9 @@ static int i3000_process_error_info(struct mem_ctl_info *mci,
return 1;
if ((info->errsts ^ info->errsts2) & I3000_ERRSTS_BITS) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1,
+ "UE overwrote CE", "");
info->errsts = info->errsts2;
}
@@ -256,10 +258,15 @@ static int i3000_process_error_info(struct mem_ctl_info *mci,
row = edac_mc_find_csrow_by_page(mci, pfn);
if (info->errsts & I3000_ERRSTS_UE)
- edac_mc_handle_ue(mci, pfn, offset, row, "i3000 UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ pfn, offset, 0,
+ row, -1, -1,
+ "i3000 UE", "");
else
- edac_mc_handle_ce(mci, pfn, offset, info->derrsyn, row,
- multi_chan ? channel : 0, "i3000 CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ pfn, offset, info->derrsyn,
+ row, multi_chan ? channel : 0, -1,
+ "i3000 CE", "");
return 1;
}
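The error-reporting conversions in this hunk (and the matching ones in the
drivers below) all funnel into the unified entry point of the reworked EDAC
core. A sketch of the call with the argument roles spelled out, as used in
this series:

	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, /* or _UNCORRECTED/_FATAL */
			     mci,
			     1,			/* error count */
			     pfn,		/* page frame of the error */
			     offset,		/* offset within that page */
			     syndrome,		/* ECC syndrome, 0 if unknown */
			     row, channel, -1,	/* position in each layer, -1 = unknown */
			     "i3000 CE",	/* short message */
			     "");		/* free-form detail */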
@@ -268,7 +275,7 @@ static void i3000_check(struct mem_ctl_info *mci)
{
struct i3000_error_info info;
- debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+ edac_dbg(1, "MC%d\n", mci->mc_idx);
i3000_get_error_info(mci, &info);
i3000_process_error_info(mci, &info, 1);
}
@@ -304,9 +311,10 @@ static int i3000_is_interleaved(const unsigned char *c0dra,
static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc;
- int i;
+ int i, j;
struct mem_ctl_info *mci = NULL;
- unsigned long last_cumul_size;
+ struct edac_mc_layer layers[2];
+ unsigned long last_cumul_size, nr_pages;
int interleaved, nr_channels;
unsigned char dra[I3000_RANKS / 2], drb[I3000_RANKS];
unsigned char *c0dra = dra, *c1dra = &dra[I3000_RANKS_PER_CHANNEL / 2];
@@ -314,7 +322,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
unsigned long mchbar;
void __iomem *window;
- debugf0("MC: %s()\n", __func__);
+ edac_dbg(0, "MC:\n");
pci_read_config_dword(pdev, I3000_MCHBAR, (u32 *) & mchbar);
mchbar &= I3000_MCHBAR_MASK;
@@ -347,13 +355,20 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
*/
interleaved = i3000_is_interleaved(c0dra, c1dra, c0drb, c1drb);
nr_channels = interleaved ? 2 : 1;
- mci = edac_mc_alloc(0, I3000_RANKS / nr_channels, nr_channels, 0);
+
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = I3000_RANKS / nr_channels;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = nr_channels;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (!mci)
return -ENOMEM;
- debugf3("MC: %s(): init mci\n", __func__);
+ edac_dbg(3, "MC: init mci\n");
- mci->dev = &pdev->dev;
+ mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR2;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
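For readers following the series: edac_mc_alloc() no longer takes raw
csrow/channel counts but a description of the memory hierarchy, and it sizes
its internal dimm table as the product of the layer sizes. A minimal sketch of
the two-layer form used by this driver:

	struct edac_mc_layer layers[2];

	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;	/* outer layer */
	layers[0].size = I3000_RANKS / nr_channels;
	layers[0].is_virt_csrow = true;		/* backs the legacy csrow sysfs view */
	layers[1].type = EDAC_MC_LAYER_CHANNEL;		/* inner layer */
	layers[1].size = nr_channels;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(0 /* mc index */, ARRAY_SIZE(layers), layers,
			    0 /* pvt_info size */);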
@@ -378,27 +393,30 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
for (last_cumul_size = i = 0; i < mci->nr_csrows; i++) {
u8 value;
u32 cumul_size;
- struct csrow_info *csrow = &mci->csrows[i];
+ struct csrow_info *csrow = mci->csrows[i];
value = drb[i];
cumul_size = value << (I3000_DRB_SHIFT - PAGE_SHIFT);
if (interleaved)
cumul_size <<= 1;
- debugf3("MC: %s(): (%d) cumul_size 0x%x\n",
- __func__, i, cumul_size);
- if (cumul_size == last_cumul_size) {
- csrow->mtype = MEM_EMPTY;
+ edac_dbg(3, "MC: (%d) cumul_size 0x%x\n", i, cumul_size);
+ if (cumul_size == last_cumul_size)
continue;
- }
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
+ nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = I3000_DEAP_GRAIN;
- csrow->mtype = MEM_DDR2;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = EDAC_UNKNOWN;
+
+ for (j = 0; j < nr_channels; j++) {
+ struct dimm_info *dimm = csrow->channels[j]->dimm;
+
+ dimm->nr_pages = nr_pages / nr_channels;
+ dimm->grain = I3000_DEAP_GRAIN;
+ dimm->mtype = MEM_DDR2;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = EDAC_UNKNOWN;
+ }
}
/*
@@ -410,7 +428,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
rc = -ENODEV;
if (edac_mc_add_mc(mci)) {
- debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "MC: failed edac_mc_add_mc()\n");
goto fail;
}
@@ -426,7 +444,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
}
/* get this far and it's successful */
- debugf3("MC: %s(): success\n", __func__);
+ edac_dbg(3, "MC: success\n");
return 0;
fail:
@@ -437,12 +455,11 @@ fail:
}
/* returns count (>= 0), or negative on error */
-static int __devinit i3000_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int i3000_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int rc;
- debugf0("MC: %s()\n", __func__);
+ edac_dbg(0, "MC:\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
@@ -454,11 +471,11 @@ static int __devinit i3000_init_one(struct pci_dev *pdev,
return rc;
}
-static void __devexit i3000_remove_one(struct pci_dev *pdev)
+static void i3000_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
if (i3000_pci)
edac_pci_release_generic_ctl(i3000_pci);
@@ -470,7 +487,7 @@ static void __devexit i3000_remove_one(struct pci_dev *pdev)
edac_mc_free(mci);
}
-static const struct pci_device_id i3000_pci_tbl[] __devinitdata = {
+static const struct pci_device_id i3000_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 3000_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
I3000},
@@ -484,7 +501,7 @@ MODULE_DEVICE_TABLE(pci, i3000_pci_tbl);
static struct pci_driver i3000_driver = {
.name = EDAC_MOD_STR,
.probe = i3000_init_one,
- .remove = __devexit_p(i3000_remove_one),
+ .remove = i3000_remove_one,
.id_table = i3000_pci_tbl,
};
@@ -492,7 +509,7 @@ static int __init i3000_init(void)
{
int pci_rc;
- debugf3("MC: %s()\n", __func__);
+ edac_dbg(3, "MC:\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -506,14 +523,14 @@ static int __init i3000_init(void)
mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_3000_HB, NULL);
if (!mci_pdev) {
- debugf0("i3000 pci_get_device fail\n");
+ edac_dbg(0, "i3000 pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
pci_rc = i3000_init_one(mci_pdev, i3000_pci_tbl);
if (pci_rc < 0) {
- debugf0("i3000 init fail\n");
+ edac_dbg(0, "i3000 init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
@@ -533,7 +550,7 @@ fail0:
static void __exit i3000_exit(void)
{
- debugf3("MC: %s()\n", __func__);
+ edac_dbg(3, "MC:\n");
pci_unregister_driver(&i3000_driver);
if (!i3000_registered) {
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index d41f9002da4..022a70273ad 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -15,12 +15,15 @@
#include <linux/io.h>
#include "edac_core.h"
+#include <asm-generic/io-64-nonatomic-lo-hi.h>
+
#define I3200_REVISION "1.1"
#define EDAC_MOD_STR "i3200_edac"
#define PCI_DEVICE_ID_INTEL_3200_HB 0x29f0
+#define I3200_DIMMS 4
#define I3200_RANKS 8
#define I3200_RANKS_PER_CHANNEL 4
#define I3200_CHANNELS 2
@@ -103,16 +106,26 @@ static int nr_channels;
static int how_many_channels(struct pci_dev *pdev)
{
+ int n_channels;
+
unsigned char capid0_8b; /* 8th byte of CAPID0 */
pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
+
if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
- debugf0("In single channel mode.\n");
- return 1;
+ edac_dbg(0, "In single channel mode\n");
+ n_channels = 1;
} else {
- debugf0("In dual channel mode.\n");
- return 2;
+ edac_dbg(0, "In dual channel mode\n");
+ n_channels = 2;
}
+
+ if (capid0_8b & 0x10) /* check if both channels are filled */
+ edac_dbg(0, "2 DIMMS per channel disabled\n");
+ else
+ edac_dbg(0, "2 DIMMS per channel enabled\n");
+
+ return n_channels;
}
static unsigned long eccerrlog_syndrome(u64 log)
@@ -156,7 +169,7 @@ static void i3200_clear_error_info(struct mem_ctl_info *mci)
{
struct pci_dev *pdev;
- pdev = to_pci_dev(mci->dev);
+ pdev = to_pci_dev(mci->pdev);
/*
* Clear any error bits.
@@ -173,7 +186,7 @@ static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
struct i3200_priv *priv = mci->pvt_info;
void __iomem *window = priv->window;
- pdev = to_pci_dev(mci->dev);
+ pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
@@ -215,21 +228,25 @@ static void i3200_process_error_info(struct mem_ctl_info *mci,
return;
if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, "UE overwrote CE", "");
info->errsts = info->errsts2;
}
for (channel = 0; channel < nr_channels; channel++) {
log = info->eccerrlog[channel];
if (log & I3200_ECCERRLOG_UE) {
- edac_mc_handle_ue(mci, 0, 0,
- eccerrlog_row(channel, log),
- "i3200 UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ 0, 0, 0,
+ eccerrlog_row(channel, log),
+ -1, -1,
+ "i3000 UE", "");
} else if (log & I3200_ECCERRLOG_CE) {
- edac_mc_handle_ce(mci, 0, 0,
- eccerrlog_syndrome(log),
- eccerrlog_row(channel, log), 0,
- "i3200 CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ 0, 0, eccerrlog_syndrome(log),
+ eccerrlog_row(channel, log),
+ -1, -1,
+ "i3200 CE", "");
}
}
}
@@ -238,13 +255,12 @@ static void i3200_check(struct mem_ctl_info *mci)
{
struct i3200_error_info info;
- debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+ edac_dbg(1, "MC%d\n", mci->mc_idx);
i3200_get_and_clear_error_info(mci, &info);
i3200_process_error_info(mci, &info);
}
-
-void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
+static void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
{
union {
u64 mchbar;
@@ -283,6 +299,8 @@ static void i3200_get_drbs(void __iomem *window,
for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
+
+ edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]);
}
}
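The DRB registers read above are cumulative per channel: each entry holds the
top of memory up to and including that rank, in units defined by
I3200_DRB_SHIFT. The drb_to_nr_pages() change below adds an early bail-out
when the register itself reads zero (nothing populated up to that rank). A
worked sketch with hypothetical values:

	/* drbs[ch] = { 4, 8, 8, 12 }  (hypothetical, cumulative units) */
	/* rank 0:  4 - 0 = 4 units                                     */
	/* rank 1:  8 - 4 = 4 units                                     */
	/* rank 2:  8 - 8 = 0 units -> empty slot                       */
	/* rank 3: 12 - 8 = 4 units                                     */
	/* each unit is then scaled to pages by the rest of the helper: */
	/* nr_pages = n << (I3200_DRB_SHIFT - PAGE_SHIFT)               */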
@@ -304,6 +322,9 @@ static unsigned long drb_to_nr_pages(
int n;
n = drbs[channel][rank];
+ if (!n)
+ return 0;
+
if (rank > 0)
n -= drbs[channel][rank - 1];
if (stacked && (channel == 1) &&
@@ -317,15 +338,15 @@ static unsigned long drb_to_nr_pages(
static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc;
- int i;
+ int i, j;
struct mem_ctl_info *mci = NULL;
- unsigned long last_page;
+ struct edac_mc_layer layers[2];
u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
bool stacked;
void __iomem *window;
struct i3200_priv *priv;
- debugf0("MC: %s()\n", __func__);
+ edac_dbg(0, "MC:\n");
window = i3200_map_mchbar(pdev);
if (!window)
@@ -334,14 +355,20 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
i3200_get_drbs(window, drbs);
nr_channels = how_many_channels(pdev);
- mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS,
- nr_channels, 0);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = I3200_DIMMS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = nr_channels;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+ sizeof(struct i3200_priv));
if (!mci)
return -ENOMEM;
- debugf3("MC: %s(): init mci\n", __func__);
+ edac_dbg(3, "MC: init mci\n");
- mci->dev = &pdev->dev;
+ mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR2;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
@@ -364,41 +391,38 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
* cumulative; the last one will contain the total memory
* contained in all ranks.
*/
- last_page = -1UL;
- for (i = 0; i < mci->nr_csrows; i++) {
+ for (i = 0; i < I3200_DIMMS; i++) {
unsigned long nr_pages;
- struct csrow_info *csrow = &mci->csrows[i];
- nr_pages = drb_to_nr_pages(drbs, stacked,
- i / I3200_RANKS_PER_CHANNEL,
- i % I3200_RANKS_PER_CHANNEL);
+ for (j = 0; j < nr_channels; j++) {
+ struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+ mci->n_layers, i, j, 0);
- if (nr_pages == 0) {
- csrow->mtype = MEM_EMPTY;
- continue;
- }
+ nr_pages = drb_to_nr_pages(drbs, stacked, j, i);
+ if (nr_pages == 0)
+ continue;
- csrow->first_page = last_page + 1;
- last_page += nr_pages;
- csrow->last_page = last_page;
- csrow->nr_pages = nr_pages;
+ edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j,
+ stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages));
- csrow->grain = nr_pages << PAGE_SHIFT;
- csrow->mtype = MEM_DDR2;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = EDAC_UNKNOWN;
+ dimm->nr_pages = nr_pages;
+ dimm->grain = nr_pages << PAGE_SHIFT;
+ dimm->mtype = MEM_DDR2;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = EDAC_UNKNOWN;
+ }
}
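EDAC_DIMM_PTR(), used in the loop above, resolves per-layer coordinates into
the flat dimm array that edac_mc_alloc() created. For this two-layer setup the
lookup is effectively row-major indexing (sketch, assuming the layer order
declared earlier in this function):

	/* layer 0 = csrow (size I3200_DIMMS), layer 1 = channel */
	struct dimm_info *dimm = mci->dimms[i * nr_channels + j];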
i3200_clear_error_info(mci);
rc = -ENODEV;
if (edac_mc_add_mc(mci)) {
- debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "MC: failed edac_mc_add_mc()\n");
goto fail;
}
/* get this far and it's successful */
- debugf3("MC: %s(): success\n", __func__);
+ edac_dbg(3, "MC: success\n");
return 0;
fail:
@@ -409,12 +433,11 @@ fail:
return rc;
}
-static int __devinit i3200_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int i3200_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int rc;
- debugf0("MC: %s()\n", __func__);
+ edac_dbg(0, "MC:\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
@@ -426,12 +449,12 @@ static int __devinit i3200_init_one(struct pci_dev *pdev,
return rc;
}
-static void __devexit i3200_remove_one(struct pci_dev *pdev)
+static void i3200_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct i3200_priv *priv;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
mci = edac_mc_del_mc(&pdev->dev);
if (!mci)
@@ -441,9 +464,11 @@ static void __devexit i3200_remove_one(struct pci_dev *pdev)
iounmap(priv->window);
edac_mc_free(mci);
+
+ pci_disable_device(pdev);
}
-static const struct pci_device_id i3200_pci_tbl[] __devinitdata = {
+static const struct pci_device_id i3200_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
I3200},
@@ -457,7 +482,7 @@ MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
static struct pci_driver i3200_driver = {
.name = EDAC_MOD_STR,
.probe = i3200_init_one,
- .remove = __devexit_p(i3200_remove_one),
+ .remove = i3200_remove_one,
.id_table = i3200_pci_tbl,
};
@@ -465,7 +490,7 @@ static int __init i3200_init(void)
{
int pci_rc;
- debugf3("MC: %s()\n", __func__);
+ edac_dbg(3, "MC:\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -479,14 +504,14 @@ static int __init i3200_init(void)
mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_3200_HB, NULL);
if (!mci_pdev) {
- debugf0("i3200 pci_get_device fail\n");
+ edac_dbg(0, "i3200 pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
if (pci_rc < 0) {
- debugf0("i3200 init fail\n");
+ edac_dbg(0, "i3200 init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
@@ -506,7 +531,7 @@ fail0:
static void __exit i3200_exit(void)
{
- debugf3("MC: %s()\n", __func__);
+ edac_dbg(3, "MC:\n");
pci_unregister_driver(&i3200_driver);
if (!i3200_registered) {
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index a5cefab8d65..72e07e3cf71 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -27,7 +27,7 @@
/*
* Alter this version for the I5000 module when modifications are made
*/
-#define I5000_REVISION " Ver: 2.0.12 " __DATE__
+#define I5000_REVISION " Ver: 2.0.12"
#define EDAC_MOD_STR "i5000_edac"
#define i5000_printk(level, fmt, arg...) \
@@ -270,9 +270,10 @@
#define MTR3 0x8C
#define NUM_MTRS 4
-#define CHANNELS_PER_BRANCH (2)
+#define CHANNELS_PER_BRANCH 2
+#define MAX_BRANCHES 2
-/* Defines to extract the vaious fields from the
+/* Defines to extract the various fields from the
* MTRx - Memory Technology Registers
*/
#define MTR_DIMMS_PRESENT(mtr) ((mtr) & (0x1 << 8))
@@ -286,22 +287,6 @@
#define MTR_DIMM_COLS(mtr) ((mtr) & 0x3)
#define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10)
-#ifdef CONFIG_EDAC_DEBUG
-static char *numrow_toString[] = {
- "8,192 - 13 rows",
- "16,384 - 14 rows",
- "32,768 - 15 rows",
- "reserved"
-};
-
-static char *numcol_toString[] = {
- "1,024 - 10 columns",
- "2,048 - 11 columns",
- "4,096 - 12 columns",
- "reserved"
-};
-#endif
-
/* enables the report of miscellaneous messages as CE errors - default off */
static int misc_messages;
@@ -343,7 +328,13 @@ struct i5000_pvt {
struct pci_dev *branch_1; /* 22.0 */
u16 tolm; /* top of low memory */
- u64 ambase; /* AMB BAR */
+ union {
+ u64 ambase; /* AMB BAR */
+ struct {
+ u32 ambase_bottom;
+ u32 ambase_top;
+ } u __packed;
+ };
u16 mir0, mir1, mir2;
@@ -473,7 +464,6 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
char msg[EDAC_MC_LABEL_LEN + 1 + 160];
char *specific = NULL;
u32 allErrors;
- int branch;
int channel;
int bank;
int rank;
@@ -485,8 +475,7 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
if (!allErrors)
return; /* if no error, return now */
- branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd);
- channel = branch;
+ channel = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd);
/* Use the NON-Recoverable macros to extract data */
bank = NREC_BANK(info->nrecmema);
@@ -495,10 +484,9 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
ras = NREC_RAS(info->nrecmemb);
cas = NREC_CAS(info->nrecmemb);
- debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d "
- "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
- rank, channel, channel + 1, branch >> 1, bank,
- rdwr ? "Write" : "Read", ras, cas);
+ edac_dbg(0, "\t\tCSROW= %d Channel= %d (DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
+ rank, channel, bank,
+ rdwr ? "Write" : "Read", ras, cas);
/* Only 1 bit will be on */
switch (allErrors) {
@@ -533,13 +521,14 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
/* Form out message */
snprintf(msg, sizeof(msg),
- "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d "
- "FATAL Err=0x%x (%s))",
- branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,
- allErrors, specific);
+ "Bank=%d RAS=%d CAS=%d FATAL Err=0x%x (%s)",
+ bank, ras, cas, allErrors, specific);
/* Call the helper to output message */
- edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
+ edac_mc_handle_error(HW_EVENT_ERR_FATAL, mci, 1, 0, 0, 0,
+ channel >> 1, channel & 1, rank,
+ rdwr ? "Write error" : "Read error",
+ msg);
}
/*
@@ -574,7 +563,7 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
/* ONLY ONE of the possible error bits will be set, as per the docs */
ue_errors = allErrors & FERR_NF_UNCORRECTABLE;
if (ue_errors) {
- debugf0("\tUncorrected bits= 0x%x\n", ue_errors);
+ edac_dbg(0, "\tUncorrected bits= 0x%x\n", ue_errors);
branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd);
@@ -590,11 +579,9 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
ras = NREC_RAS(info->nrecmemb);
cas = NREC_CAS(info->nrecmemb);
- debugf0
- ("\t\tCSROW= %d Channels= %d,%d (Branch= %d "
- "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
- rank, channel, channel + 1, branch >> 1, bank,
- rdwr ? "Write" : "Read", ras, cas);
+ edac_dbg(0, "\t\tCSROW= %d Channels= %d,%d (Branch= %d DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
+ rank, channel, channel + 1, branch >> 1, bank,
+ rdwr ? "Write" : "Read", ras, cas);
switch (ue_errors) {
case FERR_NF_M12ERR:
@@ -633,19 +620,20 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
/* Form out message */
snprintf(msg, sizeof(msg),
- "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "
- "CAS=%d, UE Err=0x%x (%s))",
- branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,
- ue_errors, specific);
+ "Rank=%d Bank=%d RAS=%d CAS=%d, UE Err=0x%x (%s)",
+ rank, bank, ras, cas, ue_errors, specific);
/* Call the helper to output message */
- edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ channel >> 1, -1, rank,
+ rdwr ? "Write error" : "Read error",
+ msg);
}
/* Check correctable errors */
ce_errors = allErrors & FERR_NF_CORRECTABLE;
if (ce_errors) {
- debugf0("\tCorrected bits= 0x%x\n", ce_errors);
+ edac_dbg(0, "\tCorrected bits= 0x%x\n", ce_errors);
branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd);
@@ -663,10 +651,9 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
ras = REC_RAS(info->recmemb);
cas = REC_CAS(info->recmemb);
- debugf0("\t\tCSROW= %d Channel= %d (Branch %d "
- "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
- rank, channel, branch >> 1, bank,
- rdwr ? "Write" : "Read", ras, cas);
+ edac_dbg(0, "\t\tCSROW= %d Channel= %d (Branch %d DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
+ rank, channel, branch >> 1, bank,
+ rdwr ? "Write" : "Read", ras, cas);
switch (ce_errors) {
case FERR_NF_M17ERR:
@@ -685,13 +672,16 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
/* Form out message */
snprintf(msg, sizeof(msg),
- "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "
+ "Rank=%d Bank=%d RDWR=%s RAS=%d "
"CAS=%d, CE Err=0x%x (%s))", branch >> 1, bank,
rdwr ? "Write" : "Read", ras, cas, ce_errors,
specific);
/* Call the helper to output message */
- edac_mc_handle_fbd_ce(mci, rank, channel, msg);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0,
+ channel >> 1, channel % 2, rank,
+ rdwr ? "Write error" : "Read error",
+ msg);
}
if (!misc_messages)
@@ -731,11 +721,12 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
/* Form out message */
snprintf(msg, sizeof(msg),
- "(Branch=%d Err=%#x (%s))", branch >> 1,
- misc_errors, specific);
+ "Err=%#x (%s)", misc_errors, specific);
/* Call the helper to output message */
- edac_mc_handle_fbd_ce(mci, 0, 0, msg);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0,
+ branch >> 1, -1, -1,
+ "Misc error", msg);
}
}
@@ -774,7 +765,7 @@ static void i5000_clear_error(struct mem_ctl_info *mci)
static void i5000_check_error(struct mem_ctl_info *mci)
{
struct i5000_error_info info;
- debugf4("MC%d: %s: %s()\n", mci->mc_idx, __FILE__, __func__);
+ edac_dbg(4, "MC%d\n", mci->mc_idx);
i5000_get_error_info(mci, &info);
i5000_process_error_info(mci, &info, 1);
}
@@ -845,15 +836,16 @@ static int i5000_get_devices(struct mem_ctl_info *mci, int dev_idx)
pvt->fsb_error_regs = pdev;
- debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n",
- pci_name(pvt->system_address),
- pvt->system_address->vendor, pvt->system_address->device);
- debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
- pci_name(pvt->branchmap_werrors),
- pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device);
- debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n",
- pci_name(pvt->fsb_error_regs),
- pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device);
+ edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n",
+ pci_name(pvt->system_address),
+ pvt->system_address->vendor, pvt->system_address->device);
+ edac_dbg(1, "Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
+ pci_name(pvt->branchmap_werrors),
+ pvt->branchmap_werrors->vendor,
+ pvt->branchmap_werrors->device);
+ edac_dbg(1, "FSB Error Regs - PCI Bus ID: %s %x:%x\n",
+ pci_name(pvt->fsb_error_regs),
+ pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device);
pdev = NULL;
pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
@@ -956,14 +948,14 @@ static int determine_amb_present_reg(struct i5000_pvt *pvt, int channel)
*
* return the proper MTR register as determine by the csrow and channel desired
*/
-static int determine_mtr(struct i5000_pvt *pvt, int csrow, int channel)
+static int determine_mtr(struct i5000_pvt *pvt, int slot, int channel)
{
int mtr;
if (channel < CHANNELS_PER_BRANCH)
- mtr = pvt->b0_mtr[csrow >> 1];
+ mtr = pvt->b0_mtr[slot];
else
- mtr = pvt->b1_mtr[csrow >> 1];
+ mtr = pvt->b1_mtr[slot];
return mtr;
}
@@ -976,49 +968,59 @@ static void decode_mtr(int slot_row, u16 mtr)
ans = MTR_DIMMS_PRESENT(mtr);
- debugf2("\tMTR%d=0x%x: DIMMs are %s\n", slot_row, mtr,
- ans ? "Present" : "NOT Present");
+ edac_dbg(2, "\tMTR%d=0x%x: DIMMs are %sPresent\n",
+ slot_row, mtr, ans ? "" : "NOT ");
if (!ans)
return;
- debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
- debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
- debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? "double" : "single");
- debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]);
- debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
+ edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
+ edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
+ edac_dbg(2, "\t\tNUMRANK: %s\n",
+ MTR_DIMM_RANK(mtr) ? "double" : "single");
+ edac_dbg(2, "\t\tNUMROW: %s\n",
+ MTR_DIMM_ROWS(mtr) == 0 ? "8,192 - 13 rows" :
+ MTR_DIMM_ROWS(mtr) == 1 ? "16,384 - 14 rows" :
+ MTR_DIMM_ROWS(mtr) == 2 ? "32,768 - 15 rows" :
+ "reserved");
+ edac_dbg(2, "\t\tNUMCOL: %s\n",
+ MTR_DIMM_COLS(mtr) == 0 ? "1,024 - 10 columns" :
+ MTR_DIMM_COLS(mtr) == 1 ? "2,048 - 11 columns" :
+ MTR_DIMM_COLS(mtr) == 2 ? "4,096 - 12 columns" :
+ "reserved");
}
-static void handle_channel(struct i5000_pvt *pvt, int csrow, int channel,
+static void handle_channel(struct i5000_pvt *pvt, int slot, int channel,
struct i5000_dimm_info *dinfo)
{
int mtr;
int amb_present_reg;
int addrBits;
- mtr = determine_mtr(pvt, csrow, channel);
+ mtr = determine_mtr(pvt, slot, channel);
if (MTR_DIMMS_PRESENT(mtr)) {
amb_present_reg = determine_amb_present_reg(pvt, channel);
- /* Determine if there is a DIMM present in this DIMM slot */
- if (amb_present_reg & (1 << (csrow >> 1))) {
+ /* Determine if there is a DIMM present in this DIMM slot */
+ if (amb_present_reg) {
dinfo->dual_rank = MTR_DIMM_RANK(mtr);
- if (!((dinfo->dual_rank == 0) &&
- ((csrow & 0x1) == 0x1))) {
- /* Start with the number of bits for a Bank
- * on the DRAM */
- addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
- /* Add thenumber of ROW bits */
- addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
- /* add the number of COLUMN bits */
- addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
-
- addrBits += 6; /* add 64 bits per DIMM */
- addrBits -= 20; /* divide by 2^^20 */
- addrBits -= 3; /* 8 bits per bytes */
-
- dinfo->megabytes = 1 << addrBits;
- }
+ /* Start with the number of bits for a Bank
+ * on the DRAM */
+ addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
+ /* Add the number of ROW bits */
+ addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
+ /* add the number of COLUMN bits */
+ addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
+
+ /* Dual-rank memories have twice the size */
+ if (dinfo->dual_rank)
+ addrBits++;
+
+ addrBits += 6; /* add 64 bits per DIMM */
+ addrBits -= 20; /* divide by 2^^20 */
+ addrBits -= 3; /* 8 bits per bytes */
+
+ dinfo->megabytes = 1 << addrBits;
}
}
}
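A worked pass through the addrBits arithmetic above, for a hypothetical
dual-rank module with 4 banks, 14 row bits and 11 column bits:

	/* banks (2 bits) + rows (14) + cols (11) -> 27 address bits */
	/* dual rank             -> +1  = 28                         */
	/* 64 data bits/access   -> +6  = 34 bits of storage         */
	/* scale to megabytes    -> -20 = 14                         */
	/* bits to bytes         -> -3  = 11                         */
	/* megabytes = 1 << 11 = 2048 MB for this DIMM               */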
@@ -1032,10 +1034,9 @@ static void handle_channel(struct i5000_pvt *pvt, int csrow, int channel,
static void calculate_dimm_size(struct i5000_pvt *pvt)
{
struct i5000_dimm_info *dinfo;
- int csrow, max_csrows;
+ int slot, channel, branch;
char *p, *mem_buffer;
int space, n;
- int channel;
/* ================= Generate some debug output ================= */
space = PAGE_SIZE;
@@ -1046,53 +1047,57 @@ static void calculate_dimm_size(struct i5000_pvt *pvt)
return;
}
- n = snprintf(p, space, "\n");
- p += n;
- space -= n;
-
- /* Scan all the actual CSROWS (which is # of DIMMS * 2)
+ /* Scan all the actual slots
* and calculate the information for each DIMM
- * Start with the highest csrow first, to display it first
- * and work toward the 0th csrow
+ * Start with the highest slot first, to display it first
+ * and work toward the 0th slot
*/
- max_csrows = pvt->maxdimmperch * 2;
- for (csrow = max_csrows - 1; csrow >= 0; csrow--) {
+ for (slot = pvt->maxdimmperch - 1; slot >= 0; slot--) {
- /* on an odd csrow, first output a 'boundary' marker,
+ /* on an odd slot, first output a 'boundary' marker,
* then reset the message buffer */
- if (csrow & 0x1) {
- n = snprintf(p, space, "---------------------------"
+ if (slot & 0x1) {
+ n = snprintf(p, space, "--------------------------"
"--------------------------------");
p += n;
space -= n;
- debugf2("%s\n", mem_buffer);
+ edac_dbg(2, "%s\n", mem_buffer);
p = mem_buffer;
space = PAGE_SIZE;
}
- n = snprintf(p, space, "csrow %2d ", csrow);
+ n = snprintf(p, space, "slot %2d ", slot);
p += n;
space -= n;
for (channel = 0; channel < pvt->maxch; channel++) {
- dinfo = &pvt->dimm_info[csrow][channel];
- handle_channel(pvt, csrow, channel, dinfo);
- n = snprintf(p, space, "%4d MB | ", dinfo->megabytes);
+ dinfo = &pvt->dimm_info[slot][channel];
+ handle_channel(pvt, slot, channel, dinfo);
+ if (dinfo->megabytes)
+ n = snprintf(p, space, "%4d MB %dR| ",
+ dinfo->megabytes, dinfo->dual_rank + 1);
+ else
+ n = snprintf(p, space, "%4d MB | ", 0);
p += n;
space -= n;
}
- n = snprintf(p, space, "\n");
p += n;
space -= n;
+ edac_dbg(2, "%s\n", mem_buffer);
+ p = mem_buffer;
+ space = PAGE_SIZE;
}
/* Output the last bottom 'boundary' marker */
- n = snprintf(p, space, "---------------------------"
- "--------------------------------\n");
+ n = snprintf(p, space, "--------------------------"
+ "--------------------------------");
p += n;
space -= n;
+ edac_dbg(2, "%s\n", mem_buffer);
+ p = mem_buffer;
+ space = PAGE_SIZE;
/* now output the 'channel' labels */
- n = snprintf(p, space, " ");
+ n = snprintf(p, space, " ");
p += n;
space -= n;
for (channel = 0; channel < pvt->maxch; channel++) {
@@ -1100,12 +1105,20 @@ static void calculate_dimm_size(struct i5000_pvt *pvt)
p += n;
space -= n;
}
- n = snprintf(p, space, "\n");
+ edac_dbg(2, "%s\n", mem_buffer);
+ p = mem_buffer;
+ space = PAGE_SIZE;
+
+ n = snprintf(p, space, " ");
p += n;
- space -= n;
+ for (branch = 0; branch < MAX_BRANCHES; branch++) {
+ n = snprintf(p, space, " branch %d | ", branch);
+ p += n;
+ space -= n;
+ }
/* output the last message and free buffer */
- debugf2("%s\n", mem_buffer);
+ edac_dbg(2, "%s\n", mem_buffer);
kfree(mem_buffer);
}
@@ -1128,24 +1141,25 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci)
pvt = mci->pvt_info;
pci_read_config_dword(pvt->system_address, AMBASE,
- (u32 *) & pvt->ambase);
+ &pvt->u.ambase_bottom);
pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32),
- ((u32 *) & pvt->ambase) + sizeof(u32));
+ &pvt->u.ambase_top);
maxdimmperch = pvt->maxdimmperch;
maxch = pvt->maxch;
- debugf2("AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n",
- (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch);
+ edac_dbg(2, "AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n",
+ (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch);
/* Get the Branch Map regs */
pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm);
pvt->tolm >>= 12;
- debugf2("\nTOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm,
- pvt->tolm);
+ edac_dbg(2, "TOLM (number of 256M regions) =%u (0x%x)\n",
+ pvt->tolm, pvt->tolm);
actual_tolm = pvt->tolm << 28;
- debugf2("Actual TOLM byte addr=%u (0x%x)\n", actual_tolm, actual_tolm);
+ edac_dbg(2, "Actual TOLM byte addr=%u (0x%x)\n",
+ actual_tolm, actual_tolm);
pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0);
pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1);
@@ -1155,15 +1169,18 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci)
limit = (pvt->mir0 >> 4) & 0x0FFF;
way0 = pvt->mir0 & 0x1;
way1 = pvt->mir0 & 0x2;
- debugf2("MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0);
+ edac_dbg(2, "MIR0: limit= 0x%x WAY1= %u WAY0= %x\n",
+ limit, way1, way0);
limit = (pvt->mir1 >> 4) & 0x0FFF;
way0 = pvt->mir1 & 0x1;
way1 = pvt->mir1 & 0x2;
- debugf2("MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0);
+ edac_dbg(2, "MIR1: limit= 0x%x WAY1= %u WAY0= %x\n",
+ limit, way1, way0);
limit = (pvt->mir2 >> 4) & 0x0FFF;
way0 = pvt->mir2 & 0x1;
way1 = pvt->mir2 & 0x2;
- debugf2("MIR2: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0);
+ edac_dbg(2, "MIR2: limit= 0x%x WAY1= %u WAY0= %x\n",
+ limit, way1, way0);
/* Get the MTR[0-3] regs */
for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) {
@@ -1172,31 +1189,31 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci)
pci_read_config_word(pvt->branch_0, where,
&pvt->b0_mtr[slot_row]);
- debugf2("MTR%d where=0x%x B0 value=0x%x\n", slot_row, where,
- pvt->b0_mtr[slot_row]);
+ edac_dbg(2, "MTR%d where=0x%x B0 value=0x%x\n",
+ slot_row, where, pvt->b0_mtr[slot_row]);
if (pvt->maxch >= CHANNELS_PER_BRANCH) {
pci_read_config_word(pvt->branch_1, where,
&pvt->b1_mtr[slot_row]);
- debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row,
- where, pvt->b1_mtr[slot_row]);
+ edac_dbg(2, "MTR%d where=0x%x B1 value=0x%x\n",
+ slot_row, where, pvt->b1_mtr[slot_row]);
} else {
pvt->b1_mtr[slot_row] = 0;
}
}
/* Read and dump branch 0's MTRs */
- debugf2("\nMemory Technology Registers:\n");
- debugf2(" Branch 0:\n");
+ edac_dbg(2, "Memory Technology Registers:\n");
+ edac_dbg(2, " Branch 0:\n");
for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) {
decode_mtr(slot_row, pvt->b0_mtr[slot_row]);
}
pci_read_config_word(pvt->branch_0, AMB_PRESENT_0,
&pvt->b0_ambpresent0);
- debugf2("\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0);
+ edac_dbg(2, "\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0);
pci_read_config_word(pvt->branch_0, AMB_PRESENT_1,
&pvt->b0_ambpresent1);
- debugf2("\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1);
+ edac_dbg(2, "\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1);
/* Only if we have 2 branchs (4 channels) */
if (pvt->maxch < CHANNELS_PER_BRANCH) {
@@ -1204,18 +1221,18 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci)
pvt->b1_ambpresent1 = 0;
} else {
/* Read and dump branch 1's MTRs */
- debugf2(" Branch 1:\n");
+ edac_dbg(2, " Branch 1:\n");
for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) {
decode_mtr(slot_row, pvt->b1_mtr[slot_row]);
}
pci_read_config_word(pvt->branch_1, AMB_PRESENT_0,
&pvt->b1_ambpresent0);
- debugf2("\t\tAMB-Branch 1-present0 0x%x:\n",
- pvt->b1_ambpresent0);
+ edac_dbg(2, "\t\tAMB-Branch 1-present0 0x%x:\n",
+ pvt->b1_ambpresent0);
pci_read_config_word(pvt->branch_1, AMB_PRESENT_1,
&pvt->b1_ambpresent1);
- debugf2("\t\tAMB-Branch 1-present1 0x%x:\n",
- pvt->b1_ambpresent1);
+ edac_dbg(2, "\t\tAMB-Branch 1-present1 0x%x:\n",
+ pvt->b1_ambpresent1);
}
/* Go and determine the size of each DIMM and place in an
@@ -1235,13 +1252,13 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci)
static int i5000_init_csrows(struct mem_ctl_info *mci)
{
struct i5000_pvt *pvt;
- struct csrow_info *p_csrow;
+ struct dimm_info *dimm;
int empty, channel_count;
int max_csrows;
- int mtr, mtr1;
+ int mtr;
int csrow_megs;
int channel;
- int csrow;
+ int slot;
pvt = mci->pvt_info;
@@ -1250,43 +1267,40 @@ static int i5000_init_csrows(struct mem_ctl_info *mci)
empty = 1; /* Assume NO memory */
- for (csrow = 0; csrow < max_csrows; csrow++) {
- p_csrow = &mci->csrows[csrow];
-
- p_csrow->csrow_idx = csrow;
-
- /* use branch 0 for the basis */
- mtr = pvt->b0_mtr[csrow >> 1];
- mtr1 = pvt->b1_mtr[csrow >> 1];
-
- /* if no DIMMS on this row, continue */
- if (!MTR_DIMMS_PRESENT(mtr) && !MTR_DIMMS_PRESENT(mtr1))
- continue;
+ /*
+ * FIXME: The memory layout used to map slot/channel into the
+ * real memory architecture is weird: branch+slot are "csrows"
+ * and channel is channel. That required an extra array (dimm_info)
+ * to map the dimms. A good cleanup would be to remove this array,
+ * and do a loop here with branch, channel, slot
+ */
+ for (slot = 0; slot < max_csrows; slot++) {
+ for (channel = 0; channel < pvt->maxch; channel++) {
- /* FAKE OUT VALUES, FIXME */
- p_csrow->first_page = 0 + csrow * 20;
- p_csrow->last_page = 9 + csrow * 20;
- p_csrow->page_mask = 0xFFF;
+ mtr = determine_mtr(pvt, slot, channel);
- p_csrow->grain = 8;
+ if (!MTR_DIMMS_PRESENT(mtr))
+ continue;
- csrow_megs = 0;
- for (channel = 0; channel < pvt->maxch; channel++) {
- csrow_megs += pvt->dimm_info[csrow][channel].megabytes;
- }
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+ channel / MAX_BRANCHES,
+ channel % MAX_BRANCHES, slot);
- p_csrow->nr_pages = csrow_megs << 8;
+ csrow_megs = pvt->dimm_info[slot][channel].megabytes;
+ dimm->grain = 8;
- /* Assume DDR2 for now */
- p_csrow->mtype = MEM_FB_DDR2;
+ /* Assume DDR2 for now */
+ dimm->mtype = MEM_FB_DDR2;
- /* ask what device type on this row */
- if (MTR_DRAM_WIDTH(mtr))
- p_csrow->dtype = DEV_X8;
- else
- p_csrow->dtype = DEV_X4;
+ /* ask what device type on this row */
+ if (MTR_DRAM_WIDTH(mtr))
+ dimm->dtype = DEV_X8;
+ else
+ dimm->dtype = DEV_X4;
- p_csrow->edac_mode = EDAC_S8ECD8ED;
+ dimm->edac_mode = EDAC_S8ECD8ED;
+ dimm->nr_pages = csrow_megs << 8;
+ }
empty = 0;
}
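With the three-layer (branch/channel/slot) layout that this driver now
declares in its probe routine, a flat channel number decomposes as in this
sketch (the division works out because CHANNELS_PER_BRANCH equals
MAX_BRANCHES here):

	/* branch            = channel / MAX_BRANCHES  */
	/* channel-in-branch = channel % MAX_BRANCHES  */
	/* e.g. flat channel 3 -> branch 1, channel 1  */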
@@ -1317,7 +1331,7 @@ static void i5000_enable_error_reporting(struct mem_ctl_info *mci)
}
/*
- * i5000_get_dimm_and_channel_counts(pdev, &num_csrows, &num_channels)
+ * i5000_get_dimm_and_channel_counts(pdev, &nr_csrows, &num_channels)
*
* ask the device how many channels are present and how many CSROWS
* as well
@@ -1332,7 +1346,7 @@ static void i5000_get_dimm_and_channel_counts(struct pci_dev *pdev,
* supported on this memory controller
*/
pci_read_config_byte(pdev, MAXDIMMPERCH, &value);
- *num_dimms_per_channel = (int)value *2;
+ *num_dimms_per_channel = (int)value;
pci_read_config_byte(pdev, MAXCH, &value);
*num_channels = (int)value;
@@ -1348,15 +1362,14 @@ static void i5000_get_dimm_and_channel_counts(struct pci_dev *pdev,
static int i5000_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[3];
struct i5000_pvt *pvt;
int num_channels;
int num_dimms_per_channel;
- int num_csrows;
- debugf0("MC: %s: %s(), pdev bus %u dev=0x%x fn=0x%x\n",
- __FILE__, __func__,
- pdev->bus->number,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ edac_dbg(0, "MC: pdev bus %u dev=0x%x fn=0x%x\n",
+ pdev->bus->number,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
/* We are only looking for func 0 of the set */
if (PCI_FUNC(pdev->devfn) != 0)
@@ -1372,26 +1385,33 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx)
* actual number of slots/dimms per channel, we thus utilize the
* resource as specified by the chipset. Thus, we might have
* more DIMMs per channel than actually on the mobo, but this
- * allows the driver to support upto the chipset max, without
+ * allows the driver to support up to the chipset max, without
* some fancy mobo determination.
*/
i5000_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel,
&num_channels);
- num_csrows = num_dimms_per_channel * 2;
- debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n",
- __func__, num_channels, num_dimms_per_channel, num_csrows);
+ edac_dbg(0, "MC: Number of Branches=2 Channels= %d DIMMS= %d\n",
+ num_channels, num_dimms_per_channel);
/* allocate a new MC control structure */
- mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
+ layers[0].type = EDAC_MC_LAYER_BRANCH;
+ layers[0].size = MAX_BRANCHES;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = num_channels / MAX_BRANCHES;
+ layers[1].is_virt_csrow = false;
+ layers[2].type = EDAC_MC_LAYER_SLOT;
+ layers[2].size = num_dimms_per_channel;
+ layers[2].is_virt_csrow = true;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (mci == NULL)
return -ENOMEM;
- kobject_get(&mci->edac_mci_kobj);
- debugf0("MC: %s: %s(): mci = %p\n", __FILE__, __func__, mci);
+ edac_dbg(0, "MC: mci = %p\n", mci);
- mci->dev = &pdev->dev; /* record ptr to the generic device */
+ mci->pdev = &pdev->dev; /* record ptr to the generic device */
pvt = mci->pvt_info;
pvt->system_address = pdev; /* Record this device in our private */
@@ -1421,19 +1441,16 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx)
/* initialize the MC control structure 'csrows' table
* with the mapping and control information */
if (i5000_init_csrows(mci)) {
- debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n"
- " because i5000_init_csrows() returned nonzero "
- "value\n");
+ edac_dbg(0, "MC: Setting mci->edac_cap to EDAC_FLAG_NONE because i5000_init_csrows() returned nonzero value\n");
mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */
} else {
- debugf1("MC: Enable error reporting now\n");
+ edac_dbg(1, "MC: Enable error reporting now\n");
i5000_enable_error_reporting(mci);
}
/* add this new MC control structure to EDAC's list of MCs */
if (edac_mc_add_mc(mci)) {
- debugf0("MC: %s: %s(): failed edac_mc_add_mc()\n",
- __FILE__, __func__);
+ edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
/* FIXME: perhaps some code should go here that disables error
* reporting if we just enabled it
*/
@@ -1461,7 +1478,6 @@ fail1:
i5000_put_devices(mci);
fail0:
- kobject_put(&mci->edac_mci_kobj);
edac_mc_free(mci);
return -ENODEV;
}
@@ -1473,12 +1489,11 @@ fail0:
* negative on error
* count (>= 0)
*/
-static int __devinit i5000_init_one(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static int i5000_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
int rc;
- debugf0("MC: %s: %s()\n", __FILE__, __func__);
+ edac_dbg(0, "MC:\n");
/* wake up device */
rc = pci_enable_device(pdev);
@@ -1493,11 +1508,11 @@ static int __devinit i5000_init_one(struct pci_dev *pdev,
* i5000_remove_one destructor for one instance of device
*
*/
-static void __devexit i5000_remove_one(struct pci_dev *pdev)
+static void i5000_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
- debugf0("%s: %s()\n", __FILE__, __func__);
+ edac_dbg(0, "\n");
if (i5000_pci)
edac_pci_release_generic_ctl(i5000_pci);
@@ -1507,7 +1522,6 @@ static void __devexit i5000_remove_one(struct pci_dev *pdev)
/* retrieve references to resources, and free those resources */
i5000_put_devices(mci);
- kobject_put(&mci->edac_mci_kobj);
edac_mc_free(mci);
}
@@ -1516,7 +1530,7 @@ static void __devexit i5000_remove_one(struct pci_dev *pdev)
*
* The "E500P" device is the first device supported.
*/
-static const struct pci_device_id i5000_pci_tbl[] __devinitdata = {
+static const struct pci_device_id i5000_pci_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I5000_DEV16),
.driver_data = I5000P},
@@ -1532,7 +1546,7 @@ MODULE_DEVICE_TABLE(pci, i5000_pci_tbl);
static struct pci_driver i5000_driver = {
.name = KBUILD_BASENAME,
.probe = i5000_init_one,
- .remove = __devexit_p(i5000_remove_one),
+ .remove = i5000_remove_one,
.id_table = i5000_pci_tbl,
};
@@ -1544,7 +1558,7 @@ static int __init i5000_init(void)
{
int pci_rc;
- debugf2("MC: %s: %s()\n", __FILE__, __func__);
+ edac_dbg(2, "MC:\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -1560,7 +1574,7 @@ static int __init i5000_init(void)
*/
static void __exit i5000_exit(void)
{
- debugf2("MC: %s: %s()\n", __FILE__, __func__);
+ edac_dbg(2, "MC:\n");
pci_unregister_driver(&i5000_driver);
}
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index f459a6c0886..6247d186177 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -11,9 +11,14 @@
*
* The intel 5100 has two independent channels. EDAC core currently
* can not reflect this configuration so instead the chip-select
- * rows for each respective channel are layed out one after another,
+ * rows for each respective channel are laid out one after another,
* the first half belonging to channel 0, the second half belonging
* to channel 1.
+ *
+ * This driver is for DDR2 DIMMs, and it uses chip select to select among the
+ * several ranks. However, instead of showing memories as ranks, it outputs
+ * them as DIMM's. An internal table creates the association between ranks
+ * and DIMM's.
*/
#include <linux/module.h>
#include <linux/init.h>
@@ -22,6 +27,7 @@
#include <linux/edac.h>
#include <linux/delay.h>
#include <linux/mmzone.h>
+#include <linux/debugfs.h>
#include "edac_core.h"
@@ -49,7 +55,7 @@
#define I5100_FERR_NF_MEM_M6ERR_MASK (1 << 6)
#define I5100_FERR_NF_MEM_M5ERR_MASK (1 << 5)
#define I5100_FERR_NF_MEM_M4ERR_MASK (1 << 4)
-#define I5100_FERR_NF_MEM_M1ERR_MASK 1
+#define I5100_FERR_NF_MEM_M1ERR_MASK (1 << 1)
#define I5100_FERR_NF_MEM_ANY_MASK \
(I5100_FERR_NF_MEM_M16ERR_MASK | \
I5100_FERR_NF_MEM_M15ERR_MASK | \
@@ -63,6 +69,14 @@
I5100_FERR_NF_MEM_M1ERR_MASK)
#define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */
#define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */
+#define I5100_MEM0EINJMSK0 0x200 /* Injection Mask0 Register Channel 0 */
+#define I5100_MEM1EINJMSK0 0x208 /* Injection Mask0 Register Channel 1 */
+#define I5100_MEMXEINJMSK0_EINJEN (1 << 27)
+#define I5100_MEM0EINJMSK1 0x204 /* Injection Mask1 Register Channel 0 */
+#define I5100_MEM1EINJMSK1 0x20c /* Injection Mask1 Register Channel 1 */
+
+/* Device 19, Function 0 */
+#define I5100_DINJ0 0x9a
/* device 21 and 22, func 0 */
#define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */
@@ -333,13 +347,26 @@ struct i5100_priv {
unsigned ranksperchan; /* number of ranks per channel */
struct pci_dev *mc; /* device 16 func 1 */
+ struct pci_dev *einj; /* device 19 func 0 */
struct pci_dev *ch0mm; /* device 21 func 0 */
struct pci_dev *ch1mm; /* device 22 func 0 */
struct delayed_work i5100_scrubbing;
int scrub_enable;
+
+ /* Error injection */
+ u8 inject_channel;
+ u8 inject_hlinesel;
+ u8 inject_deviceptr1;
+ u8 inject_deviceptr2;
+ u16 inject_eccmask1;
+ u16 inject_eccmask2;
+
+ struct dentry *debugfs;
};
+static struct dentry *i5100_debugfs;
+
/* map a rank/chan to a slot number on the mainboard */
static int i5100_rank_to_slot(const struct mem_ctl_info *mci,
int chan, int rank)
@@ -410,14 +437,6 @@ static int i5100_csrow_to_chan(const struct mem_ctl_info *mci, int csrow)
return csrow / priv->ranksperchan;
}
-static unsigned i5100_rank_to_csrow(const struct mem_ctl_info *mci,
- int chan, int rank)
-{
- const struct i5100_priv *priv = mci->pvt_info;
-
- return chan * priv->ranksperchan + rank;
-}
-
static void i5100_handle_ce(struct mem_ctl_info *mci,
int chan,
unsigned bank,
@@ -427,17 +446,17 @@ static void i5100_handle_ce(struct mem_ctl_info *mci,
unsigned ras,
const char *msg)
{
- const int csrow = i5100_rank_to_csrow(mci, chan, rank);
+ char detail[80];
- printk(KERN_ERR
- "CE chan %d, bank %u, rank %u, syndrome 0x%lx, "
- "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
- chan, bank, rank, syndrome, cas, ras,
- csrow, mci->csrows[csrow].channels[0].label, msg);
+ /* Form out message */
+ snprintf(detail, sizeof(detail),
+ "bank %u, cas %u, ras %u\n",
+ bank, cas, ras);
- mci->ce_count++;
- mci->csrows[csrow].ce_count++;
- mci->csrows[csrow].channels[0].ce_count++;
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ 0, 0, syndrome,
+ chan, rank, -1,
+ msg, detail);
}
static void i5100_handle_ue(struct mem_ctl_info *mci,
@@ -449,16 +468,17 @@ static void i5100_handle_ue(struct mem_ctl_info *mci,
unsigned ras,
const char *msg)
{
- const int csrow = i5100_rank_to_csrow(mci, chan, rank);
+ char detail[80];
- printk(KERN_ERR
- "UE chan %d, bank %u, rank %u, syndrome 0x%lx, "
- "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
- chan, bank, rank, syndrome, cas, ras,
- csrow, mci->csrows[csrow].channels[0].label, msg);
+ /* Form out message */
+ snprintf(detail, sizeof(detail),
+ "bank %u, cas %u, ras %u\n",
+ bank, cas, ras);
- mci->ue_count++;
- mci->csrows[csrow].ue_count++;
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ 0, 0, syndrome,
+ chan, rank, -1,
+ msg, detail);
}
static void i5100_read_log(struct mem_ctl_info *mci, int chan,
@@ -535,23 +555,20 @@ static void i5100_read_log(struct mem_ctl_info *mci, int chan,
static void i5100_check_error(struct mem_ctl_info *mci)
{
struct i5100_priv *priv = mci->pvt_info;
- u32 dw;
-
+ u32 dw, dw2;
pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw);
if (i5100_ferr_nf_mem_any(dw)) {
- u32 dw2;
pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2);
- if (dw2)
- pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM,
- dw2);
- pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(dw),
i5100_ferr_nf_mem_any(dw),
i5100_nerr_nf_mem_any(dw2));
+
+ pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM, dw2);
}
+ pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
}
/* The i5100 chipset will scrub the entire memory once, then
@@ -611,20 +628,17 @@ static int i5100_set_scrub_rate(struct mem_ctl_info *mci, u32 bandwidth)
bandwidth = 5900000 * i5100_mc_scrben(dw);
- return 0;
+ return bandwidth;
}
-static int i5100_get_scrub_rate(struct mem_ctl_info *mci,
- u32 *bandwidth)
+static int i5100_get_scrub_rate(struct mem_ctl_info *mci)
{
struct i5100_priv *priv = mci->pvt_info;
u32 dw;
pci_read_config_dword(priv->mc, I5100_MC, &dw);
- *bandwidth = 5900000 * i5100_mc_scrben(dw);
-
- return 0;
+ return 5900000 * i5100_mc_scrben(dw);
}
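The two hunks above track an EDAC core API change: both scrub callbacks now
return the bandwidth directly instead of reporting it through a pointer
argument. The callback shapes in this era of the core (sketch):

	int (*set_sdram_scrub_rate)(struct mem_ctl_info *mci, u32 bw);
		/* returns the rate actually configured, or < 0 on error */
	int (*get_sdram_scrub_rate)(struct mem_ctl_info *mci);
		/* returns the current rate, or < 0 on error */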
static struct pci_dev *pci_get_device_func(unsigned vendor,
@@ -646,8 +660,7 @@ static struct pci_dev *pci_get_device_func(unsigned vendor,
return ret;
}
-static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci,
- int csrow)
+static unsigned long i5100_npages(struct mem_ctl_info *mci, int csrow)
{
struct i5100_priv *priv = mci->pvt_info;
const unsigned chan_rank = i5100_csrow_to_rank(mci, csrow);
@@ -668,7 +681,7 @@ static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci,
((unsigned long long) (1ULL << addr_lines) / PAGE_SIZE);
}
-static void __devinit i5100_init_mtr(struct mem_ctl_info *mci)
+static void i5100_init_mtr(struct mem_ctl_info *mci)
{
struct i5100_priv *priv = mci->pvt_info;
struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
@@ -740,7 +753,7 @@ static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
* o not the only way to map chip selects to dimm slots
* o investigate if there is some way to obtain this map from the bios
*/
-static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci)
+static void i5100_init_dimm_csmap(struct mem_ctl_info *mci)
{
struct i5100_priv *priv = mci->pvt_info;
int i;
@@ -770,8 +783,8 @@ static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci)
}
}
-static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev,
- struct mem_ctl_info *mci)
+static void i5100_init_dimm_layout(struct pci_dev *pdev,
+ struct mem_ctl_info *mci)
{
struct i5100_priv *priv = mci->pvt_info;
int i;
@@ -792,8 +805,8 @@ static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev,
i5100_init_dimm_csmap(mci);
}
-static void __devinit i5100_init_interleaving(struct pci_dev *pdev,
- struct mem_ctl_info *mci)
+static void i5100_init_interleaving(struct pci_dev *pdev,
+ struct mem_ctl_info *mci)
{
u16 w;
u32 dw;
@@ -838,13 +851,13 @@ static void __devinit i5100_init_interleaving(struct pci_dev *pdev,
i5100_init_mtr(mci);
}
-static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
+static void i5100_init_csrows(struct mem_ctl_info *mci)
{
int i;
- unsigned long total_pages = 0UL;
struct i5100_priv *priv = mci->pvt_info;
- for (i = 0; i < mci->nr_csrows; i++) {
+ for (i = 0; i < mci->tot_dimms; i++) {
+ struct dimm_info *dimm;
const unsigned long npages = i5100_npages(mci, i);
const unsigned chan = i5100_csrow_to_chan(mci, i);
const unsigned rank = i5100_csrow_to_rank(mci, i);
@@ -852,43 +865,143 @@ static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
if (!npages)
continue;
- /*
- * FIXME: these two are totally bogus -- I don't see how to
- * map them correctly to this structure...
- */
- mci->csrows[i].first_page = total_pages;
- mci->csrows[i].last_page = total_pages + npages - 1;
- mci->csrows[i].page_mask = 0UL;
-
- mci->csrows[i].nr_pages = npages;
- mci->csrows[i].grain = 32;
- mci->csrows[i].csrow_idx = i;
- mci->csrows[i].dtype =
- (priv->mtr[chan][rank].width == 4) ? DEV_X4 : DEV_X8;
- mci->csrows[i].ue_count = 0;
- mci->csrows[i].ce_count = 0;
- mci->csrows[i].mtype = MEM_RDDR2;
- mci->csrows[i].edac_mode = EDAC_SECDED;
- mci->csrows[i].mci = mci;
- mci->csrows[i].nr_channels = 1;
- mci->csrows[i].channels[0].chan_idx = 0;
- mci->csrows[i].channels[0].ce_count = 0;
- mci->csrows[i].channels[0].csrow = mci->csrows + i;
- snprintf(mci->csrows[i].channels[0].label,
- sizeof(mci->csrows[i].channels[0].label),
- "DIMM%u", i5100_rank_to_slot(mci, chan, rank));
-
- total_pages += npages;
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+ chan, rank, 0);
+
+ dimm->nr_pages = npages;
+ dimm->grain = 32;
+ dimm->dtype = (priv->mtr[chan][rank].width == 4) ?
+ DEV_X4 : DEV_X8;
+ dimm->mtype = MEM_RDDR2;
+ dimm->edac_mode = EDAC_SECDED;
+ snprintf(dimm->label, sizeof(dimm->label), "DIMM%u",
+ i5100_rank_to_slot(mci, chan, rank));
+
+ edac_dbg(2, "dimm channel %d, rank %d, size %ld\n",
+ chan, rank, (long)PAGES_TO_MiB(npages));
}
}
-static int __devinit i5100_init_one(struct pci_dev *pdev,
- const struct pci_device_id *id)
+/****************************************************************************
+ * Error injection routines
+ ****************************************************************************/
+
+static void i5100_do_inject(struct mem_ctl_info *mci)
+{
+ struct i5100_priv *priv = mci->pvt_info;
+ u32 mask0;
+ u16 mask1;
+
+ /* MEM[1:0]EINJMSK0
+ * 31 - ADDRMATCHEN
+ * 29:28 - HLINESEL
+ * 00 Reserved
+ * 01 Lower half of cache line
+ * 10 Upper half of cache line
+ * 11 Both upper and lower parts of cache line
+ * 27 - EINJEN
+ * 25:19 - XORMASK1 for deviceptr1
+ * 9:5 - SEC2RAM for deviceptr2
+ * 4:0 - FIR2RAM for deviceptr1
+ */
+ mask0 = ((priv->inject_hlinesel & 0x3) << 28) |
+ I5100_MEMXEINJMSK0_EINJEN |
+ ((priv->inject_eccmask1 & 0xffff) << 10) |
+ ((priv->inject_deviceptr2 & 0x1f) << 5) |
+ (priv->inject_deviceptr1 & 0x1f);
+
+ /* MEM[1:0]EINJMSK1
+ * 15:0 - XORMASK2 for deviceptr2
+ */
+ mask1 = priv->inject_eccmask2;
+
+ if (priv->inject_channel == 0) {
+ pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0);
+ pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1);
+ } else {
+ pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0);
+ pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1);
+ }
+
+ /* Error Injection Response Function
+ * Intel 5100 Memory Controller Hub Chipset (318378) datasheet
+ * hints at this register but carries no details about it. All
+ * data regarding device 19 is based on experimentation and on the
+ * Intel 7300 Chipset Memory Controller Hub (318082) datasheet,
+ * which appears to be accurate for the i5100 in this area.
+ *
+ * The injection code doesn't work without setting this register.
+ * The register needs to be toggled off and then on, else the hardware
+ * will only perform the first injection.
+ *
+ * Stop condition bits 7:4
+ * 1010 - Stop after one injection
+ * 1011 - Never stop injecting faults
+ *
+ * Start condition bits 3:0
+ * 1010 - Never start
+ * 1011 - Start immediately
+ */
+ pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa);
+ pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab);
+}
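A worked example of the MEM[1:0]EINJMSK0 value that i5100_do_inject() composes, as a stand-alone user-space sketch; the knob values are hypothetical and the EINJEN bit position (27) is taken from the bit map in the comment above:

	#include <stdint.h>
	#include <stdio.h>

	#define EINJEN	(1u << 27)	/* I5100_MEMXEINJMSK0_EINJEN in the driver */

	int main(void)
	{
		uint8_t hlinesel = 1;		/* lower half of the cache line */
		uint16_t eccmask1 = 0x0001;	/* flip one check bit */
		uint8_t deviceptr1 = 2, deviceptr2 = 3;

		uint32_t mask0 = ((hlinesel & 0x3) << 28) | EINJEN |
				 ((eccmask1 & 0xffff) << 10) |
				 ((deviceptr2 & 0x1f) << 5) |
				 (deviceptr1 & 0x1f);

		printf("mask0 = 0x%08x\n", (unsigned)mask0);	/* 0x18000462 */
		return 0;
	}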
+
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+static ssize_t inject_enable_write(struct file *file, const char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct device *dev = file->private_data;
+ struct mem_ctl_info *mci = to_mci(dev);
+
+ i5100_do_inject(mci);
+
+ return count;
+}
+
+static const struct file_operations i5100_inject_enable_fops = {
+ .open = simple_open,
+ .write = inject_enable_write,
+ .llseek = generic_file_llseek,
+};
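The open handler here is the generic one from fs/libfs.c, which simply publishes the inode's private pointer to the file; that is what lets inject_enable_write() above recover the struct device that i5100_setup_debugfs() passes to debugfs_create_file() below. Its body is essentially:

	int simple_open(struct inode *inode, struct file *file)
	{
		if (inode->i_private)
			file->private_data = inode->i_private;
		return 0;
	}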
+
+static int i5100_setup_debugfs(struct mem_ctl_info *mci)
+{
+ struct i5100_priv *priv = mci->pvt_info;
+
+ if (!i5100_debugfs)
+ return -ENODEV;
+
+ priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs);
+
+ if (!priv->debugfs)
+ return -ENOMEM;
+
+ debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs,
+ &priv->inject_channel);
+ debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs,
+ &priv->inject_hlinesel);
+ debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs,
+ &priv->inject_deviceptr1);
+ debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs,
+ &priv->inject_deviceptr2);
+ debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs,
+ &priv->inject_eccmask1);
+ debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs,
+ &priv->inject_eccmask2);
+ debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs,
+ &mci->dev, &i5100_inject_enable_fops);
+
+ return 0;
+}
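A hypothetical user-space session for the nodes above, kept as a comment since the exact paths depend on where debugfs is mounted and on the bus name the core assigns ("mc0" here is an assumption):

	/*
	 *	echo 1 > /sys/kernel/debug/i5100_edac/mc0/inject_eccmask1
	 *	echo 1 > /sys/kernel/debug/i5100_edac/mc0/inject_enable
	 *
	 * Any write to inject_enable calls i5100_do_inject(); the written
	 * value is ignored.
	 */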
+
+static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
int rc;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct i5100_priv *priv;
- struct pci_dev *ch0mm, *ch1mm;
+ struct pci_dev *ch0mm, *ch1mm, *einj;
int ret = 0;
u32 dw;
int ranksperch;
@@ -947,19 +1060,43 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
goto bail_ch1;
}
- mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0);
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = 2;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_SLOT;
+ layers[1].size = ranksperch;
+ layers[1].is_virt_csrow = true;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+ sizeof(*priv));
if (!mci) {
ret = -ENOMEM;
goto bail_disable_ch1;
}
- mci->dev = &pdev->dev;
+
+ /* device 19, func 0, Error injection */
+ einj = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_5100_19, 0);
+ if (!einj) {
+ ret = -ENODEV;
+ goto bail_einj;
+ }
+
+ rc = pci_enable_device(einj);
+ if (rc < 0) {
+ ret = rc;
+ goto bail_disable_einj;
+ }
+
+ mci->pdev = &pdev->dev;
priv = mci->pvt_info;
priv->ranksperchan = ranksperch;
priv->mc = pdev;
priv->ch0mm = ch0mm;
priv->ch1mm = ch1mm;
+ priv->einj = einj;
INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing);
@@ -987,6 +1124,13 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
mci->set_sdram_scrub_rate = i5100_set_scrub_rate;
mci->get_sdram_scrub_rate = i5100_get_scrub_rate;
+ priv->inject_channel = 0;
+ priv->inject_hlinesel = 0;
+ priv->inject_deviceptr1 = 0;
+ priv->inject_deviceptr2 = 0;
+ priv->inject_eccmask1 = 0;
+ priv->inject_eccmask2 = 0;
+
i5100_init_csrows(mci);
/* this strange construction seems to be in every driver, dunno why */
@@ -1004,6 +1148,8 @@ static int __devinit i5100_init_one(struct pci_dev *pdev,
goto bail_scrub;
}
+ i5100_setup_debugfs(mci);
+
return ret;
bail_scrub:
@@ -1011,6 +1157,12 @@ bail_scrub:
cancel_delayed_work_sync(&(priv->i5100_scrubbing));
edac_mc_free(mci);
+bail_disable_einj:
+ pci_disable_device(einj);
+
+bail_einj:
+ pci_dev_put(einj);
+
bail_disable_ch1:
pci_disable_device(ch1mm);
@@ -1030,7 +1182,7 @@ bail:
return ret;
}
-static void __devexit i5100_remove_one(struct pci_dev *pdev)
+static void i5100_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct i5100_priv *priv;
@@ -1042,19 +1194,23 @@ static void __devexit i5100_remove_one(struct pci_dev *pdev)
priv = mci->pvt_info;
+ debugfs_remove_recursive(priv->debugfs);
+
priv->scrub_enable = 0;
cancel_delayed_work_sync(&(priv->i5100_scrubbing));
pci_disable_device(pdev);
pci_disable_device(priv->ch0mm);
pci_disable_device(priv->ch1mm);
+ pci_disable_device(priv->einj);
pci_dev_put(priv->ch0mm);
pci_dev_put(priv->ch1mm);
+ pci_dev_put(priv->einj);
edac_mc_free(mci);
}
-static const struct pci_device_id i5100_pci_tbl[] __devinitdata = {
+static const struct pci_device_id i5100_pci_tbl[] = {
/* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... */
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) },
{ 0, }
@@ -1064,7 +1220,7 @@ MODULE_DEVICE_TABLE(pci, i5100_pci_tbl);
static struct pci_driver i5100_driver = {
.name = KBUILD_BASENAME,
.probe = i5100_init_one,
- .remove = __devexit_p(i5100_remove_one),
+ .remove = i5100_remove_one,
.id_table = i5100_pci_tbl,
};
@@ -1072,13 +1228,16 @@ static int __init i5100_init(void)
{
int pci_rc;
- pci_rc = pci_register_driver(&i5100_driver);
+ i5100_debugfs = debugfs_create_dir("i5100_edac", NULL);
+ pci_rc = pci_register_driver(&i5100_driver);
return (pci_rc < 0) ? pci_rc : 0;
}
static void __exit i5100_exit(void)
{
+ debugfs_remove(i5100_debugfs);
+
pci_unregister_driver(&i5100_driver);
}
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 38a9be9e1c7..6ef6ad1ba16 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -6,7 +6,7 @@
*
* Copyright (c) 2008 by:
* Ben Woodard <woodard@redhat.com>
- * Mauro Carvalho Chehab <mchehab@redhat.com>
+ * Mauro Carvalho Chehab
*
* Red Hat Inc. http://www.redhat.com
*
@@ -18,6 +18,10 @@
* Intel 5400 Chipset Memory Controller Hub (MCH) - Datasheet
* http://developer.intel.com/design/chipsets/datashts/313070.htm
*
+ * This Memory Controller manages DDR2 FB-DIMMs. It has 2 branches, each with
+ * 2 channels operating in lockstep no-mirror mode. Each channel can have up to
+ * 4 DIMMs, each with up to 8 GB.
+ *
*/
#include <linux/module.h>
@@ -33,7 +37,7 @@
/*
* Alter this version for the I5400 module when modifications are made
*/
-#define I5400_REVISION " Ver: 1.0.0 " __DATE__
+#define I5400_REVISION " Ver: 1.0.0"
#define EDAC_MOD_STR "i5400_edac"
@@ -44,12 +48,10 @@
edac_mc_chipset_printk(mci, level, "i5400", fmt, ##arg)
/* Limits for i5400 */
-#define NUM_MTRS_PER_BRANCH 4
+#define MAX_BRANCHES 2
#define CHANNELS_PER_BRANCH 2
-#define MAX_DIMMS_PER_CHANNEL NUM_MTRS_PER_BRANCH
-#define MAX_CHANNELS 4
-/* max possible csrows per channel */
-#define MAX_CSROWS (MAX_DIMMS_PER_CHANNEL)
+#define DIMMS_PER_CHANNEL 4
+#define MAX_CHANNELS (MAX_BRANCHES * CHANNELS_PER_BRANCH)
/* Device 16,
* Function 0: System Address
@@ -298,24 +300,6 @@ static inline int extract_fbdchan_indx(u32 x)
return (x>>28) & 0x3;
}
-#ifdef CONFIG_EDAC_DEBUG
-/* MTR NUMROW */
-static const char *numrow_toString[] = {
- "8,192 - 13 rows",
- "16,384 - 14 rows",
- "32,768 - 15 rows",
- "65,536 - 16 rows"
-};
-
-/* MTR NUMCOL */
-static const char *numcol_toString[] = {
- "1,024 - 10 columns",
- "2,048 - 11 columns",
- "4,096 - 12 columns",
- "reserved"
-};
-#endif
-
/* Device name and register DID (Device ID) */
struct i5400_dev_info {
const char *ctl_name; /* name for this device */
@@ -343,20 +327,26 @@ struct i5400_pvt {
struct pci_dev *branch_1; /* 22.0 */
u16 tolm; /* top of low memory */
- u64 ambase; /* AMB BAR */
+ union {
+ u64 ambase; /* AMB BAR */
+ struct {
+ u32 ambase_bottom;
+ u32 ambase_top;
+ } u __packed;
+ };
u16 mir0, mir1;
- u16 b0_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */
+ u16 b0_mtr[DIMMS_PER_CHANNEL]; /* Memory Technology Reg */
u16 b0_ambpresent0; /* Branch 0, Channel 0 */
u16 b0_ambpresent1; /* Branch 0, Channel 1 */
- u16 b1_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */
+ u16 b1_mtr[DIMMS_PER_CHANNEL]; /* Memory Technology Reg */
u16 b1_ambpresent0; /* Branch 1, Channel 0 */
u16 b1_ambpresent1; /* Branch 1, Channel 1 */
/* DIMM information matrix, allocating architecture maximums */
- struct i5400_dimm_info dimm_info[MAX_CSROWS][MAX_CHANNELS];
+ struct i5400_dimm_info dimm_info[DIMMS_PER_CHANNEL][MAX_CHANNELS];
/* Actual values for this controller */
int maxch; /* Max channels */
@@ -532,13 +522,15 @@ static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
int ras, cas;
int errnum;
char *type = NULL;
+ enum hw_event_mc_err_type tp_event = HW_EVENT_ERR_UNCORRECTED;
if (!allErrors)
return; /* if no error, return now */
- if (allErrors & ERROR_FAT_MASK)
+ if (allErrors & ERROR_FAT_MASK) {
type = "FATAL";
- else if (allErrors & FERR_NF_UNCORRECTABLE)
+ tp_event = HW_EVENT_ERR_FATAL;
+ } else if (allErrors & FERR_NF_UNCORRECTABLE)
type = "NON-FATAL uncorrected";
else
type = "NON-FATAL recoverable";
@@ -556,23 +548,22 @@ static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,
ras = nrec_ras(info);
cas = nrec_cas(info);
- debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d "
- "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
- rank, channel, channel + 1, branch >> 1, bank,
- buf_id, rdwr_str(rdwr), ras, cas);
+ edac_dbg(0, "\t\tDIMM= %d Channels= %d,%d (Branch= %d DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
+ rank, channel, channel + 1, branch >> 1, bank,
+ buf_id, rdwr_str(rdwr), ras, cas);
/* Only 1 bit will be on */
errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
/* Form out message */
snprintf(msg, sizeof(msg),
- "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s "
- "RAS=%d CAS=%d %s Err=0x%lx (%s))",
- type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas,
- type, allErrors, error_name[errnum]);
+ "Bank=%d Buffer ID = %d RAS=%d CAS=%d Err=0x%lx (%s)",
+ bank, buf_id, ras, cas, allErrors, error_name[errnum]);
- /* Call the helper to output message */
- edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
+ edac_mc_handle_error(tp_event, mci, 1, 0, 0, 0,
+ branch >> 1, -1, rank,
+ rdwr ? "Write error" : "Read error",
+ msg);
}
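For reference, the layered reporting helper used above has this shape in this kernel series; the three layer arguments parallel the layers handed to edac_mc_alloc(), with -1 meaning the position is unknown at that layer:

	void edac_mc_handle_error(const enum hw_event_mc_err_type type,
				  struct mem_ctl_info *mci,
				  const u16 error_count,
				  const unsigned long page_frame_number,
				  const unsigned long offset_in_page,
				  const unsigned long syndrome,
				  const int top_layer, const int mid_layer,
				  const int low_layer,
				  const char *msg, const char *other_detail);

So the fatal call above reports one event at (branch, unknown channel, rank) with no page, offset, or syndrome information.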
/*
@@ -609,7 +600,7 @@ static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,
/* Correctable errors */
if (allErrors & ERROR_NF_CORRECTABLE) {
- debugf0("\tCorrected bits= 0x%lx\n", allErrors);
+ edac_dbg(0, "\tCorrected bits= 0x%lx\n", allErrors);
branch = extract_fbdchan_indx(info->ferr_nf_fbd);
@@ -630,10 +621,9 @@ static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,
/* Only 1 bit will be on */
errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
- debugf0("\t\tCSROW= %d Channel= %d (Branch %d "
- "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
- rank, channel, branch >> 1, bank,
- rdwr_str(rdwr), ras, cas);
+ edac_dbg(0, "\t\tDIMM= %d Channel= %d (Branch %d DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
+ rank, channel, branch >> 1, bank,
+ rdwr_str(rdwr), ras, cas);
/* Form out message */
snprintf(msg, sizeof(msg),
@@ -642,13 +632,15 @@ static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,
branch >> 1, bank, rdwr_str(rdwr), ras, cas,
allErrors, error_name[errnum]);
- /* Call the helper to output message */
- edac_mc_handle_fbd_ce(mci, rank, channel, msg);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0,
+ branch >> 1, channel % 2, rank,
+ rdwr ? "Write error" : "Read error",
+ msg);
return;
}
- /* Miscelaneous errors */
+ /* Miscellaneous errors */
errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
branch = extract_fbdchan_indx(info->ferr_nf_fbd);
@@ -694,7 +686,7 @@ static void i5400_clear_error(struct mem_ctl_info *mci)
static void i5400_check_error(struct mem_ctl_info *mci)
{
struct i5400_error_info info;
- debugf4("MC%d: %s: %s()\n", mci->mc_idx, __FILE__, __func__);
+ edac_dbg(4, "MC%d\n", mci->mc_idx);
i5400_get_error_info(mci, &info);
i5400_process_error_info(mci, &info);
}
@@ -735,7 +727,7 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
/* Attempt to 'get' the MCH register we want */
pdev = NULL;
- while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) {
+ while (1) {
pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_5400_ERR, pdev);
if (!pdev) {
@@ -743,33 +735,53 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
i5400_printk(KERN_ERR,
"'system address,Process Bus' "
"device not found:"
- "vendor 0x%x device 0x%x ERR funcs "
+ "vendor 0x%x device 0x%x ERR func 1 "
"(broken BIOS?)\n",
PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_5400_ERR);
- goto error;
+ return -ENODEV;
}
- /* Store device 16 funcs 1 and 2 */
- switch (PCI_FUNC(pdev->devfn)) {
- case 1:
- pvt->branchmap_werrors = pdev;
+ /* Store device 16 func 1 */
+ if (PCI_FUNC(pdev->devfn) == 1)
break;
- case 2:
- pvt->fsb_error_regs = pdev;
- break;
- }
}
+ pvt->branchmap_werrors = pdev;
- debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n",
- pci_name(pvt->system_address),
- pvt->system_address->vendor, pvt->system_address->device);
- debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
- pci_name(pvt->branchmap_werrors),
- pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device);
- debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n",
- pci_name(pvt->fsb_error_regs),
- pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device);
+ pdev = NULL;
+ while (1) {
+ pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_5400_ERR, pdev);
+ if (!pdev) {
+ /* End of list, leave */
+ i5400_printk(KERN_ERR,
+ "'system address,Process Bus' "
+ "device not found:"
+ "vendor 0x%x device 0x%x ERR func 2 "
+ "(broken BIOS?)\n",
+ PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_5400_ERR);
+
+ pci_dev_put(pvt->branchmap_werrors);
+ return -ENODEV;
+ }
+
+ /* Store device 16 func 2 */
+ if (PCI_FUNC(pdev->devfn) == 2)
+ break;
+ }
+ pvt->fsb_error_regs = pdev;
+
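Both search loops rely on the pci_get_device() iterator contract: the reference on the device passed in is dropped and the next match is returned with a fresh reference, so the match the loop breaks out on is already pinned for the caller. A condensed sketch of the idiom as a hypothetical helper:

	static struct pci_dev *i5400_find_err_func(unsigned int func)
	{
		struct pci_dev *pdev = NULL;

		while ((pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
					      PCI_DEVICE_ID_INTEL_5400_ERR, pdev))) {
			if (PCI_FUNC(pdev->devfn) == func)
				break;	/* reference is held for the caller */
		}
		return pdev;	/* NULL once the list is exhausted */
	}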
+ edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n",
+ pci_name(pvt->system_address),
+ pvt->system_address->vendor, pvt->system_address->device);
+ edac_dbg(1, "Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
+ pci_name(pvt->branchmap_werrors),
+ pvt->branchmap_werrors->vendor,
+ pvt->branchmap_werrors->device);
+ edac_dbg(1, "FSB Error Regs - PCI Bus ID: %s %x:%x\n",
+ pci_name(pvt->fsb_error_regs),
+ pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device);
pvt->branch_0 = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_5400_FBD0, NULL);
@@ -778,7 +790,10 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
"MC: 'BRANCH 0' device not found:"
"vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0);
- goto error;
+
+ pci_dev_put(pvt->fsb_error_regs);
+ pci_dev_put(pvt->branchmap_werrors);
+ return -ENODEV;
}
/* If this device claims to have more than 2 channels then
@@ -796,21 +811,21 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
"(broken BIOS?)\n",
PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_5400_FBD1);
- goto error;
+
+ pci_dev_put(pvt->branch_0);
+ pci_dev_put(pvt->fsb_error_regs);
+ pci_dev_put(pvt->branchmap_werrors);
+ return -ENODEV;
}
return 0;
-
-error:
- i5400_put_devices(mci);
- return -ENODEV;
}
/*
* determine_amb_present
*
- * the information is contained in NUM_MTRS_PER_BRANCH different
- * registers determining which of the NUM_MTRS_PER_BRANCH requires
+ * the information is contained in DIMMS_PER_CHANNEL different
+ * registers; determining which of the DIMMS_PER_CHANNEL requires
* knowing which channel is in question
*
* 2 branches, each with 2 channels
@@ -839,11 +854,11 @@ static int determine_amb_present_reg(struct i5400_pvt *pvt, int channel)
}
/*
- * determine_mtr(pvt, csrow, channel)
+ * determine_mtr(pvt, dimm, channel)
*
- * return the proper MTR register as determine by the csrow and desired channel
+ * return the proper MTR register as determined by the dimm and desired channel
*/
-static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel)
+static int determine_mtr(struct i5400_pvt *pvt, int dimm, int channel)
{
int mtr;
int n;
@@ -851,11 +866,11 @@ static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel)
/* There is one MTR for each slot pair of FB-DIMMs,
Each slot pair may be at branch 0 or branch 1.
*/
- n = csrow;
+ n = dimm;
- if (n >= NUM_MTRS_PER_BRANCH) {
- debugf0("ERROR: trying to access an invalid csrow: %d\n",
- csrow);
+ if (n >= DIMMS_PER_CHANNEL) {
+ edac_dbg(0, "ERROR: trying to access an invalid dimm: %d\n",
+ dimm);
return 0;
}
@@ -875,35 +890,44 @@ static void decode_mtr(int slot_row, u16 mtr)
ans = MTR_DIMMS_PRESENT(mtr);
- debugf2("\tMTR%d=0x%x: DIMMs are %s\n", slot_row, mtr,
- ans ? "Present" : "NOT Present");
+ edac_dbg(2, "\tMTR%d=0x%x: DIMMs are %sPresent\n",
+ slot_row, mtr, ans ? "" : "NOT ");
if (!ans)
return;
- debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
-
- debugf2("\t\tELECTRICAL THROTTLING is %s\n",
- MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
-
- debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
- debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? "double" : "single");
- debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]);
- debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
+ edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
+
+ edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n",
+ MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
+
+ edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
+ edac_dbg(2, "\t\tNUMRANK: %s\n",
+ MTR_DIMM_RANK(mtr) ? "double" : "single");
+ edac_dbg(2, "\t\tNUMROW: %s\n",
+ MTR_DIMM_ROWS(mtr) == 0 ? "8,192 - 13 rows" :
+ MTR_DIMM_ROWS(mtr) == 1 ? "16,384 - 14 rows" :
+ MTR_DIMM_ROWS(mtr) == 2 ? "32,768 - 15 rows" :
+ "65,536 - 16 rows");
+ edac_dbg(2, "\t\tNUMCOL: %s\n",
+ MTR_DIMM_COLS(mtr) == 0 ? "1,024 - 10 columns" :
+ MTR_DIMM_COLS(mtr) == 1 ? "2,048 - 11 columns" :
+ MTR_DIMM_COLS(mtr) == 2 ? "4,096 - 12 columns" :
+ "reserved");
}
-static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel,
+static void handle_channel(struct i5400_pvt *pvt, int dimm, int channel,
struct i5400_dimm_info *dinfo)
{
int mtr;
int amb_present_reg;
int addrBits;
- mtr = determine_mtr(pvt, csrow, channel);
+ mtr = determine_mtr(pvt, dimm, channel);
if (MTR_DIMMS_PRESENT(mtr)) {
amb_present_reg = determine_amb_present_reg(pvt, channel);
/* Determine if there is a DIMM present in this DIMM slot */
- if (amb_present_reg & (1 << csrow)) {
+ if (amb_present_reg & (1 << dimm)) {
/* Start with the number of bits for a Bank
* on the DRAM */
addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
@@ -932,10 +956,10 @@ static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel,
static void calculate_dimm_size(struct i5400_pvt *pvt)
{
struct i5400_dimm_info *dinfo;
- int csrow, max_csrows;
+ int dimm, max_dimms;
char *p, *mem_buffer;
int space, n;
- int channel;
+ int channel, branch;
/* ================= Generate some debug output ================= */
space = PAGE_SIZE;
@@ -946,52 +970,52 @@ static void calculate_dimm_size(struct i5400_pvt *pvt)
return;
}
- /* Scan all the actual CSROWS
+ /* Scan all the actual DIMMS
* and calculate the information for each DIMM
- * Start with the highest csrow first, to display it first
- * and work toward the 0th csrow
+ * Start with the highest dimm first, to display it first
+ * and work toward the 0th dimm
*/
- max_csrows = pvt->maxdimmperch;
- for (csrow = max_csrows - 1; csrow >= 0; csrow--) {
+ max_dimms = pvt->maxdimmperch;
+ for (dimm = max_dimms - 1; dimm >= 0; dimm--) {
- /* on an odd csrow, first output a 'boundary' marker,
+ /* on an odd dimm, first output a 'boundary' marker,
* then reset the message buffer */
- if (csrow & 0x1) {
+ if (dimm & 0x1) {
n = snprintf(p, space, "---------------------------"
- "--------------------------------");
+ "-------------------------------");
p += n;
space -= n;
- debugf2("%s\n", mem_buffer);
+ edac_dbg(2, "%s\n", mem_buffer);
p = mem_buffer;
space = PAGE_SIZE;
}
- n = snprintf(p, space, "csrow %2d ", csrow);
+ n = snprintf(p, space, "dimm %2d ", dimm);
p += n;
space -= n;
for (channel = 0; channel < pvt->maxch; channel++) {
- dinfo = &pvt->dimm_info[csrow][channel];
- handle_channel(pvt, csrow, channel, dinfo);
+ dinfo = &pvt->dimm_info[dimm][channel];
+ handle_channel(pvt, dimm, channel, dinfo);
n = snprintf(p, space, "%4d MB | ", dinfo->megabytes);
p += n;
space -= n;
}
- debugf2("%s\n", mem_buffer);
+ edac_dbg(2, "%s\n", mem_buffer);
p = mem_buffer;
space = PAGE_SIZE;
}
/* Output the last bottom 'boundary' marker */
n = snprintf(p, space, "---------------------------"
- "--------------------------------");
+ "-------------------------------");
p += n;
space -= n;
- debugf2("%s\n", mem_buffer);
+ edac_dbg(2, "%s\n", mem_buffer);
p = mem_buffer;
space = PAGE_SIZE;
/* now output the 'channel' labels */
- n = snprintf(p, space, " ");
+ n = snprintf(p, space, " ");
p += n;
space -= n;
for (channel = 0; channel < pvt->maxch; channel++) {
@@ -1000,8 +1024,21 @@ static void calculate_dimm_size(struct i5400_pvt *pvt)
space -= n;
}
+ space -= n;
+ edac_dbg(2, "%s\n", mem_buffer);
+ p = mem_buffer;
+ space = PAGE_SIZE;
+
+ n = snprintf(p, space, " ");
+ p += n;
+ for (branch = 0; branch < MAX_BRANCHES; branch++) {
+ n = snprintf(p, space, " branch %d | ", branch);
+ p += n;
+ space -= n;
+ }
+
/* output the last message and free buffer */
- debugf2("%s\n", mem_buffer);
+ edac_dbg(2, "%s\n", mem_buffer);
kfree(mem_buffer);
}
@@ -1024,25 +1061,25 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
pvt = mci->pvt_info;
pci_read_config_dword(pvt->system_address, AMBASE,
- (u32 *) &pvt->ambase);
+ &pvt->u.ambase_bottom);
pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32),
- ((u32 *) &pvt->ambase) + sizeof(u32));
+ &pvt->u.ambase_top);
maxdimmperch = pvt->maxdimmperch;
maxch = pvt->maxch;
- debugf2("AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n",
- (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch);
+ edac_dbg(2, "AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n",
+ (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch);
/* Get the Branch Map regs */
pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm);
pvt->tolm >>= 12;
- debugf2("\nTOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm,
- pvt->tolm);
+ edac_dbg(2, "\nTOLM (number of 256M regions) =%u (0x%x)\n",
+ pvt->tolm, pvt->tolm);
actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28));
- debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
- actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28);
+ edac_dbg(2, "Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
+ actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28);
pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0);
pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1);
@@ -1051,22 +1088,24 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
limit = (pvt->mir0 >> 4) & 0x0fff;
way0 = pvt->mir0 & 0x1;
way1 = pvt->mir0 & 0x2;
- debugf2("MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0);
+ edac_dbg(2, "MIR0: limit= 0x%x WAY1= %u WAY0= %x\n",
+ limit, way1, way0);
limit = (pvt->mir1 >> 4) & 0xfff;
way0 = pvt->mir1 & 0x1;
way1 = pvt->mir1 & 0x2;
- debugf2("MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0);
+ edac_dbg(2, "MIR1: limit= 0x%x WAY1= %u WAY0= %x\n",
+ limit, way1, way0);
/* Get the set of MTR[0-3] regs by each branch */
- for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) {
+ for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++) {
int where = MTR0 + (slot_row * sizeof(u16));
/* Branch 0 set of MTR registers */
pci_read_config_word(pvt->branch_0, where,
&pvt->b0_mtr[slot_row]);
- debugf2("MTR%d where=0x%x B0 value=0x%x\n", slot_row, where,
- pvt->b0_mtr[slot_row]);
+ edac_dbg(2, "MTR%d where=0x%x B0 value=0x%x\n",
+ slot_row, where, pvt->b0_mtr[slot_row]);
if (pvt->maxch < CHANNELS_PER_BRANCH) {
pvt->b1_mtr[slot_row] = 0;
@@ -1076,22 +1115,22 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
/* Branch 1 set of MTR registers */
pci_read_config_word(pvt->branch_1, where,
&pvt->b1_mtr[slot_row]);
- debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row, where,
- pvt->b1_mtr[slot_row]);
+ edac_dbg(2, "MTR%d where=0x%x B1 value=0x%x\n",
+ slot_row, where, pvt->b1_mtr[slot_row]);
}
/* Read and dump branch 0's MTRs */
- debugf2("\nMemory Technology Registers:\n");
- debugf2(" Branch 0:\n");
- for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++)
+ edac_dbg(2, "Memory Technology Registers:\n");
+ edac_dbg(2, " Branch 0:\n");
+ for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++)
decode_mtr(slot_row, pvt->b0_mtr[slot_row]);
pci_read_config_word(pvt->branch_0, AMBPRESENT_0,
&pvt->b0_ambpresent0);
- debugf2("\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0);
+ edac_dbg(2, "\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0);
pci_read_config_word(pvt->branch_0, AMBPRESENT_1,
&pvt->b0_ambpresent1);
- debugf2("\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1);
+ edac_dbg(2, "\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1);
/* Only if we have 2 branches (4 channels) */
if (pvt->maxch < CHANNELS_PER_BRANCH) {
@@ -1099,18 +1138,18 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
pvt->b1_ambpresent1 = 0;
} else {
/* Read and dump branch 1's MTRs */
- debugf2(" Branch 1:\n");
- for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++)
+ edac_dbg(2, " Branch 1:\n");
+ for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++)
decode_mtr(slot_row, pvt->b1_mtr[slot_row]);
pci_read_config_word(pvt->branch_1, AMBPRESENT_0,
&pvt->b1_ambpresent0);
- debugf2("\t\tAMB-Branch 1-present0 0x%x:\n",
- pvt->b1_ambpresent0);
+ edac_dbg(2, "\t\tAMB-Branch 1-present0 0x%x:\n",
+ pvt->b1_ambpresent0);
pci_read_config_word(pvt->branch_1, AMBPRESENT_1,
&pvt->b1_ambpresent1);
- debugf2("\t\tAMB-Branch 1-present1 0x%x:\n",
- pvt->b1_ambpresent1);
+ edac_dbg(2, "\t\tAMB-Branch 1-present1 0x%x:\n",
+ pvt->b1_ambpresent1);
}
/* Go and determine the size of each DIMM and place in an
@@ -1119,7 +1158,7 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
}
/*
- * i5400_init_csrows Initialize the 'csrows' table within
+ * i5400_init_dimms Initialize the 'dimms' table within
* the mci control structure with the
* addressing of memory.
*
@@ -1127,64 +1166,67 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci)
* 0 success
* 1 no actual memory found on this MC
*/
-static int i5400_init_csrows(struct mem_ctl_info *mci)
+static int i5400_init_dimms(struct mem_ctl_info *mci)
{
struct i5400_pvt *pvt;
- struct csrow_info *p_csrow;
- int empty, channel_count;
- int max_csrows;
+ struct dimm_info *dimm;
+ int ndimms, channel_count;
+ int max_dimms;
int mtr;
- int csrow_megs;
- int channel;
- int csrow;
+ int size_mb;
+ int channel, slot;
pvt = mci->pvt_info;
channel_count = pvt->maxch;
- max_csrows = pvt->maxdimmperch;
-
- empty = 1; /* Assume NO memory */
-
- for (csrow = 0; csrow < max_csrows; csrow++) {
- p_csrow = &mci->csrows[csrow];
-
- p_csrow->csrow_idx = csrow;
-
- /* use branch 0 for the basis */
- mtr = determine_mtr(pvt, csrow, 0);
-
- /* if no DIMMS on this row, continue */
- if (!MTR_DIMMS_PRESENT(mtr))
- continue;
+ max_dimms = pvt->maxdimmperch;
- /* FAKE OUT VALUES, FIXME */
- p_csrow->first_page = 0 + csrow * 20;
- p_csrow->last_page = 9 + csrow * 20;
- p_csrow->page_mask = 0xFFF;
+ ndimms = 0;
- p_csrow->grain = 8;
-
- csrow_megs = 0;
- for (channel = 0; channel < pvt->maxch; channel++)
- csrow_megs += pvt->dimm_info[csrow][channel].megabytes;
-
- p_csrow->nr_pages = csrow_megs << 8;
-
- /* Assume DDR2 for now */
- p_csrow->mtype = MEM_FB_DDR2;
-
- /* ask what device type on this row */
- if (MTR_DRAM_WIDTH(mtr))
- p_csrow->dtype = DEV_X8;
- else
- p_csrow->dtype = DEV_X4;
-
- p_csrow->edac_mode = EDAC_S8ECD8ED;
-
- empty = 0;
+ /*
+ * FIXME: remove pvt->dimm_info[slot][channel] and use the 3
+ * layers here.
+ */
+ for (channel = 0; channel < mci->layers[0].size * mci->layers[1].size;
+ channel++) {
+ for (slot = 0; slot < mci->layers[2].size; slot++) {
+ mtr = determine_mtr(pvt, slot, channel);
+
+ /* if no DIMMS on this slot, continue */
+ if (!MTR_DIMMS_PRESENT(mtr))
+ continue;
+
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+ channel / 2, channel % 2, slot);
+
+ size_mb = pvt->dimm_info[slot][channel].megabytes;
+
+ edac_dbg(2, "dimm (branch %d channel %d slot %d): %d.%03d GB\n",
+ channel / 2, channel % 2, slot,
+ size_mb / 1000, size_mb % 1000);
+
+ dimm->nr_pages = size_mb << 8;
+ dimm->grain = 8;
+ dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4;
+ dimm->mtype = MEM_FB_DDR2;
+ /*
+ * The ECC mechanism is SDDC (aka SECC), which
+ * is similar to Chipkill.
+ */
+ dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ?
+ EDAC_S8ECD8ED : EDAC_S4ECD4ED;
+ ndimms++;
+ }
}
- return empty;
+ /*
+ * When just one DIMM is present, it should be at location (0,0,0).
+ * In such single-DIMM mode, the SDDC algorithm degrades to SECDED+.
+ */
+ if (ndimms == 1)
+ mci->dimms[0]->edac_mode = EDAC_SECDED;
+
+ return (ndimms == 0);
}
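Two conversions above are worth spelling out: the flat channel counter is decomposed as branch = channel / 2 and in-branch channel = channel % 2, matching the (branch, channel, slot) layers, and "size_mb << 8" is the MiB-to-pages conversion for 4 KiB pages (1 MiB = 256 pages). A tiny sketch:

	static inline int chan_to_branch(int channel)	{ return channel / 2; }
	static inline int chan_in_branch(int channel)	{ return channel % 2; }
	static inline unsigned int mib_to_pages(unsigned int mb)
	{
		return mb << 8;		/* assumes PAGE_SIZE == 4 KiB */
	}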
/*
@@ -1220,50 +1262,45 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
struct i5400_pvt *pvt;
- int num_channels;
- int num_dimms_per_channel;
- int num_csrows;
+ struct edac_mc_layer layers[3];
if (dev_idx >= ARRAY_SIZE(i5400_devs))
return -EINVAL;
- debugf0("MC: %s: %s(), pdev bus %u dev=0x%x fn=0x%x\n",
- __FILE__, __func__,
- pdev->bus->number,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ edac_dbg(0, "MC: pdev bus %u dev=0x%x fn=0x%x\n",
+ pdev->bus->number,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
/* We only are looking for func 0 of the set */
if (PCI_FUNC(pdev->devfn) != 0)
return -ENODEV;
- /* As we don't have a motherboard identification routine to determine
- * actual number of slots/dimms per channel, we thus utilize the
- * resource as specified by the chipset. Thus, we might have
- * have more DIMMs per channel than actually on the mobo, but this
- * allows the driver to support upto the chipset max, without
- * some fancy mobo determination.
+ /*
+ * allocate a new MC control structure
+ *
+ * This driver uses the DIMM slot as "csrow" and the rest as "channel".
*/
- num_dimms_per_channel = MAX_DIMMS_PER_CHANNEL;
- num_channels = MAX_CHANNELS;
- num_csrows = num_dimms_per_channel;
-
- debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n",
- __func__, num_channels, num_dimms_per_channel, num_csrows);
-
- /* allocate a new MC control structure */
- mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
-
+ layers[0].type = EDAC_MC_LAYER_BRANCH;
+ layers[0].size = MAX_BRANCHES;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = CHANNELS_PER_BRANCH;
+ layers[1].is_virt_csrow = false;
+ layers[2].type = EDAC_MC_LAYER_SLOT;
+ layers[2].size = DIMMS_PER_CHANNEL;
+ layers[2].is_virt_csrow = true;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (mci == NULL)
return -ENOMEM;
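With the three layers above, the core sizes mci->dimms as the product of the layer sizes: MAX_BRANCHES * CHANNELS_PER_BRANCH * DIMMS_PER_CHANNEL = 2 * 2 * 4 = 16 entries. A sketch of the count edac_mc_alloc() derives:

	static unsigned int tot_dimms(const struct edac_mc_layer *layers,
				      unsigned int n_layers)
	{
		unsigned int i, tot = 1;

		for (i = 0; i < n_layers; i++)
			tot *= layers[i].size;
		return tot;
	}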
- debugf0("MC: %s: %s(): mci = %p\n", __FILE__, __func__, mci);
+ edac_dbg(0, "MC: mci = %p\n", mci);
- mci->dev = &pdev->dev; /* record ptr to the generic device */
+ mci->pdev = &pdev->dev; /* record ptr to the generic device */
pvt = mci->pvt_info;
pvt->system_address = pdev; /* Record this device in our private */
- pvt->maxch = num_channels;
- pvt->maxdimmperch = num_dimms_per_channel;
+ pvt->maxch = MAX_CHANNELS;
+ pvt->maxdimmperch = DIMMS_PER_CHANNEL;
/* 'get' the pci devices we want to reserve for our use */
if (i5400_get_devices(mci, dev_idx))
@@ -1285,22 +1322,19 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
/* Set the function pointer to an actual operation function */
mci->edac_check = i5400_check_error;
- /* initialize the MC control structure 'csrows' table
+ /* initialize the MC control structure 'dimms' table
* with the mapping and control information */
- if (i5400_init_csrows(mci)) {
- debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n"
- " because i5400_init_csrows() returned nonzero "
- "value\n");
- mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */
+ if (i5400_init_dimms(mci)) {
+ edac_dbg(0, "MC: Setting mci->edac_cap to EDAC_FLAG_NONE because i5400_init_dimms() returned nonzero value\n");
+ mci->edac_cap = EDAC_FLAG_NONE; /* no dimms found */
} else {
- debugf1("MC: Enable error reporting now\n");
+ edac_dbg(1, "MC: Enable error reporting now\n");
i5400_enable_error_reporting(mci);
}
/* add this new MC control structure to EDAC's list of MCs */
if (edac_mc_add_mc(mci)) {
- debugf0("MC: %s: %s(): failed edac_mc_add_mc()\n",
- __FILE__, __func__);
+ edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
/* FIXME: perhaps some code should go here that disables error
* reporting if we just enabled it
*/
@@ -1339,12 +1373,11 @@ fail0:
* negative on error
* count (>= 0)
*/
-static int __devinit i5400_init_one(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static int i5400_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
int rc;
- debugf0("MC: %s: %s()\n", __FILE__, __func__);
+ edac_dbg(0, "MC:\n");
/* wake up device */
rc = pci_enable_device(pdev);
@@ -1359,11 +1392,11 @@ static int __devinit i5400_init_one(struct pci_dev *pdev,
* i5400_remove_one destructor for one instance of device
*
*/
-static void __devexit i5400_remove_one(struct pci_dev *pdev)
+static void i5400_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
- debugf0("%s: %s()\n", __FILE__, __func__);
+ edac_dbg(0, "\n");
if (i5400_pci)
edac_pci_release_generic_ctl(i5400_pci);
@@ -1375,6 +1408,8 @@ static void __devexit i5400_remove_one(struct pci_dev *pdev)
/* retrieve references to resources, and free those resources */
i5400_put_devices(mci);
+ pci_disable_device(pdev);
+
edac_mc_free(mci);
}
@@ -1383,7 +1418,7 @@ static void __devexit i5400_remove_one(struct pci_dev *pdev)
*
* The "E500P" device is the first device supported.
*/
-static const struct pci_device_id i5400_pci_tbl[] __devinitdata = {
+static const struct pci_device_id i5400_pci_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)},
{0,} /* 0 terminated list. */
};
@@ -1397,7 +1432,7 @@ MODULE_DEVICE_TABLE(pci, i5400_pci_tbl);
static struct pci_driver i5400_driver = {
.name = "i5400_edac",
.probe = i5400_init_one,
- .remove = __devexit_p(i5400_remove_one),
+ .remove = i5400_remove_one,
.id_table = i5400_pci_tbl,
};
@@ -1409,7 +1444,7 @@ static int __init i5400_init(void)
{
int pci_rc;
- debugf2("MC: %s: %s()\n", __FILE__, __func__);
+ edac_dbg(2, "MC:\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -1425,7 +1460,7 @@ static int __init i5400_init(void)
*/
static void __exit i5400_exit(void)
{
- debugf2("MC: %s: %s()\n", __FILE__, __func__);
+ edac_dbg(2, "MC:\n");
pci_unregister_driver(&i5400_driver);
}
@@ -1434,7 +1469,7 @@ module_exit(i5400_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
+MODULE_AUTHOR("Mauro Carvalho Chehab");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - "
I5400_REVISION);
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index 05523b50427..dcac982fdc7 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -5,7 +5,7 @@
* GNU General Public License version 2 only.
*
* Copyright (c) 2010 by:
- * Mauro Carvalho Chehab <mchehab@redhat.com>
+ * Mauro Carvalho Chehab
*
* Red Hat Inc. http://www.redhat.com
*
@@ -31,7 +31,7 @@
/*
* Alter this version for the I7300 module when modifications are made
*/
-#define I7300_REVISION " Ver: 1.0.0 " __DATE__
+#define I7300_REVISION " Ver: 1.0.0"
#define EDAC_MOD_STR "i7300_edac"
@@ -162,7 +162,7 @@ static struct edac_pci_ctl_info *i7300_pci;
#define AMBPRESENT_0 0x64
#define AMBPRESENT_1 0x66
-const static u16 mtr_regs[MAX_SLOTS] = {
+static const u16 mtr_regs[MAX_SLOTS] = {
0x80, 0x84, 0x88, 0x8c,
0x82, 0x86, 0x8a, 0x8e
};
@@ -182,24 +182,6 @@ const static u16 mtr_regs[MAX_SLOTS] = {
#define MTR_DIMM_COLS(mtr) ((mtr) & 0x3)
#define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10)
-#ifdef CONFIG_EDAC_DEBUG
-/* MTR NUMROW */
-static const char *numrow_toString[] = {
- "8,192 - 13 rows",
- "16,384 - 14 rows",
- "32,768 - 15 rows",
- "65,536 - 16 rows"
-};
-
-/* MTR NUMCOL */
-static const char *numcol_toString[] = {
- "1,024 - 10 columns",
- "2,048 - 11 columns",
- "4,096 - 12 columns",
- "reserved"
-};
-#endif
-
/************************************************
* i7300 Register definitions for error detection
************************************************/
@@ -215,8 +197,8 @@ static const char *ferr_fat_fbd_name[] = {
[0] = "Memory Write error on non-redundant retry or "
"FBD configuration Write error on retry",
};
-#define GET_FBD_FAT_IDX(fbderr) (fbderr & (3 << 28))
-#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3))
+#define GET_FBD_FAT_IDX(fbderr) (((fbderr) >> 28) & 3)
+#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 22))
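The old GET_FBD_FAT_IDX left the field in place at bits 29:28, so comparisons such as "== 2" later in the driver could never be true; the fixed macro shifts the field down first. A user-space demonstration:

	#include <assert.h>
	#include <stdint.h>

	#define OLD_IDX(x)	((x) & (3 << 28))
	#define NEW_IDX(x)	(((x) >> 28) & 3)

	int main(void)
	{
		uint32_t ferr = 2u << 28;	/* hypothetical branch-1 fatal error */

		assert(NEW_IDX(ferr) == 2);		/* decodes correctly */
		assert(OLD_IDX(ferr) == 0x20000000);	/* old form never equals 2 */
		return 0;
	}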
#define FERR_NF_FBD 0xa0
static const char *ferr_nf_fbd_name[] = {
@@ -243,7 +225,7 @@ static const char *ferr_nf_fbd_name[] = {
[1] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
[0] = "Uncorrectable Data ECC on Replay",
};
-#define GET_FBD_NF_IDX(fbderr) (fbderr & (3 << 28))
+#define GET_FBD_NF_IDX(fbderr) (((fbderr) >> 28) & 3)
#define FERR_NF_FBD_ERR_MASK ((1 << 24) | (1 << 23) | (1 << 22) | (1 << 21) |\
(1 << 18) | (1 << 17) | (1 << 16) | (1 << 15) |\
(1 << 14) | (1 << 13) | (1 << 11) | (1 << 10) |\
@@ -372,7 +354,7 @@ static const char *get_err_from_table(const char *table[], int size, int pos)
static void i7300_process_error_global(struct mem_ctl_info *mci)
{
struct i7300_pvt *pvt;
- u32 errnum, value;
+ u32 errnum, error_reg;
unsigned long errors;
const char *specific;
bool is_fatal;
@@ -381,9 +363,9 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* read in the 1st FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_HI, &value);
- if (unlikely(value)) {
- errors = value;
+ FERR_GLOBAL_HI, &error_reg);
+ if (unlikely(error_reg)) {
+ errors = error_reg;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_global_hi_name));
specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum);
@@ -391,15 +373,15 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* Clear the error bit */
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_HI, value);
+ FERR_GLOBAL_HI, error_reg);
goto error_global;
}
pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_LO, &value);
- if (unlikely(value)) {
- errors = value;
+ FERR_GLOBAL_LO, &error_reg);
+ if (unlikely(error_reg)) {
+ errors = error_reg;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_global_lo_name));
specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum);
@@ -407,7 +389,7 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* Clear the error bit */
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_LO, value);
+ FERR_GLOBAL_LO, error_reg);
goto error_global;
}
@@ -427,7 +409,7 @@ error_global:
static void i7300_process_fbd_error(struct mem_ctl_info *mci)
{
struct i7300_pvt *pvt;
- u32 errnum, value;
+ u32 errnum, value, error_reg;
u16 val16;
unsigned branch, channel, bank, rank, cas, ras;
u32 syndrome;
@@ -440,14 +422,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
/* read in the 1st FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
- FERR_FAT_FBD, &value);
- if (unlikely(value & FERR_FAT_FBD_ERR_MASK)) {
- errors = value & FERR_FAT_FBD_ERR_MASK ;
+ FERR_FAT_FBD, &error_reg);
+ if (unlikely(error_reg & FERR_FAT_FBD_ERR_MASK)) {
+ errors = error_reg & FERR_FAT_FBD_ERR_MASK ;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_fat_fbd_name));
specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum);
+ branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
- branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
NRECMEMA, &val16);
bank = NRECMEMA_BANK(val16);
@@ -455,42 +437,38 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
NRECMEMB, &value);
-
is_wr = NRECMEMB_IS_WR(value);
cas = NRECMEMB_CAS(value);
ras = NRECMEMB_RAS(value);
+ /* Clean the error register */
+ pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
+ FERR_FAT_FBD, error_reg);
+
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
- "FATAL (Branch=%d DRAM-Bank=%d %s "
- "RAS=%d CAS=%d Err=0x%lx (%s))",
- branch, bank,
- is_wr ? "RDWR" : "RD",
- ras, cas,
- errors, specific);
-
- /* Call the helper to output message */
- edac_mc_handle_fbd_ue(mci, rank, branch << 1,
- (branch << 1) + 1,
- pvt->tmp_prt_buffer);
+ "Bank=%d RAS=%d CAS=%d Err=0x%lx (%s))",
+ bank, ras, cas, errors, specific);
+
+ edac_mc_handle_error(HW_EVENT_ERR_FATAL, mci, 1, 0, 0, 0,
+ branch, -1, rank,
+ is_wr ? "Write error" : "Read error",
+ pvt->tmp_prt_buffer);
+
}
/* read in the 1st NON-FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
- FERR_NF_FBD, &value);
- if (unlikely(value & FERR_NF_FBD_ERR_MASK)) {
- errors = value & FERR_NF_FBD_ERR_MASK;
+ FERR_NF_FBD, &error_reg);
+ if (unlikely(error_reg & FERR_NF_FBD_ERR_MASK)) {
+ errors = error_reg & FERR_NF_FBD_ERR_MASK;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_nf_fbd_name));
specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum);
-
- /* Clear the error bit */
- pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_LO, value);
+ branch = (GET_FBD_NF_IDX(error_reg) == 2) ? 1 : 0;
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
REDMEMA, &syndrome);
- branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
RECMEMA, &val16);
bank = RECMEMA_BANK(val16);
@@ -498,37 +476,30 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
RECMEMB, &value);
-
is_wr = RECMEMB_IS_WR(value);
cas = RECMEMB_CAS(value);
ras = RECMEMB_RAS(value);
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
REDMEMB, &value);
-
channel = (branch << 1);
if (IS_SECOND_CH(value))
channel++;
+ /* Clear the error bit */
+ pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
+ FERR_NF_FBD, error_reg);
+
/* Form out message */
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
- "Corrected error (Branch=%d, Channel %d), "
- " DRAM-Bank=%d %s "
- "RAS=%d CAS=%d, CE Err=0x%lx, Syndrome=0x%08x(%s))",
- branch, channel,
- bank,
- is_wr ? "RDWR" : "RD",
- ras, cas,
- errors, syndrome, specific);
-
- /*
- * Call the helper to output message
- * NOTE: Errors are reported per-branch, and not per-channel
- * Currently, we don't know how to identify the right
- * channel.
- */
- edac_mc_handle_fbd_ce(mci, rank, channel,
- pvt->tmp_prt_buffer);
+ "DRAM-Bank=%d RAS=%d CAS=%d, Err=0x%lx (%s))",
+ bank, ras, cas, errors, specific);
+
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0,
+ syndrome,
+ branch >> 1, channel % 2, rank,
+ is_wr ? "Write error" : "Read error",
+ pvt->tmp_prt_buffer);
}
return;
}
@@ -616,8 +587,7 @@ static void i7300_enable_error_reporting(struct mem_ctl_info *mci)
static int decode_mtr(struct i7300_pvt *pvt,
int slot, int ch, int branch,
struct i7300_dimm_info *dinfo,
- struct csrow_info *p_csrow,
- u32 *nr_pages)
+ struct dimm_info *dimm)
{
int mtr, ans, addrBits, channel;
@@ -626,9 +596,8 @@ static int decode_mtr(struct i7300_pvt *pvt,
mtr = pvt->mtr[slot][branch];
ans = MTR_DIMMS_PRESENT(mtr) ? 1 : 0;
- debugf2("\tMTR%d CH%d: DIMMs are %s (mtr)\n",
- slot, channel,
- ans ? "Present" : "NOT Present");
+ edac_dbg(2, "\tMTR%d CH%d: DIMMs are %sPresent (mtr)\n",
+ slot, channel, ans ? "" : "NOT ");
/* Determine if there is a DIMM present in this DIMM slot */
if (!ans)
@@ -649,23 +618,26 @@ static int decode_mtr(struct i7300_pvt *pvt,
addrBits -= 3; /* 8 bits per byte */
dinfo->megabytes = 1 << addrBits;
- *nr_pages = dinfo->megabytes << 8;
-
- debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
- debugf2("\t\tELECTRICAL THROTTLING is %s\n",
- MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
-
- debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
- debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANKS(mtr) ? "double" : "single");
- debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]);
- debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
- debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes);
-
- p_csrow->grain = 8;
- p_csrow->mtype = MEM_FB_DDR2;
- p_csrow->csrow_idx = slot;
- p_csrow->page_mask = 0;
+ edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
+
+ edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n",
+ MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
+
+ edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
+ edac_dbg(2, "\t\tNUMRANK: %s\n",
+ MTR_DIMM_RANKS(mtr) ? "double" : "single");
+ edac_dbg(2, "\t\tNUMROW: %s\n",
+ MTR_DIMM_ROWS(mtr) == 0 ? "8,192 - 13 rows" :
+ MTR_DIMM_ROWS(mtr) == 1 ? "16,384 - 14 rows" :
+ MTR_DIMM_ROWS(mtr) == 2 ? "32,768 - 15 rows" :
+ "65,536 - 16 rows");
+ edac_dbg(2, "\t\tNUMCOL: %s\n",
+ MTR_DIMM_COLS(mtr) == 0 ? "1,024 - 10 columns" :
+ MTR_DIMM_COLS(mtr) == 1 ? "2,048 - 11 columns" :
+ MTR_DIMM_COLS(mtr) == 2 ? "4,096 - 12 columns" :
+ "reserved");
+ edac_dbg(2, "\t\tSIZE: %d MB\n", dinfo->megabytes);
/*
* The type of error detection actually depends on the
@@ -676,26 +648,29 @@ static int decode_mtr(struct i7300_pvt *pvt,
* See datasheet Sections 7.3.6 to 7.3.8
*/
+ dimm->nr_pages = MiB_TO_PAGES(dinfo->megabytes);
+ dimm->grain = 8;
+ dimm->mtype = MEM_FB_DDR2;
if (IS_SINGLE_MODE(pvt->mc_settings_a)) {
- p_csrow->edac_mode = EDAC_SECDED;
- debugf2("\t\tECC code is 8-byte-over-32-byte SECDED+ code\n");
+ dimm->edac_mode = EDAC_SECDED;
+ edac_dbg(2, "\t\tECC code is 8-byte-over-32-byte SECDED+ code\n");
} else {
- debugf2("\t\tECC code is on Lockstep mode\n");
+ edac_dbg(2, "\t\tECC code is on Lockstep mode\n");
if (MTR_DRAM_WIDTH(mtr) == 8)
- p_csrow->edac_mode = EDAC_S8ECD8ED;
+ dimm->edac_mode = EDAC_S8ECD8ED;
else
- p_csrow->edac_mode = EDAC_S4ECD4ED;
+ dimm->edac_mode = EDAC_S4ECD4ED;
}
/* ask what device type on this row */
if (MTR_DRAM_WIDTH(mtr) == 8) {
- debugf2("\t\tScrub algorithm for x8 is on %s mode\n",
- IS_SCRBALGO_ENHANCED(pvt->mc_settings) ?
- "enhanced" : "normal");
+ edac_dbg(2, "\t\tScrub algorithm for x8 is on %s mode\n",
+ IS_SCRBALGO_ENHANCED(pvt->mc_settings) ?
+ "enhanced" : "normal");
- p_csrow->dtype = DEV_X8;
+ dimm->dtype = DEV_X8;
} else
- p_csrow->dtype = DEV_X4;
+ dimm->dtype = DEV_X4;
return mtr;
}
@@ -725,14 +700,14 @@ static void print_dimm_size(struct i7300_pvt *pvt)
p += n;
space -= n;
}
- debugf2("%s\n", pvt->tmp_prt_buffer);
+ edac_dbg(2, "%s\n", pvt->tmp_prt_buffer);
p = pvt->tmp_prt_buffer;
space = PAGE_SIZE;
n = snprintf(p, space, "-------------------------------"
"------------------------------");
p += n;
space -= n;
- debugf2("%s\n", pvt->tmp_prt_buffer);
+ edac_dbg(2, "%s\n", pvt->tmp_prt_buffer);
p = pvt->tmp_prt_buffer;
space = PAGE_SIZE;
@@ -748,7 +723,7 @@ static void print_dimm_size(struct i7300_pvt *pvt)
space -= n;
}
- debugf2("%s\n", pvt->tmp_prt_buffer);
+ edac_dbg(2, "%s\n", pvt->tmp_prt_buffer);
p = pvt->tmp_prt_buffer;
space = PAGE_SIZE;
}
@@ -757,7 +732,7 @@ static void print_dimm_size(struct i7300_pvt *pvt)
"------------------------------");
p += n;
space -= n;
- debugf2("%s\n", pvt->tmp_prt_buffer);
+ edac_dbg(2, "%s\n", pvt->tmp_prt_buffer);
p = pvt->tmp_prt_buffer;
space = PAGE_SIZE;
#endif
@@ -773,60 +748,68 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
{
struct i7300_pvt *pvt;
struct i7300_dimm_info *dinfo;
- struct csrow_info *p_csrow;
int rc = -ENODEV;
int mtr;
- int ch, branch, slot, channel;
- u32 last_page = 0, nr_pages;
+ int ch, branch, slot, channel, max_channel, max_branch;
+ struct dimm_info *dimm;
pvt = mci->pvt_info;
- debugf2("Memory Technology Registers:\n");
+ edac_dbg(2, "Memory Technology Registers:\n");
+
+ if (IS_SINGLE_MODE(pvt->mc_settings_a)) {
+ max_branch = 1;
+ max_channel = 1;
+ } else {
+ max_branch = MAX_BRANCHES;
+ max_channel = MAX_CH_PER_BRANCH;
+ }
/* Get the AMB present registers for the four channels */
- for (branch = 0; branch < MAX_BRANCHES; branch++) {
+ for (branch = 0; branch < max_branch; branch++) {
/* Read and dump branch 0's MTRs */
channel = to_channel(0, branch);
pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch],
AMBPRESENT_0,
&pvt->ambpresent[channel]);
- debugf2("\t\tAMB-present CH%d = 0x%x:\n",
- channel, pvt->ambpresent[channel]);
+ edac_dbg(2, "\t\tAMB-present CH%d = 0x%x:\n",
+ channel, pvt->ambpresent[channel]);
+
+ if (max_channel == 1)
+ continue;
channel = to_channel(1, branch);
pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch],
AMBPRESENT_1,
&pvt->ambpresent[channel]);
- debugf2("\t\tAMB-present CH%d = 0x%x:\n",
- channel, pvt->ambpresent[channel]);
+ edac_dbg(2, "\t\tAMB-present CH%d = 0x%x:\n",
+ channel, pvt->ambpresent[channel]);
}
/* Get the set of MTR[0-7] regs by each branch */
for (slot = 0; slot < MAX_SLOTS; slot++) {
int where = mtr_regs[slot];
- for (branch = 0; branch < MAX_BRANCHES; branch++) {
+ for (branch = 0; branch < max_branch; branch++) {
pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch],
where,
&pvt->mtr[slot][branch]);
- for (ch = 0; ch < MAX_BRANCHES; ch++) {
+ for (ch = 0; ch < max_channel; ch++) {
int channel = to_channel(ch, branch);
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+ mci->n_layers, branch, ch, slot);
+
dinfo = &pvt->dimm_info[slot][channel];
- p_csrow = &mci->csrows[slot];
mtr = decode_mtr(pvt, slot, ch, branch,
- dinfo, p_csrow, &nr_pages);
+ dinfo, dimm);
+
/* if no DIMMS on this row, continue */
if (!MTR_DIMMS_PRESENT(mtr))
continue;
- /* Update per_csrow memory count */
- p_csrow->nr_pages += nr_pages;
- p_csrow->first_page = last_page;
- last_page += nr_pages;
- p_csrow->last_page = last_page;
-
rc = 0;
+
}
}
}
@@ -842,12 +825,11 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
static void decode_mir(int mir_no, u16 mir[MAX_MIR])
{
if (mir[mir_no] & 3)
- debugf2("MIR%d: limit= 0x%x Branch(es) that participate:"
- " %s %s\n",
- mir_no,
- (mir[mir_no] >> 4) & 0xfff,
- (mir[mir_no] & 1) ? "B0" : "",
- (mir[mir_no] & 2) ? "B1" : "");
+ edac_dbg(2, "MIR%d: limit= 0x%x Branch(es) that participate: %s %s\n",
+ mir_no,
+ (mir[mir_no] >> 4) & 0xfff,
+ (mir[mir_no] & 1) ? "B0" : "",
+ (mir[mir_no] & 2) ? "B1" : "");
}
/**
@@ -867,17 +849,17 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_dev_16_0_fsb_ctlr, AMBASE,
(u32 *) &pvt->ambase);
- debugf2("AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase);
+ edac_dbg(2, "AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase);
/* Get the Branch Map regs */
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, TOLM, &pvt->tolm);
pvt->tolm >>= 12;
- debugf2("TOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm,
- pvt->tolm);
+ edac_dbg(2, "TOLM (number of 256M regions) =%u (0x%x)\n",
+ pvt->tolm, pvt->tolm);
actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28));
- debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
- actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28);
+ edac_dbg(2, "Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
+ actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28);
/* Get memory controller settings */
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, MC_SETTINGS,
@@ -886,15 +868,15 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci)
&pvt->mc_settings_a);
if (IS_SINGLE_MODE(pvt->mc_settings_a))
- debugf0("Memory controller operating on single mode\n");
+ edac_dbg(0, "Memory controller operating on single mode\n");
else
- debugf0("Memory controller operating on %s mode\n",
- IS_MIRRORED(pvt->mc_settings) ? "mirrored" : "non-mirrored");
+ edac_dbg(0, "Memory controller operating on %smirrored mode\n",
+ IS_MIRRORED(pvt->mc_settings) ? "" : "non-");
- debugf0("Error detection is %s\n",
- IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
- debugf0("Retry is %s\n",
- IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
+ edac_dbg(0, "Error detection is %s\n",
+ IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
+ edac_dbg(0, "Retry is %s\n",
+ IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
/* Get Memory Interleave Range registers */
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0,
@@ -952,7 +934,7 @@ static void i7300_put_devices(struct mem_ctl_info *mci)
* Device 21 function 0: PCI_DEVICE_ID_INTEL_I7300_MCH_FB0
* Device 22 function 0: PCI_DEVICE_ID_INTEL_I7300_MCH_FB1
*/
-static int __devinit i7300_get_devices(struct mem_ctl_info *mci)
+static int i7300_get_devices(struct mem_ctl_info *mci)
{
struct i7300_pvt *pvt;
struct pci_dev *pdev;
@@ -961,45 +943,47 @@ static int __devinit i7300_get_devices(struct mem_ctl_info *mci)
/* Attempt to 'get' the MCH register we want */
pdev = NULL;
- while (!pvt->pci_dev_16_1_fsb_addr_map ||
- !pvt->pci_dev_16_2_fsb_err_regs) {
- pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev);
- if (!pdev) {
- /* End of list, leave */
- i7300_printk(KERN_ERR,
- "'system address,Process Bus' "
- "device not found:"
- "vendor 0x%x device 0x%x ERR funcs "
- "(broken BIOS?)\n",
- PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_I7300_MCH_ERR);
- goto error;
- }
-
+ while ((pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_I7300_MCH_ERR,
+ pdev))) {
/* Store device 16 funcs 1 and 2 */
switch (PCI_FUNC(pdev->devfn)) {
case 1:
- pvt->pci_dev_16_1_fsb_addr_map = pdev;
+ if (!pvt->pci_dev_16_1_fsb_addr_map)
+ pvt->pci_dev_16_1_fsb_addr_map =
+ pci_dev_get(pdev);
break;
case 2:
- pvt->pci_dev_16_2_fsb_err_regs = pdev;
+ if (!pvt->pci_dev_16_2_fsb_err_regs)
+ pvt->pci_dev_16_2_fsb_err_regs =
+ pci_dev_get(pdev);
break;
}
}
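Note the pci_dev_get() calls above: the scan keeps iterating after a match, and pci_get_device() drops the reference on the device it was handed when advancing, so each stored function must take its own reference or the saved pointer would go stale before the loop ends. The !pvt->... guards also make the loop keep only the first instance of each function.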
- debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n",
- pci_name(pvt->pci_dev_16_0_fsb_ctlr),
- pvt->pci_dev_16_0_fsb_ctlr->vendor,
- pvt->pci_dev_16_0_fsb_ctlr->device);
- debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
- pci_name(pvt->pci_dev_16_1_fsb_addr_map),
- pvt->pci_dev_16_1_fsb_addr_map->vendor,
- pvt->pci_dev_16_1_fsb_addr_map->device);
- debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n",
- pci_name(pvt->pci_dev_16_2_fsb_err_regs),
- pvt->pci_dev_16_2_fsb_err_regs->vendor,
- pvt->pci_dev_16_2_fsb_err_regs->device);
+ if (!pvt->pci_dev_16_1_fsb_addr_map ||
+ !pvt->pci_dev_16_2_fsb_err_regs) {
+ /* At least one device was not found */
+ i7300_printk(KERN_ERR,
+ "'system address,Process Bus' device not found:"
+ "vendor 0x%x device 0x%x ERR funcs (broken BIOS?)\n",
+ PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_I7300_MCH_ERR);
+ goto error;
+ }
+
+ edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n",
+ pci_name(pvt->pci_dev_16_0_fsb_ctlr),
+ pvt->pci_dev_16_0_fsb_ctlr->vendor,
+ pvt->pci_dev_16_0_fsb_ctlr->device);
+ edac_dbg(1, "Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
+ pci_name(pvt->pci_dev_16_1_fsb_addr_map),
+ pvt->pci_dev_16_1_fsb_addr_map->vendor,
+ pvt->pci_dev_16_1_fsb_addr_map->device);
+ edac_dbg(1, "FSB Error Regs - PCI Bus ID: %s %x:%x\n",
+ pci_name(pvt->pci_dev_16_2_fsb_err_regs),
+ pvt->pci_dev_16_2_fsb_err_regs->vendor,
+ pvt->pci_dev_16_2_fsb_err_regs->device);
pvt->pci_dev_2x_0_fbd_branch[0] = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_I7300_MCH_FB0,
@@ -1037,14 +1021,11 @@ error:
* @pdev: struct pci_dev pointer
* @id: struct pci_device_id pointer - currently unused
*/
-static int __devinit i7300_init_one(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static int i7300_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[3];
struct i7300_pvt *pvt;
- int num_channels;
- int num_dimms_per_channel;
- int num_csrows;
int rc;
/* wake up device */
@@ -1052,38 +1033,31 @@ static int __devinit i7300_init_one(struct pci_dev *pdev,
if (rc == -EIO)
return rc;
- debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n",
- __func__,
- pdev->bus->number,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ edac_dbg(0, "MC: pdev bus %u dev=0x%x fn=0x%x\n",
+ pdev->bus->number,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
/* We only are looking for func 0 of the set */
if (PCI_FUNC(pdev->devfn) != 0)
return -ENODEV;
- /* As we don't have a motherboard identification routine to determine
- * actual number of slots/dimms per channel, we thus utilize the
- * resource as specified by the chipset. Thus, we might have
- * have more DIMMs per channel than actually on the mobo, but this
- * allows the driver to support upto the chipset max, without
- * some fancy mobo determination.
- */
- num_dimms_per_channel = MAX_SLOTS;
- num_channels = MAX_CHANNELS;
- num_csrows = MAX_SLOTS * MAX_CHANNELS;
-
- debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n",
- __func__, num_channels, num_dimms_per_channel, num_csrows);
-
/* allocate a new MC control structure */
- mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
-
+ layers[0].type = EDAC_MC_LAYER_BRANCH;
+ layers[0].size = MAX_BRANCHES;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = MAX_CH_PER_BRANCH;
+ layers[1].is_virt_csrow = true;
+ layers[2].type = EDAC_MC_LAYER_SLOT;
+ layers[2].size = MAX_SLOTS;
+ layers[2].is_virt_csrow = true;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (mci == NULL)
return -ENOMEM;
- debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
+ edac_dbg(0, "MC: mci = %p\n", mci);
- mci->dev = &pdev->dev; /* record ptr to the generic device */
+ mci->pdev = &pdev->dev; /* record ptr to the generic device */
pvt = mci->pvt_info;
pvt->pci_dev_16_0_fsb_ctlr = pdev; /* Record this device in our private */
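The three edac_mc_layer entries above replace the old flat csrow/channel arguments: edac_mc_alloc() now sizes its DIMM table from the product of the per-layer sizes, and is_virt_csrow marks the layers that get folded into the legacy csrow sysfs view. A minimal sketch of the sizing rule, assuming only that the per-layer sizes multiply out (struct edac_mc_layer comes from edac_core.h):

	/* Sketch: dimm_info entries needed by the layer-based allocator */
	static unsigned int tot_dimms(const struct edac_mc_layer *layers,
				      unsigned int n_layers)
	{
		unsigned int i, tot = 1;

		for (i = 0; i < n_layers; i++)
			tot *= layers[i].size;	/* branches * channels * slots */
		return tot;
	}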
@@ -1114,19 +1088,16 @@ static int __devinit i7300_init_one(struct pci_dev *pdev,
/* initialize the MC control structure 'csrows' table
* with the mapping and control information */
if (i7300_get_mc_regs(mci)) {
- debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n"
- " because i7300_init_csrows() returned nonzero "
- "value\n");
+ edac_dbg(0, "MC: Setting mci->edac_cap to EDAC_FLAG_NONE because i7300_init_csrows() returned nonzero value\n");
mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */
} else {
- debugf1("MC: Enable error reporting now\n");
+ edac_dbg(1, "MC: Enable error reporting now\n");
i7300_enable_error_reporting(mci);
}
/* add this new MC control structure to EDAC's list of MCs */
if (edac_mc_add_mc(mci)) {
- debugf0("MC: " __FILE__
- ": %s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
/* FIXME: perhaps some code should go here that disables error
* reporting if we just enabled it
*/
@@ -1163,12 +1134,12 @@ fail0:
* i7300_remove_one() - Remove the driver
* @pdev: struct pci_dev pointer
*/
-static void __devexit i7300_remove_one(struct pci_dev *pdev)
+static void i7300_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
char *tmp;
- debugf0(__FILE__ ": %s()\n", __func__);
+ edac_dbg(0, "\n");
if (i7300_pci)
edac_pci_release_generic_ctl(i7300_pci);
@@ -1191,7 +1162,7 @@ static void __devexit i7300_remove_one(struct pci_dev *pdev)
*
* Has only 8086:360c PCI ID
*/
-static const struct pci_device_id i7300_pci_tbl[] __devinitdata = {
+static const struct pci_device_id i7300_pci_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)},
{0,} /* 0 terminated list. */
};
@@ -1204,7 +1175,7 @@ MODULE_DEVICE_TABLE(pci, i7300_pci_tbl);
static struct pci_driver i7300_driver = {
.name = "i7300_edac",
.probe = i7300_init_one,
- .remove = __devexit_p(i7300_remove_one),
+ .remove = i7300_remove_one,
.id_table = i7300_pci_tbl,
};
@@ -1215,7 +1186,7 @@ static int __init i7300_init(void)
{
int pci_rc;
- debugf2("MC: " __FILE__ ": %s()\n", __func__);
+ edac_dbg(2, "\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -1230,7 +1201,7 @@ static int __init i7300_init(void)
*/
static void __exit i7300_exit(void)
{
- debugf2("MC: " __FILE__ ": %s()\n", __func__);
+ edac_dbg(2, "\n");
pci_unregister_driver(&i7300_driver);
}
@@ -1238,7 +1209,7 @@ module_init(i7300_init);
module_exit(i7300_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
+MODULE_AUTHOR("Mauro Carvalho Chehab");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - "
I7300_REVISION);
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 362861c1577..9cd0b301f81 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1,6 +1,6 @@
/* Intel i7 core/Nehalem Memory Controller kernel module
*
- * This driver supports yhe memory controllers found on the Intel
+ * This driver supports the memory controllers found on the Intel
* processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
* Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
* and Westmere-EP.
@@ -9,7 +9,7 @@
* GNU General Public License version 2 only.
*
* Copyright (c) 2009-2010 by:
- * Mauro Carvalho Chehab <mchehab@redhat.com>
+ * Mauro Carvalho Chehab
*
* Red Hat Inc. http://www.redhat.com
*
@@ -31,11 +31,13 @@
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
+#include <linux/dmi.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
-#include <linux/edac_mce.h>
#include <linux/smp.h>
+#include <asm/mce.h>
#include <asm/processor.h>
+#include <asm/div64.h>
#include "edac_core.h"
@@ -59,7 +61,7 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
* Alter this version for the module when modifications are made
*/
-#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
+#define I7CORE_REVISION " Ver: 1.0.0"
#define EDAC_MOD_STR "i7core_edac"
/*
@@ -78,6 +80,8 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/* OFFSETS for Device 0 Function 0 */
#define MC_CFG_CONTROL 0x90
+ #define MC_CFG_UNLOCK 0x02
+ #define MC_CFG_LOCK 0x00
/* OFFSETS for Device 3 Function 0 */
@@ -86,7 +90,7 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
#define MC_MAX_DOD 0x64
/*
- * OFFSETS for Device 3 Function 4, as inicated on Xeon 5500 datasheet:
+ * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
* http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
*/
@@ -97,7 +101,16 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
#define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
#define DIMM0_COR_ERR(r) ((r) & 0x7fff)
-/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */
+/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
+#define MC_SSRCONTROL 0x48
+ #define SSR_MODE_DISABLE 0x00
+ #define SSR_MODE_ENABLE 0x01
+ #define SSR_MODE_MASK 0x03
+
+#define MC_SCRUB_CONTROL 0x4c
+ #define STARTSCRUB (1 << 24)
+ #define SCRUBINTERVAL_MASK 0xffffff
+
#define MC_COR_ECC_CNT_0 0x80
#define MC_COR_ECC_CNT_1 0x84
#define MC_COR_ECC_CNT_2 0x88
@@ -208,7 +221,9 @@ struct i7core_inject {
};
struct i7core_channel {
- u32 ranks;
+ bool is_3dimms_present;
+ bool is_single_4rank;
+ bool has_4rank;
u32 dimms;
};
@@ -233,6 +248,8 @@ struct i7core_dev {
};
struct i7core_pvt {
+ struct device *addrmatch_dev, *chancounts_dev;
+
struct pci_dev *pci_noncore;
struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
@@ -244,7 +261,6 @@ struct i7core_pvt {
struct i7core_channel channel[NUM_CHANS];
int ce_count_available;
- int csrow_map[NUM_CHANS][MAX_DIMMS];
/* ECC corrected errors counts per udimm */
unsigned long udimm_ce_count[MAX_DIMMS];
@@ -253,10 +269,7 @@ struct i7core_pvt {
unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
- unsigned int is_registered;
-
- /* mcelog glue */
- struct edac_mce edac_mce;
+ bool is_registered, enable_scrub;
/* Fifo double buffers */
struct mce mce_entry[MCE_LOG_LEN];
@@ -268,6 +281,9 @@ struct i7core_pvt {
/* Count indicator to show errors not got */
unsigned mce_overrun;
+ /* DCLK Frequency used for computing scrub rate */
+ int dclk_freq;
+
/* Struct to control EDAC polling */
struct edac_pci_ctl_info *i7core_pci;
};
@@ -281,8 +297,7 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
/* Memory controller */
{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
-
- /* Exists only for RDIMM */
+ /* Exists only for RDIMM */
{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
@@ -303,6 +318,16 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
+
+ /* Generic Non-core registers */
+ /*
+	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41).
+ * On Xeon 55xx, however, it has a different id (8086:2c40). So,
+ * the probing code needs to test for the other address in case of
+ * failure of this one
+ */
+ { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
+
};
static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
@@ -319,6 +344,12 @@ static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
+
+ /*
+ * This is the PCI device has an alternate address on some
+	 * This PCI device has an alternate address on some
+	 * processors, such as the Core i7 860.
+ { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) },
};
static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
@@ -346,6 +377,10 @@ static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) },
+
+ /* Generic Non-core registers */
+ { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) },
+
};
#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
@@ -359,14 +394,14 @@ static const struct pci_id_table pci_dev_table[] = {
/*
* pci_device_id table for which devices we are looking for
*/
-static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
+static const struct pci_device_id i7core_pci_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
{0,} /* 0 terminated list. */
};
/****************************************************************************
- Anciliary status routines
+ Ancillary status routines
****************************************************************************/
/* MC_CONTROL bits */
@@ -385,21 +420,21 @@ static inline int numdimms(u32 dimms)
static inline int numrank(u32 rank)
{
- static int ranks[4] = { 1, 2, 4, -EINVAL };
+ static const int ranks[] = { 1, 2, 4, -EINVAL };
return ranks[rank & 0x3];
}
static inline int numbank(u32 bank)
{
- static int banks[4] = { 4, 8, 16, -EINVAL };
+ static const int banks[] = { 4, 8, 16, -EINVAL };
return banks[bank & 0x3];
}
static inline int numrow(u32 row)
{
- static int rows[8] = {
+ static const int rows[] = {
1 << 12, 1 << 13, 1 << 14, 1 << 15,
1 << 16, -EINVAL, -EINVAL, -EINVAL,
};
@@ -409,7 +444,7 @@ static inline int numrow(u32 row)
static inline int numcol(u32 col)
{
- static int cols[8] = {
+ static const int cols[] = {
1 << 10, 1 << 11, 1 << 12, -EINVAL,
};
return cols[col & 0x3];
@@ -460,116 +495,15 @@ static void free_i7core_dev(struct i7core_dev *i7core_dev)
/****************************************************************************
Memory check routines
****************************************************************************/
-static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
- unsigned func)
-{
- struct i7core_dev *i7core_dev = get_i7core_dev(socket);
- int i;
- if (!i7core_dev)
- return NULL;
-
- for (i = 0; i < i7core_dev->n_devs; i++) {
- if (!i7core_dev->pdev[i])
- continue;
-
- if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
- PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
- return i7core_dev->pdev[i];
- }
- }
-
- return NULL;
-}
-
-/**
- * i7core_get_active_channels() - gets the number of channels and csrows
- * @socket: Quick Path Interconnect socket
- * @channels: Number of channels that will be returned
- * @csrows: Number of csrows found
- *
- * Since EDAC core needs to know in advance the number of available channels
- * and csrows, in order to allocate memory for csrows/channels, it is needed
- * to run two similar steps. At the first step, implemented on this function,
- * it checks the number of csrows/channels present at one socket.
- * this is used in order to properly allocate the size of mci components.
- *
- * It should be noticed that none of the current available datasheets explain
- * or even mention how csrows are seen by the memory controller. So, we need
- * to add a fake description for csrows.
- * So, this driver is attributing one DIMM memory for one csrow.
- */
-static int i7core_get_active_channels(const u8 socket, unsigned *channels,
- unsigned *csrows)
-{
- struct pci_dev *pdev = NULL;
- int i, j;
- u32 status, control;
-
- *channels = 0;
- *csrows = 0;
-
- pdev = get_pdev_slot_func(socket, 3, 0);
- if (!pdev) {
- i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
- socket);
- return -ENODEV;
- }
-
- /* Device 3 function 0 reads */
- pci_read_config_dword(pdev, MC_STATUS, &status);
- pci_read_config_dword(pdev, MC_CONTROL, &control);
-
- for (i = 0; i < NUM_CHANS; i++) {
- u32 dimm_dod[3];
- /* Check if the channel is active */
- if (!(control & (1 << (8 + i))))
- continue;
-
- /* Check if the channel is disabled */
- if (status & (1 << i))
- continue;
-
- pdev = get_pdev_slot_func(socket, i + 4, 1);
- if (!pdev) {
- i7core_printk(KERN_ERR, "Couldn't find socket %d "
- "fn %d.%d!!!\n",
- socket, i + 4, 1);
- return -ENODEV;
- }
- /* Devices 4-6 function 1 */
- pci_read_config_dword(pdev,
- MC_DOD_CH_DIMM0, &dimm_dod[0]);
- pci_read_config_dword(pdev,
- MC_DOD_CH_DIMM1, &dimm_dod[1]);
- pci_read_config_dword(pdev,
- MC_DOD_CH_DIMM2, &dimm_dod[2]);
-
- (*channels)++;
-
- for (j = 0; j < 3; j++) {
- if (!DIMM_PRESENT(dimm_dod[j]))
- continue;
- (*csrows)++;
- }
- }
-
- debugf0("Number of active channels on socket %d: %d\n",
- socket, *channels);
-
- return 0;
-}
-
-static int get_dimm_config(const struct mem_ctl_info *mci)
+static int get_dimm_config(struct mem_ctl_info *mci)
{
struct i7core_pvt *pvt = mci->pvt_info;
- struct csrow_info *csr;
struct pci_dev *pdev;
int i, j;
- int csrow = 0;
- unsigned long last_page = 0;
enum edac_type mode;
enum mem_type mtype;
+ struct dimm_info *dimm;
/* Get data from the MC register, function 0 */
pdev = pvt->pci_mcr[0];
@@ -582,29 +516,28 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
- debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
- pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
- pvt->info.max_dod, pvt->info.ch_map);
+ edac_dbg(0, "QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
+ pvt->i7core_dev->socket, pvt->info.mc_control,
+ pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map);
if (ECC_ENABLED(pvt)) {
- debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
+ edac_dbg(0, "ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
if (ECCx8(pvt))
mode = EDAC_S8ECD8ED;
else
mode = EDAC_S4ECD4ED;
} else {
- debugf0("ECC disabled\n");
+ edac_dbg(0, "ECC disabled\n");
mode = EDAC_NONE;
}
/* FIXME: need to handle the error codes */
- debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
- "x%x x 0x%x\n",
- numdimms(pvt->info.max_dod),
- numrank(pvt->info.max_dod >> 2),
- numbank(pvt->info.max_dod >> 4),
- numrow(pvt->info.max_dod >> 6),
- numcol(pvt->info.max_dod >> 9));
+ edac_dbg(0, "DOD Max limits: DIMMS: %d, %d-ranked, %d-banked x%x x 0x%x\n",
+ numdimms(pvt->info.max_dod),
+ numrank(pvt->info.max_dod >> 2),
+ numbank(pvt->info.max_dod >> 4),
+ numrow(pvt->info.max_dod >> 6),
+ numcol(pvt->info.max_dod >> 9));
for (i = 0; i < NUM_CHANS; i++) {
u32 data, dimm_dod[3], value[8];
@@ -613,11 +546,11 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
continue;
if (!CH_ACTIVE(pvt, i)) {
- debugf0("Channel %i is not active\n", i);
+ edac_dbg(0, "Channel %i is not active\n", i);
continue;
}
if (CH_DISABLED(pvt, i)) {
- debugf0("Channel %i is disabled\n", i);
+ edac_dbg(0, "Channel %i is disabled\n", i);
continue;
}
@@ -625,21 +558,20 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_ch[i][0],
MC_CHANNEL_DIMM_INIT_PARAMS, &data);
- pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
- 4 : 2;
+
+ if (data & THREE_DIMMS_PRESENT)
+ pvt->channel[i].is_3dimms_present = true;
+
+ if (data & SINGLE_QUAD_RANK_PRESENT)
+ pvt->channel[i].is_single_4rank = true;
+
+ if (data & QUAD_RANK_PRESENT)
+ pvt->channel[i].has_4rank = true;
if (data & REGISTERED_DIMM)
mtype = MEM_RDDR3;
else
mtype = MEM_DDR3;
-#if 0
- if (data & THREE_DIMMS_PRESENT)
- pvt->channel[i].dimms = 3;
- else if (data & SINGLE_QUAD_RANK_PRESENT)
- pvt->channel[i].dimms = 1;
- else
- pvt->channel[i].dimms = 2;
-#endif
/* Devices 4-6 function 1 */
pci_read_config_dword(pvt->pci_ch[i][1],
@@ -649,13 +581,14 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_ch[i][1],
MC_DOD_CH_DIMM2, &dimm_dod[2]);
- debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
- "%d ranks, %cDIMMs\n",
- i,
- RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
- data,
- pvt->channel[i].ranks,
- (data & REGISTERED_DIMM) ? 'R' : 'U');
+ edac_dbg(0, "Ch%d phy rd%d, wr%d (0x%08x): %s%s%s%cDIMMs\n",
+ i,
+ RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
+ data,
+ pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
+			 pvt->channel[i].is_single_4rank ? "SINGLE_4R " : "",
+ pvt->channel[i].has_4rank ? "HAS_4R " : "",
+ (data & REGISTERED_DIMM) ? 'R' : 'U');
for (j = 0; j < 3; j++) {
u32 banks, ranks, rows, cols;
@@ -664,6 +597,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
if (!DIMM_PRESENT(dimm_dod[j]))
continue;
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+ i, j, 0);
banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
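EDAC_DIMM_PTR() above indexes the flat DIMM table by the same per-layer coordinates used at allocation time (channel i, slot j; the trailing 0 is unused with two layers). The size computation in the next hunk is easy to sanity-check by hand: the '>> (20 - 3)' folds together the eight bytes moved per column access (<< 3) and the bytes-to-MiB conversion (>> 20). A worked instance with made-up DOD values:

	/* Hypothetical DIMM: 2 ranks, 8 banks, 2^14 rows, 2^10 columns */
	u32 rows = 1 << 14, cols = 1 << 10, banks = 8, ranks = 2;
	u32 size = (rows * cols * banks * ranks) >> (20 - 3);
	/* size == 2048 MiB, i.e. a 2 GiB DIMM */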
@@ -672,50 +607,35 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
/* DDR3 has 8 I/O banks */
size = (rows * cols * banks * ranks) >> (20 - 3);
- pvt->channel[i].dimms++;
-
- debugf0("\tdimm %d %d Mb offset: %x, "
- "bank: %d, rank: %d, row: %#x, col: %#x\n",
- j, size,
- RANKOFFSET(dimm_dod[j]),
- banks, ranks, rows, cols);
+ edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
+ j, size,
+ RANKOFFSET(dimm_dod[j]),
+ banks, ranks, rows, cols);
npages = MiB_TO_PAGES(size);
- csr = &mci->csrows[csrow];
- csr->first_page = last_page + 1;
- last_page += npages;
- csr->last_page = last_page;
- csr->nr_pages = npages;
-
- csr->page_mask = 0;
- csr->grain = 8;
- csr->csrow_idx = csrow;
- csr->nr_channels = 1;
-
- csr->channels[0].chan_idx = i;
- csr->channels[0].ce_count = 0;
-
- pvt->csrow_map[i][j] = csrow;
+ dimm->nr_pages = npages;
switch (banks) {
case 4:
- csr->dtype = DEV_X4;
+ dimm->dtype = DEV_X4;
break;
case 8:
- csr->dtype = DEV_X8;
+ dimm->dtype = DEV_X8;
break;
case 16:
- csr->dtype = DEV_X16;
+ dimm->dtype = DEV_X16;
break;
default:
- csr->dtype = DEV_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
}
- csr->edac_mode = mode;
- csr->mtype = mtype;
-
- csrow++;
+ snprintf(dimm->label, sizeof(dimm->label),
+ "CPU#%uChannel#%u_DIMM#%u",
+ pvt->i7core_dev->socket, i, j);
+ dimm->grain = 8;
+ dimm->edac_mode = mode;
+ dimm->mtype = mtype;
}
pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
@@ -726,12 +646,12 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
- debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
+ edac_dbg(1, "\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
for (j = 0; j < 8; j++)
- debugf1("\t\t%#x\t%#x\t%#x\n",
- (value[j] >> 27) & 0x1,
- (value[j] >> 24) & 0x7,
- (value[j] && ((1 << 24) - 1)));
+ edac_dbg(1, "\t\t%#x\t%#x\t%#x\n",
+ (value[j] >> 27) & 0x1,
+ (value[j] >> 24) & 0x7,
+ (value[j] & ((1 << 24) - 1)));
}
return 0;
@@ -741,6 +661,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
Error insertion routines
****************************************************************************/
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+
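to_mci() is plain container_of(): the sysfs callbacks below now receive the struct device embedded in the mem_ctl_info, and the macro walks back to the enclosing structure. A hand-rolled equivalent, for illustration only:

	#include <linux/stddef.h>

	static inline struct mem_ctl_info *mci_from_dev(struct device *k)
	{
		/* subtract the offset of the embedded 'dev' member */
		return (struct mem_ctl_info *)((char *)k -
				offsetof(struct mem_ctl_info, dev));
	}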
/* The i7core has independent error injection features per channel.
However, to have a simpler code, we don't allow enabling error injection
on more than one channel.
@@ -770,9 +692,11 @@ static int disable_inject(const struct mem_ctl_info *mci)
* bit 0 - refers to the lower 32-byte half cacheline
* bit 1 - refers to the upper 32-byte half cacheline
*/
-static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
+static ssize_t i7core_inject_section_store(struct device *dev,
+ struct device_attribute *mattr,
const char *data, size_t count)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct i7core_pvt *pvt = mci->pvt_info;
unsigned long value;
int rc;
@@ -780,7 +704,7 @@ static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
if (pvt->inject.enable)
disable_inject(mci);
- rc = strict_strtoul(data, 10, &value);
+ rc = kstrtoul(data, 10, &value);
if ((rc < 0) || (value > 3))
return -EIO;
@@ -788,9 +712,11 @@ static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
return count;
}
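strict_strtoul() is replaced by kstrtoul() throughout this patch; the semantics relied on here are that it returns 0 on success, -EINVAL for malformed input and -ERANGE on overflow, and that a single trailing newline (as sysfs writes deliver) is tolerated. A minimal sketch:

	unsigned long value;
	int rc;

	rc = kstrtoul("3\n", 10, &value);	/* trailing '\n' is accepted */
	if (rc < 0)
		return rc;			/* -EINVAL or -ERANGE */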
-static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
- char *data)
+static ssize_t i7core_inject_section_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct i7core_pvt *pvt = mci->pvt_info;
return sprintf(data, "0x%08x\n", pvt->inject.section);
}
@@ -803,17 +729,19 @@ static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
* bit 1 - inject ECC error
* bit 2 - inject parity error
*/
-static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
+static ssize_t i7core_inject_type_store(struct device *dev,
+ struct device_attribute *mattr,
const char *data, size_t count)
{
- struct i7core_pvt *pvt = mci->pvt_info;
+ struct mem_ctl_info *mci = to_mci(dev);
+	struct i7core_pvt *pvt = mci->pvt_info;
unsigned long value;
int rc;
if (pvt->inject.enable)
disable_inject(mci);
- rc = strict_strtoul(data, 10, &value);
+ rc = kstrtoul(data, 10, &value);
if ((rc < 0) || (value > 7))
return -EIO;
@@ -821,10 +749,13 @@ static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
return count;
}
-static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
- char *data)
+static ssize_t i7core_inject_type_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct i7core_pvt *pvt = mci->pvt_info;
+
return sprintf(data, "0x%08x\n", pvt->inject.type);
}
@@ -838,9 +769,11 @@ static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
* 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
* uncorrectable error to be injected.
*/
-static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
- const char *data, size_t count)
+static ssize_t i7core_inject_eccmask_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct i7core_pvt *pvt = mci->pvt_info;
unsigned long value;
int rc;
@@ -848,7 +781,7 @@ static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
if (pvt->inject.enable)
disable_inject(mci);
- rc = strict_strtoul(data, 10, &value);
+ rc = kstrtoul(data, 10, &value);
if (rc < 0)
return -EIO;
@@ -856,10 +789,13 @@ static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
return count;
}
-static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
- char *data)
+static ssize_t i7core_inject_eccmask_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct i7core_pvt *pvt = mci->pvt_info;
+
return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}
@@ -876,14 +812,16 @@ static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
#define DECLARE_ADDR_MATCH(param, limit) \
static ssize_t i7core_inject_store_##param( \
- struct mem_ctl_info *mci, \
- const char *data, size_t count) \
+ struct device *dev, \
+ struct device_attribute *mattr, \
+ const char *data, size_t count) \
{ \
+ struct mem_ctl_info *mci = dev_get_drvdata(dev); \
struct i7core_pvt *pvt; \
long value; \
int rc; \
\
- debugf1("%s()\n", __func__); \
+ edac_dbg(1, "\n"); \
pvt = mci->pvt_info; \
\
if (pvt->inject.enable) \
@@ -892,7 +830,7 @@ static ssize_t i7core_inject_store_##param( \
if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
value = -1; \
else { \
- rc = strict_strtoul(data, 10, &value); \
+	rc = kstrtol(data, 10, &value);			\
if ((rc < 0) || (value >= limit)) \
return -EIO; \
} \
@@ -903,13 +841,15 @@ static ssize_t i7core_inject_store_##param( \
} \
\
static ssize_t i7core_inject_show_##param( \
- struct mem_ctl_info *mci, \
- char *data) \
+ struct device *dev, \
+ struct device_attribute *mattr, \
+ char *data) \
{ \
+ struct mem_ctl_info *mci = dev_get_drvdata(dev); \
struct i7core_pvt *pvt; \
\
pvt = mci->pvt_info; \
- debugf1("%s() pvt=%p\n", __func__, pvt); \
+ edac_dbg(1, "pvt=%p\n", pvt); \
if (pvt->inject.param < 0) \
return sprintf(data, "any\n"); \
else \
@@ -917,14 +857,9 @@ static ssize_t i7core_inject_show_##param( \
}
#define ATTR_ADDR_MATCH(param) \
- { \
- .attr = { \
- .name = #param, \
- .mode = (S_IRUGO | S_IWUSR) \
- }, \
- .show = i7core_inject_show_##param, \
- .store = i7core_inject_store_##param, \
- }
+ static DEVICE_ATTR(param, S_IRUGO | S_IWUSR, \
+ i7core_inject_show_##param, \
+ i7core_inject_store_##param)
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
@@ -933,14 +868,21 @@ DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
+ATTR_ADDR_MATCH(channel);
+ATTR_ADDR_MATCH(dimm);
+ATTR_ADDR_MATCH(rank);
+ATTR_ADDR_MATCH(bank);
+ATTR_ADDR_MATCH(page);
+ATTR_ADDR_MATCH(col);
+
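The rewritten ATTR_ADDR_MATCH() macro above emits a standard device_attribute instead of the EDAC-private mcidev_sysfs_attribute. Approximately, ATTR_ADDR_MATCH(channel) expands to:

	static struct device_attribute dev_attr_channel =
		__ATTR(channel, S_IRUGO | S_IWUSR,
		       i7core_inject_show_channel,
		       i7core_inject_store_channel);

which is why &dev_attr_channel.attr can be collected into the attribute group further down.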
static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
{
u32 read;
int count;
- debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
- dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
- where, val);
+ edac_dbg(0, "setting pci %02x:%02x.%x reg=%02x value=%08x\n",
+ dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
+ where, val);
for (count = 0; count < 10; count++) {
if (count)
@@ -978,9 +920,11 @@ static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
* is reliable enough to check if the MC is using the
 * three channels. However, this is not clear in the datasheet.
*/
-static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
- const char *data, size_t count)
+static ssize_t i7core_inject_enable_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct i7core_pvt *pvt = mci->pvt_info;
u32 injectmask;
u64 mask = 0;
@@ -990,7 +934,7 @@ static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
if (!pvt->pci_ch[pvt->inject.channel][0])
return 0;
- rc = strict_strtoul(data, 10, &enable);
+ rc = kstrtoul(data, 10, &enable);
if ((rc < 0))
return 0;
@@ -1073,17 +1017,18 @@ static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
pci_write_config_dword(pvt->pci_noncore,
MC_CFG_CONTROL, 8);
- debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
- " inject 0x%08x\n",
- mask, pvt->inject.eccmask, injectmask);
+ edac_dbg(0, "Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n",
+ mask, pvt->inject.eccmask, injectmask);
return count;
}
-static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
- char *data)
+static ssize_t i7core_inject_enable_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct i7core_pvt *pvt = mci->pvt_info;
u32 injectmask;
@@ -1093,7 +1038,7 @@ static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
MC_CHANNEL_ERROR_INJECT, &injectmask);
- debugf0("Inject error read: 0x%018x\n", injectmask);
+ edac_dbg(0, "Inject error read: 0x%018x\n", injectmask);
if (injectmask & 0x0c)
pvt->inject.enable = 1;
@@ -1103,12 +1048,14 @@ static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
#define DECLARE_COUNTER(param) \
static ssize_t i7core_show_counter_##param( \
- struct mem_ctl_info *mci, \
- char *data) \
+ struct device *dev, \
+ struct device_attribute *mattr, \
+ char *data) \
{ \
+ struct mem_ctl_info *mci = dev_get_drvdata(dev); \
struct i7core_pvt *pvt = mci->pvt_info; \
\
- debugf1("%s() \n", __func__); \
+ edac_dbg(1, "\n"); \
if (!pvt->ce_count_available || (pvt->is_registered)) \
return sprintf(data, "data unavailable\n"); \
return sprintf(data, "%lu\n", \
@@ -1116,121 +1063,179 @@ static ssize_t i7core_show_counter_##param( \
}
#define ATTR_COUNTER(param) \
- { \
- .attr = { \
- .name = __stringify(udimm##param), \
- .mode = (S_IRUGO | S_IWUSR) \
- }, \
- .show = i7core_show_counter_##param \
- }
+ static DEVICE_ATTR(udimm##param, S_IRUGO | S_IWUSR, \
+ i7core_show_counter_##param, \
+ NULL)
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
+ATTR_COUNTER(0);
+ATTR_COUNTER(1);
+ATTR_COUNTER(2);
+
/*
- * Sysfs struct
+ * inject_addrmatch device sysfs struct
*/
-static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
- ATTR_ADDR_MATCH(channel),
- ATTR_ADDR_MATCH(dimm),
- ATTR_ADDR_MATCH(rank),
- ATTR_ADDR_MATCH(bank),
- ATTR_ADDR_MATCH(page),
- ATTR_ADDR_MATCH(col),
- { } /* End of list */
+static struct attribute *i7core_addrmatch_attrs[] = {
+ &dev_attr_channel.attr,
+ &dev_attr_dimm.attr,
+ &dev_attr_rank.attr,
+ &dev_attr_bank.attr,
+ &dev_attr_page.attr,
+ &dev_attr_col.attr,
+ NULL
};
-static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
- .name = "inject_addrmatch",
- .mcidev_attr = i7core_addrmatch_attrs,
+static struct attribute_group addrmatch_grp = {
+ .attrs = i7core_addrmatch_attrs,
};
-static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
- ATTR_COUNTER(0),
- ATTR_COUNTER(1),
- ATTR_COUNTER(2),
- { .attr = { .name = NULL } }
+static const struct attribute_group *addrmatch_groups[] = {
+ &addrmatch_grp,
+ NULL
};
-static const struct mcidev_sysfs_group i7core_udimm_counters = {
- .name = "all_channel_counts",
- .mcidev_attr = i7core_udimm_counters_attrs,
+static void addrmatch_release(struct device *device)
+{
+ edac_dbg(1, "Releasing device %s\n", dev_name(device));
+ kfree(device);
+}
+
+static struct device_type addrmatch_type = {
+ .groups = addrmatch_groups,
+ .release = addrmatch_release,
+};
+
+/*
+ * all_channel_counts sysfs struct
+ */
+
+static struct attribute *i7core_udimm_counters_attrs[] = {
+ &dev_attr_udimm0.attr,
+ &dev_attr_udimm1.attr,
+ &dev_attr_udimm2.attr,
+ NULL
+};
+
+static struct attribute_group all_channel_counts_grp = {
+ .attrs = i7core_udimm_counters_attrs,
};
-static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
- {
- .attr = {
- .name = "inject_section",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = i7core_inject_section_show,
- .store = i7core_inject_section_store,
- }, {
- .attr = {
- .name = "inject_type",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = i7core_inject_type_show,
- .store = i7core_inject_type_store,
- }, {
- .attr = {
- .name = "inject_eccmask",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = i7core_inject_eccmask_show,
- .store = i7core_inject_eccmask_store,
- }, {
- .grp = &i7core_inject_addrmatch,
- }, {
- .attr = {
- .name = "inject_enable",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = i7core_inject_enable_show,
- .store = i7core_inject_enable_store,
- },
- { } /* End of list */
+static const struct attribute_group *all_channel_counts_groups[] = {
+ &all_channel_counts_grp,
+ NULL
};
-static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
- {
- .attr = {
- .name = "inject_section",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = i7core_inject_section_show,
- .store = i7core_inject_section_store,
- }, {
- .attr = {
- .name = "inject_type",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = i7core_inject_type_show,
- .store = i7core_inject_type_store,
- }, {
- .attr = {
- .name = "inject_eccmask",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = i7core_inject_eccmask_show,
- .store = i7core_inject_eccmask_store,
- }, {
- .grp = &i7core_inject_addrmatch,
- }, {
- .attr = {
- .name = "inject_enable",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = i7core_inject_enable_show,
- .store = i7core_inject_enable_store,
- }, {
- .grp = &i7core_udimm_counters,
- },
- { } /* End of list */
+static void all_channel_counts_release(struct device *device)
+{
+ edac_dbg(1, "Releasing device %s\n", dev_name(device));
+ kfree(device);
+}
+
+static struct device_type all_channel_counts_type = {
+ .groups = all_channel_counts_groups,
+ .release = all_channel_counts_release,
};
+/*
+ * inject sysfs attributes
+ */
+
+static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
+ i7core_inject_section_show, i7core_inject_section_store);
+
+static DEVICE_ATTR(inject_type, S_IRUGO | S_IWUSR,
+ i7core_inject_type_show, i7core_inject_type_store);
+
+
+static DEVICE_ATTR(inject_eccmask, S_IRUGO | S_IWUSR,
+ i7core_inject_eccmask_show, i7core_inject_eccmask_store);
+
+static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR,
+ i7core_inject_enable_show, i7core_inject_enable_store);
+
+static int i7core_create_sysfs_devices(struct mem_ctl_info *mci)
+{
+ struct i7core_pvt *pvt = mci->pvt_info;
+ int rc;
+
+ rc = device_create_file(&mci->dev, &dev_attr_inject_section);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_inject_type);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_inject_eccmask);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_inject_enable);
+ if (rc < 0)
+ return rc;
+
+ pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL);
+ if (!pvt->addrmatch_dev)
+		return -ENOMEM;
+
+ pvt->addrmatch_dev->type = &addrmatch_type;
+ pvt->addrmatch_dev->bus = mci->dev.bus;
+ device_initialize(pvt->addrmatch_dev);
+ pvt->addrmatch_dev->parent = &mci->dev;
+ dev_set_name(pvt->addrmatch_dev, "inject_addrmatch");
+ dev_set_drvdata(pvt->addrmatch_dev, mci);
+
+ edac_dbg(1, "creating %s\n", dev_name(pvt->addrmatch_dev));
+
+ rc = device_add(pvt->addrmatch_dev);
+ if (rc < 0)
+ return rc;
+
+ if (!pvt->is_registered) {
+ pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev),
+ GFP_KERNEL);
+ if (!pvt->chancounts_dev) {
+			device_del(pvt->addrmatch_dev);
+			put_device(pvt->addrmatch_dev);
+			return -ENOMEM;
+ }
+
+ pvt->chancounts_dev->type = &all_channel_counts_type;
+ pvt->chancounts_dev->bus = mci->dev.bus;
+ device_initialize(pvt->chancounts_dev);
+ pvt->chancounts_dev->parent = &mci->dev;
+ dev_set_name(pvt->chancounts_dev, "all_channel_counts");
+ dev_set_drvdata(pvt->chancounts_dev, mci);
+
+ edac_dbg(1, "creating %s\n", dev_name(pvt->chancounts_dev));
+
+ rc = device_add(pvt->chancounts_dev);
+ if (rc < 0)
+ return rc;
+ }
+ return 0;
+}
+
+static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci)
+{
+ struct i7core_pvt *pvt = mci->pvt_info;
+
+ edac_dbg(1, "\n");
+
+ device_remove_file(&mci->dev, &dev_attr_inject_section);
+ device_remove_file(&mci->dev, &dev_attr_inject_type);
+ device_remove_file(&mci->dev, &dev_attr_inject_eccmask);
+ device_remove_file(&mci->dev, &dev_attr_inject_enable);
+
+ if (!pvt->is_registered) {
+		device_del(pvt->chancounts_dev);
+		put_device(pvt->chancounts_dev);
+	}
+	device_del(pvt->addrmatch_dev);
+	put_device(pvt->addrmatch_dev);
+}
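The two helper devices follow the usual driver-core lifecycle: a kzalloc'ed struct device is brought up with device_initialize() plus device_add(), and torn down with device_del() followed by put_device(); the type's release() callback then kfree()s the memory once the last reference is gone. In outline (names as above):

	struct device *d = kzalloc(sizeof(*d), GFP_KERNEL);

	d->type = &addrmatch_type;	/* provides .groups and .release */
	device_initialize(d);		/* refcount is now 1 */
	device_add(d);			/* node appears under the parent */
	/* ... */
	device_del(d);			/* remove from sysfs */
	put_device(d);			/* drop the ref; .release kfrees d */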
+
/****************************************************************************
Device initialization routines: put/get, init/exit
****************************************************************************/
@@ -1243,14 +1248,14 @@ static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
int i;
- debugf0(__FILE__ ": %s()\n", __func__);
+ edac_dbg(0, "\n");
for (i = 0; i < i7core_dev->n_devs; i++) {
struct pci_dev *pdev = i7core_dev->pdev[i];
if (!pdev)
continue;
- debugf0("Removing dev %02x:%02x.%d\n",
- pdev->bus->number,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ edac_dbg(0, "Removing dev %02x:%02x.%d\n",
+ pdev->bus->number,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
pci_dev_put(pdev);
}
}
@@ -1271,7 +1276,7 @@ static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
int i;
/*
- * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
+	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core PCI buses
* aren't announced by acpi. So, we need to use a legacy scan probing
* to detect them
*/
@@ -1293,12 +1298,12 @@ static unsigned i7core_pci_lastbus(void)
while ((b = pci_find_next_bus(b)) != NULL) {
bus = b->number;
- debugf0("Found bus %d\n", bus);
+ edac_dbg(0, "Found bus %d\n", bus);
if (bus > last_bus)
last_bus = bus;
}
- debugf0("Last bus %d\n", last_bus);
+ edac_dbg(0, "Last bus %d\n", last_bus);
return last_bus;
}
@@ -1324,6 +1329,25 @@ static int i7core_get_onedevice(struct pci_dev **prev,
pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
dev_descr->dev_id, *prev);
+ /*
+ * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
+	 * are at address 8086:2c40, instead of 8086:2c41. So, we need
+	 * to probe for the alternate address in case of failure.
+ */
+ if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) {
+ pci_dev_get(*prev); /* pci_get_device will put it */
+ pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
+ }
+
+ if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE &&
+ !pdev) {
+ pci_dev_get(*prev); /* pci_get_device will put it */
+ pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
+ *prev);
+ }
+
if (!pdev) {
if (*prev) {
*prev = pdev;
@@ -1391,10 +1415,10 @@ static int i7core_get_onedevice(struct pci_dev **prev,
return -ENODEV;
}
- debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
- socket, bus, dev_descr->dev,
- dev_descr->func,
- PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+ edac_dbg(0, "Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
+ socket, bus, dev_descr->dev,
+ dev_descr->func,
+ PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
/*
* As stated on drivers/pci/search.c, the reference count for
@@ -1444,8 +1468,10 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
struct i7core_pvt *pvt = mci->pvt_info;
struct pci_dev *pdev;
int i, func, slot;
+ char *family;
- pvt->is_registered = 0;
+ pvt->is_registered = false;
+ pvt->enable_scrub = false;
for (i = 0; i < i7core_dev->n_devs; i++) {
pdev = i7core_dev->pdev[i];
if (!pdev)
@@ -1461,18 +1487,46 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
if (unlikely(func > MAX_CHAN_FUNC))
goto error;
pvt->pci_ch[slot - 4][func] = pdev;
- } else if (!slot && !func)
+ } else if (!slot && !func) {
pvt->pci_noncore = pdev;
- else
+
+ /* Detect the processor family */
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_I7_NONCORE:
+ family = "Xeon 35xx/ i7core";
+ pvt->enable_scrub = false;
+ break;
+ case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
+ family = "i7-800/i5-700";
+ pvt->enable_scrub = false;
+ break;
+ case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
+ family = "Xeon 34xx";
+ pvt->enable_scrub = false;
+ break;
+ case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
+ family = "Xeon 55xx";
+ pvt->enable_scrub = true;
+ break;
+ case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
+ family = "Xeon 56xx / i7-900";
+ pvt->enable_scrub = true;
+ break;
+ default:
+ family = "unknown";
+ pvt->enable_scrub = false;
+ }
+			edac_dbg(0, "Detected processor type %s\n", family);
+ } else
goto error;
- debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
- pdev, i7core_dev->socket);
+ edac_dbg(0, "Associated fn %d.%d, dev = %p, socket %d\n",
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+ pdev, i7core_dev->socket);
if (PCI_SLOT(pdev->devfn) == 3 &&
PCI_FUNC(pdev->devfn) == 2)
- pvt->is_registered = 1;
+ pvt->is_registered = true;
}
return 0;
@@ -1487,24 +1541,6 @@ error:
/****************************************************************************
Error check routines
****************************************************************************/
-static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
- const int chan,
- const int dimm,
- const int add)
-{
- char *msg;
- struct i7core_pvt *pvt = mci->pvt_info;
- int row = pvt->csrow_map[chan][dimm], i;
-
- for (i = 0; i < add; i++) {
- msg = kasprintf(GFP_KERNEL, "Corrected error "
- "(Socket=%d channel=%d dimm=%d)",
- pvt->i7core_dev->socket, chan, dimm);
-
- edac_mc_handle_fbd_ce(mci, row, 0, msg);
- kfree (msg);
- }
-}
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
const int chan,
@@ -1543,12 +1579,17 @@ static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
	/* update the edac core */
if (add0 != 0)
- i7core_rdimm_update_csrow(mci, chan, 0, add0);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add0,
+ 0, 0, 0,
+ chan, 0, -1, "error", "");
if (add1 != 0)
- i7core_rdimm_update_csrow(mci, chan, 1, add1);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add1,
+ 0, 0, 0,
+ chan, 1, -1, "error", "");
if (add2 != 0)
- i7core_rdimm_update_csrow(mci, chan, 2, add2);
-
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add2,
+ 0, 0, 0,
+ chan, 2, -1, "error", "");
}
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
@@ -1571,8 +1612,8 @@ static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
&rcv[2][1]);
for (i = 0 ; i < 3; i++) {
- debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
- (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
+ edac_dbg(3, "MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
+ (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
/*if the channel has 3 dimms*/
if (pvt->channel[i].dimms > 2) {
new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
@@ -1603,7 +1644,7 @@ static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
int new0, new1, new2;
if (!pvt->pci_mcr[4]) {
- debugf0("%s MCR registers not found\n", __func__);
+ edac_dbg(0, "MCR registers not found\n");
return;
}
@@ -1667,20 +1708,26 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
const struct mce *m)
{
struct i7core_pvt *pvt = mci->pvt_info;
- char *type, *optype, *err, *msg;
+ char *optype, *err;
+ enum hw_event_mc_err_type tp_event;
unsigned long error = m->status & 0x1ff0000l;
+	bool uncorrected_error = m->status & 1ll << 61;
+ bool ripv = m->mcgstatus & 1;
u32 optypenum = (m->status >> 4) & 0x07;
- u32 core_err_cnt = (m->status >> 38) && 0x7fff;
+ u32 core_err_cnt = (m->status >> 38) & 0x7fff;
u32 dimm = (m->misc >> 16) & 0x3;
u32 channel = (m->misc >> 18) & 0x3;
u32 syndrome = m->misc >> 32;
u32 errnum = find_first_bit(&error, 32);
- int csrow;
- if (m->mcgstatus & 1)
- type = "FATAL";
- else
- type = "NON_FATAL";
+ if (uncorrected_error) {
+ if (ripv)
+ tp_event = HW_EVENT_ERR_FATAL;
+ else
+ tp_event = HW_EVENT_ERR_UNCORRECTED;
+ } else {
+ tp_event = HW_EVENT_ERR_CORRECTED;
+ }
switch (optypenum) {
case 0:
@@ -1735,27 +1782,18 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
err = "unknown";
}
- /* FIXME: should convert addr into bank and rank information */
- msg = kasprintf(GFP_ATOMIC,
- "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
- "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
- type, (long long) m->addr, m->cpu, dimm, channel,
- syndrome, core_err_cnt, (long long)m->status,
- (long long)m->misc, optype, err);
-
- debugf0("%s", msg);
-
- csrow = pvt->csrow_map[channel][dimm];
-
- /* Call the helper to output message */
- if (m->mcgstatus & 1)
- edac_mc_handle_fbd_ue(mci, csrow, 0,
- 0 /* FIXME: should be channel here */, msg);
- else if (!pvt->is_registered)
- edac_mc_handle_fbd_ce(mci, csrow,
- 0 /* FIXME: should be channel here */, msg);
-
- kfree(msg);
+ /*
+ * Call the helper to output message
+ * FIXME: what to do if core_err_cnt > 1? Currently, it generates
+ * only one event
+ */
+ if (uncorrected_error || !pvt->is_registered)
+ edac_mc_handle_error(tp_event, mci, core_err_cnt,
+ m->addr >> PAGE_SHIFT,
+ m->addr & ~PAGE_MASK,
+ syndrome,
+ channel, dimm, -1,
+ err, optype);
}
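The coordinates passed to edac_mc_handle_error() follow the layer order declared at allocation time, channel then slot for this driver, and -1 marks a coordinate that is unknown at that layer. The prototype, matching the call sites in this patch:

	void edac_mc_handle_error(const enum hw_event_mc_err_type type,
				  struct mem_ctl_info *mci,
				  const u16 error_count,
				  const unsigned long page_frame_number,
				  const unsigned long offset_in_page,
				  const unsigned long syndrome,
				  const int top_layer,
				  const int mid_layer,
				  const int low_layer,
				  const char *msg,
				  const char *other_detail);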
/*
@@ -1772,7 +1810,7 @@ static void i7core_check_error(struct mem_ctl_info *mci)
/*
* MCE first step: Copy all mce errors into a temporary buffer
* We use a double buffering here, to reduce the risk of
- * loosing an error.
+ * losing an error.
*/
smp_rmb();
count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
@@ -1826,33 +1864,37 @@ check_ce_error:
* WARNING: As this routine should be called at NMI time, extra care should
* be taken to avoid deadlocks, and to be as fast as possible.
*/
-static int i7core_mce_check_error(void *priv, struct mce *mce)
+static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
+ void *data)
{
- struct mem_ctl_info *mci = priv;
- struct i7core_pvt *pvt = mci->pvt_info;
+ struct mce *mce = (struct mce *)data;
+ struct i7core_dev *i7_dev;
+ struct mem_ctl_info *mci;
+ struct i7core_pvt *pvt;
+
+ i7_dev = get_i7core_dev(mce->socketid);
+ if (!i7_dev)
+ return NOTIFY_BAD;
+
+ mci = i7_dev->mci;
+ pvt = mci->pvt_info;
/*
* Just let mcelog handle it if the error is
* outside the memory controller
*/
if (((mce->status & 0xffff) >> 7) != 1)
- return 0;
+ return NOTIFY_DONE;
/* Bank 8 registers are the only ones that we know how to handle */
if (mce->bank != 8)
- return 0;
-
-#ifdef CONFIG_SMP
- /* Only handle if it is the right mc controller */
- if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
- return 0;
-#endif
+ return NOTIFY_DONE;
smp_rmb();
if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
smp_wmb();
pvt->mce_overrun++;
- return 0;
+ return NOTIFY_DONE;
}
/* Copy memory error at the ringbuffer */
@@ -1864,8 +1906,241 @@ static int i7core_mce_check_error(void *priv, struct mce *mce)
if (mce->mcgstatus & 1)
i7core_check_error(mci);
- /* Advice mcelog that the error were handled */
- return 1;
+ /* Advise mcelog that the errors were handled */
+ return NOTIFY_STOP;
+}
+
+static struct notifier_block i7_mce_dec = {
+ .notifier_call = i7core_mce_check_error,
+};
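With the edac_mce glue gone, the driver hooks the x86 MCE decode chain through this notifier_block; the NOTIFY_DONE / NOTIFY_STOP returns above tell the chain whether the event was consumed. Registration happens in the module init/exit paths, not shown in this hunk; a sketch, assuming the mce_register_decode_chain() helpers of this kernel generation:

	/* in i7core_init(): */
	mce_register_decode_chain(&i7_mce_dec);

	/* in i7core_exit(): */
	mce_unregister_decode_chain(&i7_mce_dec);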
+
+struct memdev_dmi_entry {
+ u8 type;
+ u8 length;
+ u16 handle;
+ u16 phys_mem_array_handle;
+ u16 mem_err_info_handle;
+ u16 total_width;
+ u16 data_width;
+ u16 size;
+ u8 form;
+ u8 device_set;
+ u8 device_locator;
+ u8 bank_locator;
+ u8 memory_type;
+ u16 type_detail;
+ u16 speed;
+ u8 manufacturer;
+ u8 serial_number;
+ u8 asset_tag;
+ u8 part_number;
+ u8 attributes;
+ u32 extended_size;
+ u16 conf_mem_clk_speed;
+} __attribute__((__packed__));
+
+
+/*
+ * Decode the DRAM Clock Frequency, be paranoid, make sure that all
+ * memory devices show the same speed, and if they don't then consider
+ * all speeds to be invalid.
+ */
+static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
+{
+ int *dclk_freq = _dclk_freq;
+ u16 dmi_mem_clk_speed;
+
+ if (*dclk_freq == -1)
+ return;
+
+ if (dh->type == DMI_ENTRY_MEM_DEVICE) {
+ struct memdev_dmi_entry *memdev_dmi_entry =
+ (struct memdev_dmi_entry *)dh;
+ unsigned long conf_mem_clk_speed_offset =
+ (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
+ (unsigned long)&memdev_dmi_entry->type;
+ unsigned long speed_offset =
+ (unsigned long)&memdev_dmi_entry->speed -
+ (unsigned long)&memdev_dmi_entry->type;
+
+ /* Check that a DIMM is present */
+ if (memdev_dmi_entry->size == 0)
+ return;
+
+ /*
+ * Pick the configured speed if it's available, otherwise
+ * pick the DIMM speed, or we don't have a speed.
+ */
+ if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
+ dmi_mem_clk_speed =
+ memdev_dmi_entry->conf_mem_clk_speed;
+ } else if (memdev_dmi_entry->length > speed_offset) {
+ dmi_mem_clk_speed = memdev_dmi_entry->speed;
+ } else {
+ *dclk_freq = -1;
+ return;
+ }
+
+ if (*dclk_freq == 0) {
+ /* First pass, speed was 0 */
+ if (dmi_mem_clk_speed > 0) {
+ /* Set speed if a valid speed is read */
+ *dclk_freq = dmi_mem_clk_speed;
+ } else {
+ /* Otherwise we don't have a valid speed */
+ *dclk_freq = -1;
+ }
+ } else if (*dclk_freq > 0 &&
+ *dclk_freq != dmi_mem_clk_speed) {
+ /*
+ * If we have a speed, check that all DIMMS are the same
+ * speed, otherwise set the speed as invalid.
+ */
+ *dclk_freq = -1;
+ }
+ }
+}
+
+/*
+ * The default DCLK frequency is used as a fallback if we
+ * fail to find anything reliable in the DMI. The value
+ * is taken straight from the datasheet.
+ */
+#define DEFAULT_DCLK_FREQ 800
+
+static int get_dclk_freq(void)
+{
+ int dclk_freq = 0;
+
+ dmi_walk(decode_dclk, (void *)&dclk_freq);
+
+ if (dclk_freq < 1)
+ return DEFAULT_DCLK_FREQ;
+
+ return dclk_freq;
+}
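The pointer subtractions in decode_dclk() are an open-coded offsetof(): 'type' is the first member, so each difference equals the member's offset within struct memdev_dmi_entry. Comparing dh->length against those offsets matters because older SMBIOS versions emit shorter memory-device records that simply end before the newer fields (roughly, the configured clock appears in SMBIOS 2.7, the plain speed in 2.3). An equivalent formulation of the guard, with 'entry' standing for the cast memdev_dmi_entry pointer:

	if (dh->length > offsetof(struct memdev_dmi_entry, conf_mem_clk_speed))
		dmi_mem_clk_speed = entry->conf_mem_clk_speed;
	else if (dh->length > offsetof(struct memdev_dmi_entry, speed))
		dmi_mem_clk_speed = entry->speed;
	else
		*dclk_freq = -1;	/* record too old to carry a speed */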
+
+/*
+ * set_sdram_scrub_rate	This routine sets the byte/sec bandwidth scrub rate
+ *			in hardware according to the SCRUBINTERVAL formula
+ *			found in the datasheet.
+ */
+static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
+{
+ struct i7core_pvt *pvt = mci->pvt_info;
+ struct pci_dev *pdev;
+ u32 dw_scrub;
+ u32 dw_ssr;
+
+ /* Get data from the MC register, function 2 */
+ pdev = pvt->pci_mcr[2];
+ if (!pdev)
+ return -ENODEV;
+
+ pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
+
+ if (new_bw == 0) {
+		/* Prepare to disable patrol scrub */
+ dw_scrub &= ~STARTSCRUB;
+ /* Stop the patrol scrub engine */
+ write_and_test(pdev, MC_SCRUB_CONTROL,
+ dw_scrub & ~SCRUBINTERVAL_MASK);
+
+ /* Get current status of scrub rate and set bit to disable */
+ pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
+ dw_ssr &= ~SSR_MODE_MASK;
+ dw_ssr |= SSR_MODE_DISABLE;
+ } else {
+ const int cache_line_size = 64;
+ const u32 freq_dclk_mhz = pvt->dclk_freq;
+ unsigned long long scrub_interval;
+ /*
+ * Translate the desired scrub rate to a register value and
+ * program the corresponding register value.
+ */
+ scrub_interval = (unsigned long long)freq_dclk_mhz *
+ cache_line_size * 1000000;
+ do_div(scrub_interval, new_bw);
+
+ if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
+ return -EINVAL;
+
+ dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
+
+ /* Start the patrol scrub engine */
+ pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
+ STARTSCRUB | dw_scrub);
+
+ /* Get current status of scrub rate and set bit to enable */
+ pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
+ dw_ssr &= ~SSR_MODE_MASK;
+ dw_ssr |= SSR_MODE_ENABLE;
+ }
+ /* Disable or enable scrubbing */
+ pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
+
+ return new_bw;
+}
+
+/*
+ * get_sdram_scrub_rate	This routine converts the current scrub rate value
+ *			into a byte/sec bandwidth according to the
+ *			SCRUBINTERVAL formula found in the datasheet.
+ */
+static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
+{
+ struct i7core_pvt *pvt = mci->pvt_info;
+ struct pci_dev *pdev;
+ const u32 cache_line_size = 64;
+ const u32 freq_dclk_mhz = pvt->dclk_freq;
+ unsigned long long scrub_rate;
+ u32 scrubval;
+
+ /* Get data from the MC register, function 2 */
+ pdev = pvt->pci_mcr[2];
+ if (!pdev)
+ return -ENODEV;
+
+ /* Get current scrub control data */
+ pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
+
+ /* Mask highest 8-bits to 0 */
+ scrubval &= SCRUBINTERVAL_MASK;
+ if (!scrubval)
+ return 0;
+
+ /* Calculate scrub rate value into byte/sec bandwidth */
+ scrub_rate = (unsigned long long)freq_dclk_mhz *
+ 1000000 * cache_line_size;
+ do_div(scrub_rate, scrubval);
+ return (int)scrub_rate;
+}
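The two conversions above are exact inverses of each other, which is easy to check with the datasheet-default 800 MHz DCLK and the 64-byte cache line:

	/* set: requested 1000000 B/s -> register value */
	unsigned long long interval = 800ULL * 64 * 1000000;	/* 5.12e10 */
	do_div(interval, 1000000);	/* interval == 51200 cycles/line */
	/* 51200 <= SCRUBINTERVAL_MASK (0xffffff), so it programs as-is */

	/* get: register value 51200 -> bandwidth */
	unsigned long long rate = 800ULL * 1000000 * 64;
	do_div(rate, 51200);		/* rate == 1000000 B/s again */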
+
+static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
+{
+ struct i7core_pvt *pvt = mci->pvt_info;
+ u32 pci_lock;
+
+ /* Unlock writes to pci registers */
+ pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
+ pci_lock &= ~0x3;
+ pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
+ pci_lock | MC_CFG_UNLOCK);
+
+ mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
+ mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
+}
+
+static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
+{
+ struct i7core_pvt *pvt = mci->pvt_info;
+ u32 pci_lock;
+
+ /* Lock writes to pci registers */
+ pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
+ pci_lock &= ~0x3;
+ pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
+ pci_lock | MC_CFG_LOCK);
}
static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
@@ -1874,7 +2149,8 @@ static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
&pvt->i7core_dev->pdev[0]->dev,
EDAC_MOD_STR);
if (unlikely(!pvt->i7core_pci))
- pr_warn("Unable to setup PCI error report via EDAC\n");
+ i7core_printk(KERN_WARNING,
+ "Unable to setup PCI error report via EDAC\n");
}
static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
@@ -1894,8 +2170,7 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
struct i7core_pvt *pvt;
if (unlikely(!mci || !mci->pvt_info)) {
- debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
- __func__, &i7core_dev->pdev[0]->dev);
+ edac_dbg(0, "MC: dev = %p\n", &i7core_dev->pdev[0]->dev);
i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
return;
@@ -1903,19 +2178,20 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
pvt = mci->pvt_info;
- debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
- __func__, mci, &i7core_dev->pdev[0]->dev);
+ edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
- /* Disable MCE NMI handler */
- edac_mce_unregister(&pvt->edac_mce);
+ /* Disable scrubrate setting */
+ if (pvt->enable_scrub)
+ disable_sdram_scrub_setting(mci);
/* Disable EDAC polling */
i7core_pci_ctl_release(pvt);
/* Remove MC sysfs nodes */
- edac_mc_del_mc(mci->dev);
+ i7core_delete_sysfs_devices(mci);
+ edac_mc_del_mc(mci->pdev);
- debugf1("%s: free mci struct\n", mci->ctl_name);
+ edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
kfree(mci->ctl_name);
edac_mc_free(mci);
i7core_dev->mci = NULL;
@@ -1925,20 +2201,23 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
{
struct mem_ctl_info *mci;
struct i7core_pvt *pvt;
- int rc, channels, csrows;
-
- /* Check the number of active and not disabled channels */
- rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
- if (unlikely(rc < 0))
- return rc;
+ int rc;
+ struct edac_mc_layer layers[2];
/* allocate a new MC control structure */
- mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
+
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = NUM_CHANS;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_SLOT;
+ layers[1].size = MAX_DIMMS;
+ layers[1].is_virt_csrow = true;
+ mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
+ sizeof(*pvt));
if (unlikely(!mci))
return -ENOMEM;
- debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
- __func__, mci, &i7core_dev->pdev[0]->dev);
+ edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
pvt = mci->pvt_info;
memset(pvt, 0, sizeof(*pvt));
@@ -1967,22 +2246,21 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
if (unlikely(rc < 0))
goto fail0;
- if (pvt->is_registered)
- mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
- else
- mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
/* Get dimm basic config */
get_dimm_config(mci);
/* record ptr to the generic device */
- mci->dev = &i7core_dev->pdev[0]->dev;
+ mci->pdev = &i7core_dev->pdev[0]->dev;
/* Set the function pointer to an actual operation function */
mci->edac_check = i7core_check_error;
+ /* Enable scrubrate setting */
+ if (pvt->enable_scrub)
+ enable_sdram_scrub_setting(mci);
+
/* add this new MC control structure to EDAC's list of MCs */
if (unlikely(edac_mc_add_mc(mci))) {
- debugf0("MC: " __FILE__
- ": %s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
/* FIXME: perhaps some code should go here that disables error
* reporting if we just enabled it
*/
@@ -1990,6 +2268,12 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
rc = -EINVAL;
goto fail0;
}
+ if (i7core_create_sysfs_devices(mci)) {
+ edac_dbg(0, "MC: failed to create sysfs nodes\n");
+ edac_mc_del_mc(mci->pdev);
+ rc = -EINVAL;
+ goto fail0;
+ }
/* Default error mask is any memory */
pvt->inject.channel = 0;
@@ -2002,21 +2286,11 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
/* allocating generic PCI control info */
i7core_pci_ctl_create(pvt);
- /* Registers on edac_mce in order to receive memory errors */
- pvt->edac_mce.priv = mci;
- pvt->edac_mce.check_error = i7core_mce_check_error;
- rc = edac_mce_register(&pvt->edac_mce);
- if (unlikely(rc < 0)) {
- debugf0("MC: " __FILE__
- ": %s(): failed edac_mce_register()\n", __func__);
- goto fail1;
- }
+ /* DCLK for scrub rate setting */
+ pvt->dclk_freq = get_dclk_freq();
return 0;
-fail1:
- i7core_pci_ctl_release(pvt);
- edac_mc_del_mc(mci->dev);
fail0:
kfree(mci->ctl_name);
edac_mc_free(mci);
@@ -2032,10 +2306,9 @@ fail0:
* < 0 for error code
*/
-static int __devinit i7core_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static int i7core_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
- int rc;
+ int rc, count = 0;
struct i7core_dev *i7core_dev;
/* get the pci devices we want to reserve for our use */
@@ -2055,12 +2328,28 @@ static int __devinit i7core_probe(struct pci_dev *pdev,
goto fail0;
list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
+ count++;
rc = i7core_register_mci(i7core_dev);
if (unlikely(rc < 0))
goto fail1;
}
- i7core_printk(KERN_INFO, "Driver loaded.\n");
+ /*
+ * Nehalem-EX uses a different memory controller. However, as the
+ * memory controller is not visible on some Nehalem/Nehalem-EP, we
+	 * need to indirectly probe via an X58 PCI device. The same devices
+ * are found on (some) Nehalem-EX. So, on those machines, the
+ * probe routine needs to return -ENODEV, as the actual Memory
+ * Controller registers won't be detected.
+ */
+ if (!count) {
+ rc = -ENODEV;
+ goto fail1;
+ }
+
+ i7core_printk(KERN_INFO,
+ "Driver loaded, %d memory controller(s) found.\n",
+ count);
mutex_unlock(&i7core_edac_lock);
return 0;
@@ -2079,11 +2368,11 @@ fail0:
* i7core_remove destructor for one instance of device
*
*/
-static void __devexit i7core_remove(struct pci_dev *pdev)
+static void i7core_remove(struct pci_dev *pdev)
{
struct i7core_dev *i7core_dev;
- debugf0(__FILE__ ": %s()\n", __func__);
+ edac_dbg(0, "\n");
/*
* we have a trouble here: pdev value for removal will be wrong, since
@@ -2120,7 +2409,7 @@ MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
static struct pci_driver i7core_driver = {
.name = "i7core_edac",
.probe = i7core_probe,
- .remove = __devexit_p(i7core_remove),
+ .remove = i7core_remove,
.id_table = i7core_pci_tbl,
};
@@ -2132,7 +2421,7 @@ static int __init i7core_init(void)
{
int pci_rc;
- debugf2("MC: " __FILE__ ": %s()\n", __func__);
+ edac_dbg(2, "\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -2142,8 +2431,10 @@ static int __init i7core_init(void)
pci_rc = pci_register_driver(&i7core_driver);
- if (pci_rc >= 0)
+ if (pci_rc >= 0) {
+ mce_register_decode_chain(&i7_mce_dec);
return 0;
+ }
i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
pci_rc);
@@ -2157,15 +2448,16 @@ static int __init i7core_init(void)
*/
static void __exit i7core_exit(void)
{
- debugf2("MC: " __FILE__ ": %s()\n", __func__);
+ edac_dbg(2, "\n");
pci_unregister_driver(&i7core_driver);
+ mce_unregister_decode_chain(&i7_mce_dec);
}
module_init(i7core_init);
module_exit(i7core_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
+MODULE_AUTHOR("Mauro Carvalho Chehab");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
I7CORE_REVISION);
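For reference, the decode-chain registration added in this file assumes i7_mce_dec is a struct notifier_block whose callback sees every logged MCE; a hedged sketch of that pattern (all names hypothetical):

	#include <asm/mce.h>

	static int example_mce_notify(struct notifier_block *nb,
				      unsigned long val, void *data)
	{
		struct mce *m = data;

		/* decode m->status, m->addr, m->misc here ... */
		return NOTIFY_STOP;	/* claim the event; NOTIFY_DONE passes it on */
	}

	static struct notifier_block example_mce_dec = {
		.notifier_call = example_mce_notify,
	};

	/* mce_register_decode_chain(&example_mce_dec) on load,
	 * mce_unregister_decode_chain(&example_mce_dec) on unload. */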
diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c
index 678405ab04e..d730e276d1a 100644
--- a/drivers/edac/i82443bxgx_edac.c
+++ b/drivers/edac/i82443bxgx_edac.c
@@ -12,7 +12,7 @@
* 440GX fix by Jason Uhlenkott <juhlenko@akamai.com>.
*
* Written with reference to 82443BX Host Bridge Datasheet:
- * http://download.intel.com/design/chipsets/datashts/29063301.pdf
+ * http://download.intel.com/design/chipsets/datashts/29063301.pdf
* references to this document given in [].
*
* This module doesn't support the 440LX, but it may be possible to
@@ -124,7 +124,7 @@ static void i82443bxgx_edacmc_get_error_info(struct mem_ctl_info *mci,
*info)
{
struct pci_dev *pdev;
- pdev = to_pci_dev(mci->dev);
+ pdev = to_pci_dev(mci->pdev);
pci_read_config_dword(pdev, I82443BXGX_EAP, &info->eap);
if (info->eap & I82443BXGX_EAP_OFFSET_SBE)
/* Clear error to allow next error to be reported [p.61] */
@@ -156,19 +156,19 @@ static int i82443bxgx_edacmc_process_error_info(struct mem_ctl_info *mci,
if (info->eap & I82443BXGX_EAP_OFFSET_SBE) {
error_found = 1;
if (handle_errors)
- edac_mc_handle_ce(mci, page, pageoffset,
- /* 440BX/GX don't make syndrome information
- * available */
- 0, edac_mc_find_csrow_by_page(mci, page), 0,
- mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ page, pageoffset, 0,
+ edac_mc_find_csrow_by_page(mci, page),
+ 0, -1, mci->ctl_name, "");
}
if (info->eap & I82443BXGX_EAP_OFFSET_MBE) {
error_found = 1;
if (handle_errors)
- edac_mc_handle_ue(mci, page, pageoffset,
- edac_mc_find_csrow_by_page(mci, page),
- mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ page, pageoffset, 0,
+ edac_mc_find_csrow_by_page(mci, page),
+ 0, -1, mci->ctl_name, "");
}
return error_found;
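Both branches above funnel into the consolidated edac_mc_handle_error() that replaces the old edac_mc_handle_ce()/edac_mc_handle_ue() pair throughout this series. A sketch of the argument layout (layer positions follow the layers handed to edac_mc_alloc(); -1 means unknown at that layer):

	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED,	/* or HW_EVENT_ERR_UNCORRECTED */
			     mci,
			     1,			/* error count */
			     page,		/* page frame number */
			     offset,		/* offset within the page */
			     syndrome,		/* 0 when not available */
			     top_layer, mid_layer, low_layer,
			     "driver message", "");	/* free-form detail */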
@@ -178,7 +178,7 @@ static void i82443bxgx_edacmc_check(struct mem_ctl_info *mci)
{
struct i82443bxgx_edacmc_error_info info;
- debugf1("MC%d: %s: %s()\n", mci->mc_idx, __FILE__, __func__);
+ edac_dbg(1, "MC%d\n", mci->mc_idx);
i82443bxgx_edacmc_get_error_info(mci, &info);
i82443bxgx_edacmc_process_error_info(mci, &info, 1);
}
@@ -189,6 +189,7 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
enum mem_type mtype)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
int index;
u8 drbar, dramc;
u32 row_base, row_high_limit, row_high_limit_last;
@@ -196,16 +197,17 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc);
row_high_limit_last = 0;
for (index = 0; index < mci->nr_csrows; index++) {
- csrow = &mci->csrows[index];
+ csrow = mci->csrows[index];
+ dimm = csrow->channels[0]->dimm;
+
pci_read_config_byte(pdev, I82443BXGX_DRB + index, &drbar);
- debugf1("MC%d: %s: %s() Row=%d DRB = %#0x\n",
- mci->mc_idx, __FILE__, __func__, index, drbar);
+ edac_dbg(1, "MC%d: Row=%d DRB = %#0x\n",
+ mci->mc_idx, index, drbar);
row_high_limit = ((u32) drbar << 23);
/* find the DRAM Chip Select Base address and mask */
- debugf1("MC%d: %s: %s() Row=%d, "
- "Boundry Address=%#0x, Last = %#0x\n",
- mci->mc_idx, __FILE__, __func__, index, row_high_limit,
- row_high_limit_last);
+ edac_dbg(1, "MC%d: Row=%d, Boundary Address=%#0x, Last = %#0x\n",
+ mci->mc_idx, index, row_high_limit,
+ row_high_limit_last);
/* 440GX goes to 2GB, represented with a DRB of 0. */
if (row_high_limit_last && !row_high_limit)
@@ -217,14 +219,14 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
row_base = row_high_limit_last;
csrow->first_page = row_base >> PAGE_SHIFT;
csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1;
- csrow->nr_pages = csrow->last_page - csrow->first_page + 1;
+ dimm->nr_pages = csrow->last_page - csrow->first_page + 1;
/* EAP reports in 4kilobyte granularity [61] */
- csrow->grain = 1 << 12;
- csrow->mtype = mtype;
+ dimm->grain = 1 << 12;
+ dimm->mtype = mtype;
/* I don't think 440BX can tell you device type? FIXME? */
- csrow->dtype = DEV_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
/* Mode is global to all rows on 440BX */
- csrow->edac_mode = edac_mode;
+ dimm->edac_mode = edac_mode;
row_high_limit_last = row_high_limit;
}
}
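The loop above reflects the reworked data model: mci->csrows[] and csrow->channels[] are now arrays of pointers, and the per-stick attributes live in struct dimm_info. A minimal sketch of the new traversal:

	static void example_walk(struct mem_ctl_info *mci)
	{
		int row, chan;

		for (row = 0; row < mci->nr_csrows; row++) {
			struct csrow_info *csrow = mci->csrows[row];	/* was &mci->csrows[row] */

			for (chan = 0; chan < csrow->nr_channels; chan++) {
				struct dimm_info *dimm = csrow->channels[chan]->dimm;

				dimm->grain = 1 << 12;	/* attributes moved off the csrow */
			}
		}
	}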
@@ -232,12 +234,13 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
u8 dramc;
u32 nbxcfg, ecc_mode;
enum mem_type mtype;
enum edac_type edac_mode;
- debugf0("MC: %s: %s()\n", __FILE__, __func__);
+ edac_dbg(0, "MC:\n");
/* Something is really hosed if PCI config space reads from
* the MC aren't working.
@@ -245,13 +248,18 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
if (pci_read_config_dword(pdev, I82443BXGX_NBXCFG, &nbxcfg))
return -EIO;
- mci = edac_mc_alloc(0, I82443BXGX_NR_CSROWS, I82443BXGX_NR_CHANS, 0);
-
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = I82443BXGX_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = I82443BXGX_NR_CHANS;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (mci == NULL)
return -ENOMEM;
- debugf0("MC: %s: %s(): mci = %p\n", __FILE__, __func__, mci);
- mci->dev = &pdev->dev;
+ edac_dbg(0, "MC: mci = %p\n", mci);
+ mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_EDO | MEM_FLAG_SDR | MEM_FLAG_RDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc);
@@ -266,8 +274,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
mtype = MEM_RDR;
break;
default:
- debugf0("Unknown/reserved DRAM type value "
- "in DRAMC register!\n");
+ edac_dbg(0, "Unknown/reserved DRAM type value in DRAMC register!\n");
mtype = -MEM_UNKNOWN;
}
@@ -296,8 +303,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
edac_mode = EDAC_SECDED;
break;
default:
- debugf0("%s(): Unknown/reserved ECC state "
- "in NBXCFG register!\n", __func__);
+ edac_dbg(0, "Unknown/reserved ECC state in NBXCFG register!\n");
edac_mode = EDAC_UNKNOWN;
break;
}
@@ -305,7 +311,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
i82443bxgx_init_csrows(mci, pdev, edac_mode, mtype);
/* Many BIOSes don't clear error flags on boot, so do this
- * here, or we get "phantom" errors occuring at module-load
+ * here, or we get "phantom" errors occurring at module-load
* time. */
pci_write_bits32(pdev, I82443BXGX_EAP,
(I82443BXGX_EAP_OFFSET_SBE |
@@ -321,7 +327,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
mci->ctl_page_to_phys = NULL;
if (edac_mc_add_mc(mci)) {
- debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail;
}
@@ -336,7 +342,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
__func__);
}
- debugf3("MC: %s: %s(): success\n", __FILE__, __func__);
+ edac_dbg(3, "MC: success\n");
return 0;
fail:
@@ -347,12 +353,12 @@ fail:
EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_probe1);
/* returns count (>= 0), or negative on error */
-static int __devinit i82443bxgx_edacmc_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int i82443bxgx_edacmc_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
{
int rc;
- debugf0("MC: %s: %s()\n", __FILE__, __func__);
+ edac_dbg(0, "MC:\n");
/* don't need to call pci_enable_device() */
rc = i82443bxgx_edacmc_probe1(pdev, ent->driver_data);
@@ -363,11 +369,11 @@ static int __devinit i82443bxgx_edacmc_init_one(struct pci_dev *pdev,
return rc;
}
-static void __devexit i82443bxgx_edacmc_remove_one(struct pci_dev *pdev)
+static void i82443bxgx_edacmc_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
- debugf0("%s: %s()\n", __FILE__, __func__);
+ edac_dbg(0, "\n");
if (i82443bxgx_pci)
edac_pci_release_generic_ctl(i82443bxgx_pci);
@@ -380,7 +386,7 @@ static void __devexit i82443bxgx_edacmc_remove_one(struct pci_dev *pdev)
EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one);
-static const struct pci_device_id i82443bxgx_pci_tbl[] __devinitdata = {
+static const struct pci_device_id i82443bxgx_pci_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0)},
@@ -393,7 +399,7 @@ MODULE_DEVICE_TABLE(pci, i82443bxgx_pci_tbl);
static struct pci_driver i82443bxgx_edacmc_driver = {
.name = EDAC_MOD_STR,
.probe = i82443bxgx_edacmc_init_one,
- .remove = __devexit_p(i82443bxgx_edacmc_remove_one),
+ .remove = i82443bxgx_edacmc_remove_one,
.id_table = i82443bxgx_pci_tbl,
};
@@ -419,7 +425,7 @@ static int __init i82443bxgx_edacmc_init(void)
id = &i82443bxgx_pci_tbl[i];
}
if (!mci_pdev) {
- debugf0("i82443bxgx pci_get_device fail\n");
+ edac_dbg(0, "i82443bxgx pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
@@ -427,7 +433,7 @@ static int __init i82443bxgx_edacmc_init(void)
pci_rc = i82443bxgx_edacmc_init_one(mci_pdev, i82443bxgx_pci_tbl);
if (pci_rc < 0) {
- debugf0("i82443bxgx init fail\n");
+ edac_dbg(0, "i82443bxgx init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c
index b8a95cf5071..3382f6344e4 100644
--- a/drivers/edac/i82860_edac.c
+++ b/drivers/edac/i82860_edac.c
@@ -16,7 +16,7 @@
#include <linux/edac.h>
#include "edac_core.h"
-#define I82860_REVISION " Ver: 2.0.2 " __DATE__
+#define I82860_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR "i82860_edac"
#define i82860_printk(level, fmt, arg...) \
@@ -67,7 +67,7 @@ static void i82860_get_error_info(struct mem_ctl_info *mci,
{
struct pci_dev *pdev;
- pdev = to_pci_dev(mci->dev);
+ pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
@@ -99,6 +99,7 @@ static int i82860_process_error_info(struct mem_ctl_info *mci,
struct i82860_error_info *info,
int handle_errors)
{
+ struct dimm_info *dimm;
int row;
if (!(info->errsts2 & 0x0003))
@@ -108,18 +109,25 @@ static int i82860_process_error_info(struct mem_ctl_info *mci,
return 1;
if ((info->errsts ^ info->errsts2) & 0x0003) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, "UE overwrote CE", "");
info->errsts = info->errsts2;
}
info->eap >>= PAGE_SHIFT;
row = edac_mc_find_csrow_by_page(mci, info->eap);
+ dimm = mci->csrows[row]->channels[0]->dimm;
if (info->errsts & 0x0002)
- edac_mc_handle_ue(mci, info->eap, 0, row, "i82860 UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ info->eap, 0, 0,
+ dimm->location[0], dimm->location[1], -1,
+ "i82860 UE", "");
else
- edac_mc_handle_ce(mci, info->eap, 0, info->derrsyn, row, 0,
- "i82860 UE");
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ info->eap, 0, info->derrsyn,
+ dimm->location[0], dimm->location[1], -1,
+ "i82860 CE", "");
return 1;
}
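The handler above reports layer coordinates taken from dimm->location[], which the core fills in at allocation time with the DIMM's position in each edac_mc_layer. A sketch of the idea, assuming the two-layer channel/slot setup this driver's probe declares below:

	/* layers[0] = channel, layers[1] = slot */
	struct dimm_info *dimm = mci->csrows[row]->channels[0]->dimm;

	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, page, 0, syndrome,
			     dimm->location[0],	/* channel coordinate */
			     dimm->location[1],	/* slot coordinate */
			     -1, "example CE", "");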
@@ -128,7 +136,7 @@ static void i82860_check(struct mem_ctl_info *mci)
{
struct i82860_error_info info;
- debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+ edac_dbg(1, "MC%d\n", mci->mc_idx);
i82860_get_error_info(mci, &info);
i82860_process_error_info(mci, &info, 1);
}
@@ -140,6 +148,7 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
u16 value;
u32 cumul_size;
struct csrow_info *csrow;
+ struct dimm_info *dimm;
int index;
pci_read_config_word(pdev, I82860_MCHCFG, &mchcfg_ddim);
@@ -152,47 +161,56 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
* in all eight rows.
*/
for (index = 0; index < mci->nr_csrows; index++) {
- csrow = &mci->csrows[index];
+ csrow = mci->csrows[index];
+ dimm = csrow->channels[0]->dimm;
+
pci_read_config_word(pdev, I82860_GBA + index * 2, &value);
cumul_size = (value & I82860_GBA_MASK) <<
(I82860_GBA_SHIFT - PAGE_SHIFT);
- debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index,
- cumul_size);
+ edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
if (cumul_size == last_cumul_size)
continue; /* not populated */
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
+ dimm->nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = 1 << 12; /* I82860_EAP has 4KiB reolution */
- csrow->mtype = MEM_RMBS;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE;
+		dimm->grain = 1 << 12;	/* I82860_EAP has 4KiB resolution */
+ dimm->mtype = MEM_RMBS;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE;
}
}
static int i82860_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct i82860_error_info discard;
- /* RDRAM has channels but these don't map onto the abstractions that
- edac uses.
- The device groups from the GRA registers seem to map reasonably
- well onto the notion of a chip select row.
- There are 16 GRA registers and since the name is associated with
- the channel and the GRA registers map to physical devices so we are
- going to make 1 channel for group.
+ /*
+ * RDRAM has channels but these don't map onto the csrow abstraction.
+	 * According to the datasheet, there are 2 Rambus channels, supporting
+ * up to 16 direct RDRAM devices.
+ * The device groups from the GRA registers seem to map reasonably
+ * well onto the notion of a chip select row.
+ * There are 16 GRA registers and since the name is associated with
+	 * the channel and the GRA registers map to physical devices, so we are
+	 * going to make one channel per group.
*/
- mci = edac_mc_alloc(0, 16, 1, 0);
-
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = 2;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_SLOT;
+ layers[1].size = 8;
+ layers[1].is_virt_csrow = true;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (!mci)
return -ENOMEM;
- debugf3("%s(): init mci\n", __func__);
- mci->dev = &pdev->dev;
+ edac_dbg(3, "init mci\n");
+ mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
/* I"m not sure about this but I think that all RDRAM is SECDED */
@@ -210,7 +228,7 @@ static int i82860_probe1(struct pci_dev *pdev, int dev_idx)
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
- debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail;
}
@@ -226,7 +244,7 @@ static int i82860_probe1(struct pci_dev *pdev, int dev_idx)
}
/* get this far and it's successful */
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
return 0;
@@ -236,12 +254,12 @@ fail:
}
/* returns count (>= 0), or negative on error */
-static int __devinit i82860_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int i82860_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
{
int rc;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
i82860_printk(KERN_INFO, "i82860 init one\n");
if (pci_enable_device(pdev) < 0)
@@ -255,11 +273,11 @@ static int __devinit i82860_init_one(struct pci_dev *pdev,
return rc;
}
-static void __devexit i82860_remove_one(struct pci_dev *pdev)
+static void i82860_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
if (i82860_pci)
edac_pci_release_generic_ctl(i82860_pci);
@@ -270,7 +288,7 @@ static void __devexit i82860_remove_one(struct pci_dev *pdev)
edac_mc_free(mci);
}
-static const struct pci_device_id i82860_pci_tbl[] __devinitdata = {
+static const struct pci_device_id i82860_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
I82860},
@@ -284,7 +302,7 @@ MODULE_DEVICE_TABLE(pci, i82860_pci_tbl);
static struct pci_driver i82860_driver = {
.name = EDAC_MOD_STR,
.probe = i82860_init_one,
- .remove = __devexit_p(i82860_remove_one),
+ .remove = i82860_remove_one,
.id_table = i82860_pci_tbl,
};
@@ -292,7 +310,7 @@ static int __init i82860_init(void)
{
int pci_rc;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -305,7 +323,7 @@ static int __init i82860_init(void)
PCI_DEVICE_ID_INTEL_82860_0, NULL);
if (mci_pdev == NULL) {
- debugf0("860 pci_get_device fail\n");
+ edac_dbg(0, "860 pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
@@ -313,7 +331,7 @@ static int __init i82860_init(void)
pci_rc = i82860_init_one(mci_pdev, i82860_pci_tbl);
if (pci_rc < 0) {
- debugf0("860 init fail\n");
+ edac_dbg(0, "860 init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
@@ -333,7 +351,7 @@ fail0:
static void __exit i82860_exit(void)
{
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
pci_unregister_driver(&i82860_driver);
diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c
index b2fd1e89914..64b68320249 100644
--- a/drivers/edac/i82875p_edac.c
+++ b/drivers/edac/i82875p_edac.c
@@ -20,7 +20,7 @@
#include <linux/edac.h>
#include "edac_core.h"
-#define I82875P_REVISION " Ver: 2.0.2 " __DATE__
+#define I82875P_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR "i82875p_edac"
#define i82875p_printk(level, fmt, arg...) \
@@ -38,7 +38,8 @@
#endif /* PCI_DEVICE_ID_INTEL_82875_6 */
/* four csrows in dual channel, eight in single channel */
-#define I82875P_NR_CSROWS(nr_chans) (8/(nr_chans))
+#define I82875P_NR_DIMMS 8
+#define I82875P_NR_CSROWS(nr_chans) (I82875P_NR_DIMMS / (nr_chans))
/* Intel 82875p register addresses - device 0 function 0 - DRAM Controller */
#define I82875P_EAP 0x58 /* Error Address Pointer (32b)
@@ -188,7 +189,7 @@ static void i82875p_get_error_info(struct mem_ctl_info *mci,
{
struct pci_dev *pdev;
- pdev = to_pci_dev(mci->dev);
+ pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
@@ -226,7 +227,7 @@ static int i82875p_process_error_info(struct mem_ctl_info *mci,
{
int row, multi_chan;
- multi_chan = mci->csrows[0].nr_channels - 1;
+ multi_chan = mci->csrows[0]->nr_channels - 1;
if (!(info->errsts & 0x0081))
return 0;
@@ -235,7 +236,9 @@ static int i82875p_process_error_info(struct mem_ctl_info *mci,
return 1;
if ((info->errsts ^ info->errsts2) & 0x0081) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1,
+ "UE overwrote CE", "");
info->errsts = info->errsts2;
}
@@ -243,11 +246,15 @@ static int i82875p_process_error_info(struct mem_ctl_info *mci,
row = edac_mc_find_csrow_by_page(mci, info->eap);
if (info->errsts & 0x0080)
- edac_mc_handle_ue(mci, info->eap, 0, row, "i82875p UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ info->eap, 0, 0,
+ row, -1, -1,
+ "i82875p UE", "");
else
- edac_mc_handle_ce(mci, info->eap, 0, info->derrsyn, row,
- multi_chan ? (info->des & 0x1) : 0,
- "i82875p CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ info->eap, 0, info->derrsyn,
+ row, multi_chan ? (info->des & 0x1) : 0,
+ -1, "i82875p CE", "");
return 1;
}
@@ -256,7 +263,7 @@ static void i82875p_check(struct mem_ctl_info *mci)
{
struct i82875p_error_info info;
- debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+ edac_dbg(1, "MC%d\n", mci->mc_idx);
i82875p_get_error_info(mci, &info);
i82875p_process_error_info(mci, &info, 1);
}
@@ -268,7 +275,6 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev,
{
struct pci_dev *dev;
void __iomem *window;
- int err;
*ovrfl_pdev = NULL;
*ovrfl_window = NULL;
@@ -286,13 +292,8 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev,
if (dev == NULL)
return 1;
- err = pci_bus_add_device(dev);
- if (err) {
- i82875p_printk(KERN_ERR,
- "%s(): pci_bus_add_device() Failed\n",
- __func__);
- }
pci_bus_assign_resources(dev->bus);
+ pci_bus_add_device(dev);
}
*ovrfl_pdev = dev;
@@ -342,11 +343,13 @@ static void i82875p_init_csrows(struct mem_ctl_info *mci,
void __iomem * ovrfl_window, u32 drc)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
+ unsigned nr_chans = dual_channel_active(drc) + 1;
unsigned long last_cumul_size;
u8 value;
u32 drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */
- u32 cumul_size;
- int index;
+ u32 cumul_size, nr_pages;
+ int index, j;
drc_ddim = (drc >> 18) & 0x1;
last_cumul_size = 0;
@@ -358,23 +361,28 @@ static void i82875p_init_csrows(struct mem_ctl_info *mci,
*/
for (index = 0; index < mci->nr_csrows; index++) {
- csrow = &mci->csrows[index];
+ csrow = mci->csrows[index];
value = readb(ovrfl_window + I82875P_DRB + index);
cumul_size = value << (I82875P_DRB_SHIFT - PAGE_SHIFT);
- debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index,
- cumul_size);
+ edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
if (cumul_size == last_cumul_size)
continue; /* not populated */
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
+ nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = 1 << 12; /* I82875P_EAP has 4KiB reolution */
- csrow->mtype = MEM_DDR;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = drc_ddim ? EDAC_SECDED : EDAC_NONE;
+
+ for (j = 0; j < nr_chans; j++) {
+ dimm = csrow->channels[j]->dimm;
+
+ dimm->nr_pages = nr_pages / nr_chans;
+			dimm->grain = 1 << 12;	/* I82875P_EAP has 4KiB resolution */
+ dimm->mtype = MEM_DDR;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = drc_ddim ? EDAC_SECDED : EDAC_NONE;
+ }
}
}
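The DRB registers walked above hold cumulative row boundaries, so each row's population is the delta between consecutive readings. A worked sketch, assuming a 32 MiB DRB granularity (a shift of 25) and 4 KiB pages; the shift constant is illustrative, the real driver uses I82875P_DRB_SHIFT:

	u32 cumul_size = value << (25 - PAGE_SHIFT);	/* 32 MiB units -> pages */
	u32 nr_pages   = cumul_size - last_cumul_size;	/* 0 => row not populated */

	/* dual-channel interleave: the row spans one DIMM per channel */
	for (j = 0; j < nr_chans; j++)
		csrow->channels[j]->dimm->nr_pages = nr_pages / nr_chans;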
@@ -382,6 +390,7 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc = -ENODEV;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct i82875p_pvt *pvt;
struct pci_dev *ovrfl_pdev;
void __iomem *ovrfl_window;
@@ -389,27 +398,27 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx)
u32 nr_chans;
struct i82875p_error_info discard;
- debugf0("%s()\n", __func__);
-
- ovrfl_pdev = pci_get_device(PCI_VEND_DEV(INTEL, 82875_6), NULL);
+ edac_dbg(0, "\n");
if (i82875p_setup_overfl_dev(pdev, &ovrfl_pdev, &ovrfl_window))
return -ENODEV;
drc = readl(ovrfl_window + I82875P_DRC);
nr_chans = dual_channel_active(drc) + 1;
- mci = edac_mc_alloc(sizeof(*pvt), I82875P_NR_CSROWS(nr_chans),
- nr_chans, 0);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = I82875P_NR_CSROWS(nr_chans);
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = nr_chans;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (!mci) {
rc = -ENOMEM;
goto fail0;
}
- /* Keeps mci available after edac_mc_del_mc() till edac_mc_free() */
- kobject_get(&mci->edac_mci_kobj);
-
- debugf3("%s(): init mci\n", __func__);
- mci->dev = &pdev->dev;
+ edac_dbg(3, "init mci\n");
+ mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_UNKNOWN;
@@ -419,7 +428,7 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx)
mci->dev_name = pci_name(pdev);
mci->edac_check = i82875p_check;
mci->ctl_page_to_phys = NULL;
- debugf3("%s(): init pvt\n", __func__);
+ edac_dbg(3, "init pvt\n");
pvt = (struct i82875p_pvt *)mci->pvt_info;
pvt->ovrfl_pdev = ovrfl_pdev;
pvt->ovrfl_window = ovrfl_window;
@@ -430,7 +439,7 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx)
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
- debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail1;
}
@@ -446,11 +455,10 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx)
}
/* get this far and it's successful */
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
return 0;
fail1:
- kobject_put(&mci->edac_mci_kobj);
edac_mc_free(mci);
fail0:
@@ -463,12 +471,12 @@ fail0:
}
/* returns count (>= 0), or negative on error */
-static int __devinit i82875p_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int i82875p_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
{
int rc;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
i82875p_printk(KERN_INFO, "i82875p init one\n");
if (pci_enable_device(pdev) < 0)
@@ -482,12 +490,12 @@ static int __devinit i82875p_init_one(struct pci_dev *pdev,
return rc;
}
-static void __devexit i82875p_remove_one(struct pci_dev *pdev)
+static void i82875p_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct i82875p_pvt *pvt = NULL;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
if (i82875p_pci)
edac_pci_release_generic_ctl(i82875p_pci);
@@ -511,7 +519,7 @@ static void __devexit i82875p_remove_one(struct pci_dev *pdev)
edac_mc_free(mci);
}
-static const struct pci_device_id i82875p_pci_tbl[] __devinitdata = {
+static const struct pci_device_id i82875p_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
I82875P},
@@ -525,7 +533,7 @@ MODULE_DEVICE_TABLE(pci, i82875p_pci_tbl);
static struct pci_driver i82875p_driver = {
.name = EDAC_MOD_STR,
.probe = i82875p_init_one,
- .remove = __devexit_p(i82875p_remove_one),
+ .remove = i82875p_remove_one,
.id_table = i82875p_pci_tbl,
};
@@ -533,7 +541,7 @@ static int __init i82875p_init(void)
{
int pci_rc;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -548,7 +556,7 @@ static int __init i82875p_init(void)
PCI_DEVICE_ID_INTEL_82875_0, NULL);
if (!mci_pdev) {
- debugf0("875p pci_get_device fail\n");
+ edac_dbg(0, "875p pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
@@ -556,7 +564,7 @@ static int __init i82875p_init(void)
pci_rc = i82875p_init_one(mci_pdev, i82875p_pci_tbl);
if (pci_rc < 0) {
- debugf0("875p init fail\n");
+ edac_dbg(0, "875p init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
@@ -576,7 +584,7 @@ fail0:
static void __exit i82875p_exit(void)
{
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
i82875p_remove_one(mci_pdev);
pci_dev_put(mci_pdev);
diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c
index 3218819b728..10b10521f62 100644
--- a/drivers/edac/i82975x_edac.c
+++ b/drivers/edac/i82975x_edac.c
@@ -16,7 +16,7 @@
#include <linux/edac.h>
#include "edac_core.h"
-#define I82975X_REVISION " Ver: 1.0.0 " __DATE__
+#define I82975X_REVISION " Ver: 1.0.0"
#define EDAC_MOD_STR "i82975x_edac"
#define i82975x_printk(level, fmt, arg...) \
@@ -29,7 +29,8 @@
#define PCI_DEVICE_ID_INTEL_82975_0 0x277c
#endif /* PCI_DEVICE_ID_INTEL_82975_0 */
-#define I82975X_NR_CSROWS(nr_chans) (8/(nr_chans))
+#define I82975X_NR_DIMMS 8
+#define I82975X_NR_CSROWS(nr_chans) (I82975X_NR_DIMMS / (nr_chans))
/* Intel 82975X register addresses - device 0 function 0 - DRAM Controller */
#define I82975X_EAP 0x58 /* Dram Error Address Pointer (32b)
@@ -160,8 +161,8 @@ NOTE: Only ONE of the three must be enabled
* 3:2 Rank 1 architecture
* 1:0 Rank 0 architecture
*
- * 00 => x16 devices; i.e 4 banks
- * 01 => x8 devices; i.e 8 banks
+ * 00 => 4 banks
+ * 01 => 8 banks
*/
#define I82975X_C0BNKARC 0x10e
#define I82975X_C1BNKARC 0x18e
@@ -240,7 +241,7 @@ static void i82975x_get_error_info(struct mem_ctl_info *mci,
{
struct pci_dev *pdev;
- pdev = to_pci_dev(mci->dev);
+ pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
@@ -277,9 +278,8 @@ static void i82975x_get_error_info(struct mem_ctl_info *mci,
static int i82975x_process_error_info(struct mem_ctl_info *mci,
struct i82975x_error_info *info, int handle_errors)
{
- int row, multi_chan, chan;
-
- multi_chan = mci->csrows[0].nr_channels - 1;
+ int row, chan;
+ unsigned long offst, page;
if (!(info->errsts2 & 0x0003))
return 0;
@@ -288,23 +288,41 @@ static int i82975x_process_error_info(struct mem_ctl_info *mci,
return 1;
if ((info->errsts ^ info->errsts2) & 0x0003) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, "UE overwrote CE", "");
info->errsts = info->errsts2;
}
- chan = info->eap & 1;
- info->eap >>= 1;
- if (info->xeap )
- info->eap |= 0x80000000;
- info->eap >>= PAGE_SHIFT;
- row = edac_mc_find_csrow_by_page(mci, info->eap);
+ page = (unsigned long) info->eap;
+ page >>= 1;
+ if (info->xeap & 1)
+ page |= 0x80000000;
+ page >>= (PAGE_SHIFT - 1);
+ row = edac_mc_find_csrow_by_page(mci, page);
+
+ if (row == -1) {
+ i82975x_mc_printk(mci, KERN_ERR, "error processing EAP:\n"
+ "\tXEAP=%u\n"
+ "\t EAP=0x%08x\n"
+ "\tPAGE=0x%08x\n",
+ (info->xeap & 1) ? 1 : 0, info->eap, (unsigned int) page);
+ return 0;
+ }
+ chan = (mci->csrows[row]->nr_channels == 1) ? 0 : info->eap & 1;
+ offst = info->eap
+		& ((1 << PAGE_SHIFT) -
+		   mci->csrows[row]->channels[chan]->dimm->grain);
if (info->errsts & 0x0002)
- edac_mc_handle_ue(mci, info->eap, 0, row, "i82975x UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ page, offst, 0,
+ row, -1, -1,
+ "i82975x UE", "");
else
- edac_mc_handle_ce(mci, info->eap, 0, info->derrsyn, row,
- multi_chan ? chan : 0,
- "i82975x CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ page, offst, info->derrsyn,
+ row, chan ? chan : 0, -1,
+ "i82975x CE", "");
return 1;
}
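The address reconstruction above is worth spelling out: EAP bit 0 selects the channel, XEAP supplies bit 31 of the error address, and the remainder is shifted down to a page frame number while the in-page offset is masked to the reporting grain. A sketch of the same arithmetic with local names (eap, xeap, grain) standing in for the driver's fields, assuming the 128-byte grain set at init time:

	unsigned long page = (unsigned long)eap >> 1;	/* drop the channel bit */

	if (xeap & 1)
		page |= 0x80000000;			/* restore address bit 31 */
	page >>= (PAGE_SHIFT - 1);			/* bits already shifted by 1 */

	offst = eap & ((1 << PAGE_SHIFT) - grain);	/* grain-aligned page offset */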
@@ -313,7 +331,7 @@ static void i82975x_check(struct mem_ctl_info *mci)
{
struct i82975x_error_info info;
- debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+ edac_dbg(1, "MC%d\n", mci->mc_idx);
i82975x_get_error_info(mci, &info);
i82975x_process_error_info(mci, &info, 1);
}
@@ -344,11 +362,7 @@ static int dual_channel_active(void __iomem *mch_window)
static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank)
{
/*
- * ASUS P5W DH either does not program this register or programs
- * it wrong!
- * ECC is possible on i92975x ONLY with DEV_X8 which should mean 'val'
- * for each rank should be 01b - the LSB of the word should be 0x55;
- * but it reads 0!
+	 * ECC is possible on i82975x ONLY with DEV_X8
*/
return DEV_X8;
}
@@ -359,8 +373,10 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
struct csrow_info *csrow;
unsigned long last_cumul_size;
u8 value;
- u32 cumul_size;
- int index;
+ u32 cumul_size, nr_pages;
+ int index, chan;
+ struct dimm_info *dimm;
+ enum dev_type dtype;
last_cumul_size = 0;
@@ -369,34 +385,53 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
* The dram row boundary (DRB) reg values are boundary address
* for each DRAM row with a granularity of 32 or 64MB (single/dual
* channel operation). DRB regs are cumulative; therefore DRB7 will
- * contain the total memory contained in all eight rows.
- *
- * FIXME:
- * EDAC currently works for Dual-channel Interleaved configuration.
- * Other configurations, which the chip supports, need fixing/testing.
+ * contain the total memory contained in all rows.
*
*/
for (index = 0; index < mci->nr_csrows; index++) {
- csrow = &mci->csrows[index];
+ csrow = mci->csrows[index];
value = readb(mch_window + I82975X_DRB + index +
((index >= 4) ? 0x80 : 0));
cumul_size = value;
cumul_size <<= (I82975X_DRB_SHIFT - PAGE_SHIFT);
- debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index,
- cumul_size);
- if (cumul_size == last_cumul_size)
- continue; /* not populated */
+ /*
+		 * Adjust cumul_size w.r.t. the number of channels
+ *
+ */
+ if (csrow->nr_channels > 1)
+ cumul_size <<= 1;
+ edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
+
+ nr_pages = cumul_size - last_cumul_size;
+ if (!nr_pages)
+ continue;
+
+ /*
+ * Initialise dram labels
+ * index values:
+ * [0-7] for single-channel; i.e. csrow->nr_channels = 1
+ * [0-3] for dual-channel; i.e. csrow->nr_channels = 2
+ */
+ dtype = i82975x_dram_type(mch_window, index);
+ for (chan = 0; chan < csrow->nr_channels; chan++) {
+ dimm = mci->csrows[index]->channels[chan]->dimm;
+
+ dimm->nr_pages = nr_pages / csrow->nr_channels;
+
+			snprintf(dimm->label, EDAC_MC_LABEL_LEN, "DIMM %c%d",
+				 (chan == 0) ? 'A' : 'B', index);
+			dimm->grain = 1 << 7; /* 128-byte cache-line resolution */
+			dimm->dtype = dtype;
+ dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */
+ dimm->edac_mode = EDAC_SECDED; /* only supported */
+ }
csrow->first_page = last_cumul_size;
csrow->last_page = cumul_size - 1;
- csrow->nr_pages = cumul_size - last_cumul_size;
last_cumul_size = cumul_size;
- csrow->grain = 1 << 7; /* I82975X_EAP has 128B resolution */
- csrow->mtype = MEM_DDR; /* i82975x supports only DDR2 */
- csrow->dtype = i82975x_dram_type(mch_window, index);
- csrow->edac_mode = EDAC_SECDED; /* only supported */
}
}
@@ -438,6 +473,7 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc = -ENODEV;
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct i82975x_pvt *pvt;
void __iomem *mch_window;
u32 mchbar;
@@ -449,11 +485,11 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx)
u8 c1drb[4];
#endif
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
pci_read_config_dword(pdev, I82975X_MCHBAR, &mchbar);
if (!(mchbar & 1)) {
- debugf3("%s(): failed, MCHBAR disabled!\n", __func__);
+ edac_dbg(3, "failed, MCHBAR disabled!\n");
goto fail0;
}
mchbar &= 0xffffc000; /* bits 31:14 used for 16K window */
@@ -506,37 +542,44 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx)
chans = dual_channel_active(mch_window) + 1;
/* assuming only one controller, index thus is 0 */
- mci = edac_mc_alloc(sizeof(*pvt), I82975X_NR_CSROWS(chans),
- chans, 0);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = I82975X_NR_DIMMS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = I82975X_NR_CSROWS(chans);
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
if (!mci) {
rc = -ENOMEM;
goto fail1;
}
- debugf3("%s(): init mci\n", __func__);
- mci->dev = &pdev->dev;
- mci->mtype_cap = MEM_FLAG_DDR;
+ edac_dbg(3, "init mci\n");
+ mci->pdev = &pdev->dev;
+ mci->mtype_cap = MEM_FLAG_DDR2;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = I82975X_REVISION;
mci->ctl_name = i82975x_devs[dev_idx].ctl_name;
+ mci->dev_name = pci_name(pdev);
mci->edac_check = i82975x_check;
mci->ctl_page_to_phys = NULL;
- debugf3("%s(): init pvt\n", __func__);
+ edac_dbg(3, "init pvt\n");
pvt = (struct i82975x_pvt *) mci->pvt_info;
pvt->mch_window = mch_window;
i82975x_init_csrows(mci, pdev, mch_window);
+ mci->scrub_mode = SCRUB_HW_SRC;
i82975x_get_error_info(mci, &discard); /* clear counters */
/* finalize this instance of memory controller with edac core */
if (edac_mc_add_mc(mci)) {
- debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail2;
}
/* get this far and it's successful */
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
return 0;
fail2:
@@ -549,12 +592,12 @@ fail0:
}
/* returns count (>= 0), or negative on error */
-static int __devinit i82975x_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int i82975x_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
{
int rc;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
@@ -567,12 +610,12 @@ static int __devinit i82975x_init_one(struct pci_dev *pdev,
return rc;
}
-static void __devexit i82975x_remove_one(struct pci_dev *pdev)
+static void i82975x_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct i82975x_pvt *pvt;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
mci = edac_mc_del_mc(&pdev->dev);
if (mci == NULL)
@@ -585,7 +628,7 @@ static void __devexit i82975x_remove_one(struct pci_dev *pdev)
edac_mc_free(mci);
}
-static const struct pci_device_id i82975x_pci_tbl[] __devinitdata = {
+static const struct pci_device_id i82975x_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, 82975_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
I82975X
@@ -600,7 +643,7 @@ MODULE_DEVICE_TABLE(pci, i82975x_pci_tbl);
static struct pci_driver i82975x_driver = {
.name = EDAC_MOD_STR,
.probe = i82975x_init_one,
- .remove = __devexit_p(i82975x_remove_one),
+ .remove = i82975x_remove_one,
.id_table = i82975x_pci_tbl,
};
@@ -608,7 +651,7 @@ static int __init i82975x_init(void)
{
int pci_rc;
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -622,7 +665,7 @@ static int __init i82975x_init(void)
PCI_DEVICE_ID_INTEL_82975_0, NULL);
if (!mci_pdev) {
- debugf0("i82975x pci_get_device fail\n");
+ edac_dbg(0, "i82975x pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
@@ -630,7 +673,7 @@ static int __init i82975x_init(void)
pci_rc = i82975x_init_one(mci_pdev, i82975x_pci_tbl);
if (pci_rc < 0) {
- debugf0("i82975x init fail\n");
+ edac_dbg(0, "i82975x init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
@@ -650,7 +693,7 @@ fail0:
static void __exit i82975x_exit(void)
{
- debugf3("%s()\n", __func__);
+ edac_dbg(3, "\n");
pci_unregister_driver(&i82975x_driver);
@@ -664,7 +707,7 @@ module_init(i82975x_init);
module_exit(i82975x_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arvind R. <arvind@acarlab.com>");
+MODULE_AUTHOR("Arvind R. <arvino55@gmail.com>");
MODULE_DESCRIPTION("MC support for Intel 82975 memory hub controllers");
module_param(edac_op_state, int, 0444);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index c0181093b49..5f43620d580 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -5,10 +5,10 @@
static struct amd_decoder_ops *fam_ops;
-static u8 nb_err_cpumask = 0xf;
+static u8 xec_mask = 0xf;
static bool report_gart_errors;
-static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg);
+static void (*nb_bus_decoder)(int node_id, struct mce *m);
void amd_report_gart_errors(bool v)
{
@@ -16,13 +16,13 @@ void amd_report_gart_errors(bool v)
}
EXPORT_SYMBOL_GPL(amd_report_gart_errors);
-void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32))
+void amd_register_ecc_decoder(void (*f)(int, struct mce *))
{
nb_bus_decoder = f;
}
EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
-void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32))
+void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
{
if (nb_bus_decoder) {
WARN_ON(nb_bus_decoder != f);
@@ -38,93 +38,150 @@ EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
*/
/* transaction type */
-const char *tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
-EXPORT_SYMBOL_GPL(tt_msgs);
+static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
/* cache level */
-const char *ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
-EXPORT_SYMBOL_GPL(ll_msgs);
+static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
/* memory transaction type */
-const char *rrrr_msgs[] = {
+static const char * const rrrr_msgs[] = {
"GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
};
-EXPORT_SYMBOL_GPL(rrrr_msgs);
/* participating processor */
-const char *pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
+const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
EXPORT_SYMBOL_GPL(pp_msgs);
/* request timeout */
-const char *to_msgs[] = { "no timeout", "timed out" };
-EXPORT_SYMBOL_GPL(to_msgs);
+static const char * const to_msgs[] = { "no timeout", "timed out" };
/* memory or i/o */
-const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
-EXPORT_SYMBOL_GPL(ii_msgs);
-
-static const char *f10h_nb_mce_desc[] = {
- "HT link data error",
- "Protocol error (link, L3, probe filter, etc.)",
- "Parity error in NB-internal arrays",
- "Link Retry due to IO link transmission error",
- "L3 ECC data cache error",
- "ECC error in L3 cache tag",
+static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
+
+/* internal error type */
+static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
+
+static const char * const f15h_mc1_mce_desc[] = {
+ "UC during a demand linefill from L2",
+ "Parity error during data load from IC",
+ "Parity error for IC valid bit",
+ "Main tag parity error",
+ "Parity error in prediction queue",
+ "PFB data/address parity error",
+ "Parity error in the branch status reg",
+ "PFB promotion address error",
+ "Tag error during probe/victimization",
+ "Parity error for IC probe tag valid bit",
+ "PFB non-cacheable bit parity error",
+ "PFB valid bit parity error", /* xec = 0xd */
+ "Microcode Patch Buffer", /* xec = 010 */
+ "uop queue",
+ "insn buffer",
+ "predecode buffer",
+ "fetch address FIFO"
+};
+
+static const char * const f15h_mc2_mce_desc[] = {
+ "Fill ECC error on data fills", /* xec = 0x4 */
+ "Fill parity error on insn fills",
+ "Prefetcher request FIFO parity error",
+ "PRQ address parity error",
+ "PRQ data parity error",
+ "WCC Tag ECC error",
+ "WCC Data ECC error",
+ "WCB Data parity error",
+ "VB Data ECC or parity error",
+ "L2 Tag ECC error", /* xec = 0x10 */
+ "Hard L2 Tag ECC error",
+ "Multiple hits on L2 tag",
+ "XAB parity error",
+ "PRB address parity error"
+};
+
+static const char * const mc4_mce_desc[] = {
+ "DRAM ECC error detected on the NB",
+ "CRC error detected on HT link",
+ "Link-defined sync error packets detected on HT link",
+ "HT Master abort",
+ "HT Target abort",
+ "Invalid GART PTE entry during GART table walk",
+ "Unsupported atomic RMW received from an IO link",
+ "Watchdog timeout due to lack of progress",
+ "DRAM ECC error detected on the NB",
+ "SVM DMA Exclusion Vector error",
+ "HT data error detected on link",
+ "Protocol error (link, L3, probe filter)",
+ "NB internal arrays parity error",
+ "DRAM addr/ctl signals parity error",
+ "IO link transmission error",
+ "L3 data cache ECC error", /* xec = 0x1c */
+ "L3 cache tag error",
"L3 LRU parity bits error",
"ECC Error in the Probe Filter directory"
};
-static bool f12h_dc_mce(u16 ec)
+static const char * const mc5_mce_desc[] = {
+ "CPU Watchdog timer expire",
+ "Wakeup array dest tag",
+ "AG payload array",
+ "EX payload array",
+ "IDRF array",
+ "Retire dispatch queue",
+ "Mapper checkpoint array",
+ "Physical register file EX0 port",
+ "Physical register file EX1 port",
+ "Physical register file AG0 port",
+ "Physical register file AG1 port",
+ "Flag register file",
+ "DE error occurred",
+ "Retire status queue"
+};
+
+static bool f12h_mc0_mce(u16 ec, u8 xec)
{
bool ret = false;
if (MEM_ERROR(ec)) {
- u8 ll = ec & 0x3;
+ u8 ll = LL(ec);
ret = true;
if (ll == LL_L2)
pr_cont("during L1 linefill from L2.\n");
else if (ll == LL_L1)
- pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec));
+ pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
else
ret = false;
}
return ret;
}
-static bool f10h_dc_mce(u16 ec)
+static bool f10h_mc0_mce(u16 ec, u8 xec)
{
- u8 r4 = (ec >> 4) & 0xf;
- u8 ll = ec & 0x3;
-
- if (r4 == R4_GEN && ll == LL_L1) {
+ if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
pr_cont("during data scrub.\n");
return true;
}
- return f12h_dc_mce(ec);
+ return f12h_mc0_mce(ec, xec);
}
-static bool k8_dc_mce(u16 ec)
+static bool k8_mc0_mce(u16 ec, u8 xec)
{
if (BUS_ERROR(ec)) {
pr_cont("during system linefill.\n");
return true;
}
- return f10h_dc_mce(ec);
+ return f10h_mc0_mce(ec, xec);
}
-static bool f14h_dc_mce(u16 ec)
+static bool cat_mc0_mce(u16 ec, u8 xec)
{
- u8 r4 = (ec >> 4) & 0xf;
- u8 ll = ec & 0x3;
- u8 tt = (ec >> 2) & 0x3;
- u8 ii = tt;
+ u8 r4 = R4(ec);
bool ret = true;
if (MEM_ERROR(ec)) {
- if (tt != TT_DATA || ll != LL_L1)
+ if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
return false;
switch (r4) {
@@ -144,7 +201,7 @@ static bool f14h_dc_mce(u16 ec)
}
} else if (BUS_ERROR(ec)) {
- if ((ii != II_MEM && ii != II_IO) || ll != LL_LG)
+ if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
return false;
pr_cont("System read data error on a ");
@@ -169,39 +226,77 @@ static bool f14h_dc_mce(u16 ec)
return ret;
}
-static void amd_decode_dc_mce(struct mce *m)
+static bool f15h_mc0_mce(u16 ec, u8 xec)
{
- u16 ec = m->status & 0xffff;
- u8 xec = (m->status >> 16) & 0xf;
+ bool ret = true;
- pr_emerg(HW_ERR "Data Cache Error: ");
+ if (MEM_ERROR(ec)) {
- /* TLB error signatures are the same across families */
- if (TLB_ERROR(ec)) {
- u8 tt = (ec >> 2) & 0x3;
+ switch (xec) {
+ case 0x0:
+ pr_cont("Data Array access error.\n");
+ break;
- if (tt == TT_DATA) {
- pr_cont("%s TLB %s.\n", LL_MSG(ec),
- (xec ? "multimatch" : "parity error"));
- return;
+ case 0x1:
+ pr_cont("UC error during a linefill from L2/NB.\n");
+ break;
+
+ case 0x2:
+ case 0x11:
+ pr_cont("STQ access error.\n");
+ break;
+
+ case 0x3:
+ pr_cont("SCB access error.\n");
+ break;
+
+ case 0x10:
+ pr_cont("Tag error.\n");
+ break;
+
+ case 0x12:
+ pr_cont("LDQ access error.\n");
+ break;
+
+ default:
+ ret = false;
}
+ } else if (BUS_ERROR(ec)) {
+
+ if (!xec)
+ pr_cont("System Read Data Error.\n");
else
- goto wrong_dc_mce;
- }
+ pr_cont(" Internal error condition type %d.\n", xec);
+ } else
+ ret = false;
- if (!fam_ops->dc_mce(ec))
- goto wrong_dc_mce;
+ return ret;
+}
- return;
+static void decode_mc0_mce(struct mce *m)
+{
+ u16 ec = EC(m->status);
+ u8 xec = XEC(m->status, xec_mask);
-wrong_dc_mce:
- pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
+ pr_emerg(HW_ERR "MC0 Error: ");
+
+ /* TLB error signatures are the same across families */
+ if (TLB_ERROR(ec)) {
+ if (TT(ec) == TT_DATA) {
+ pr_cont("%s TLB %s.\n", LL_MSG(ec),
+ ((xec == 2) ? "locked miss"
+ : (xec ? "multimatch" : "parity")));
+ return;
+ }
+ } else if (fam_ops->mc0_mce(ec, xec))
+ ;
+ else
+ pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
}
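/*
 * Editorial sketch (not part of the patch): the EC()/XEC() helpers used
 * above extract the MCA error-code fields from MCi_STATUS; xec_mask is
 * widened per CPU family during init. Equivalent helpers, with hypothetical
 * names:
 *
 *	static inline u16 example_ec(u64 status)
 *	{ return status & 0xffff; }
 *
 *	static inline u8 example_xec(u64 status, u8 mask)
 *	{ return (status >> 16) & mask; }
 */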
-static bool k8_ic_mce(u16 ec)
+static bool k8_mc1_mce(u16 ec, u8 xec)
{
- u8 ll = ec & 0x3;
- u8 r4 = (ec >> 4) & 0xf;
+ u8 ll = LL(ec);
bool ret = true;
if (!MEM_ERROR(ec))
@@ -210,7 +305,7 @@ static bool k8_ic_mce(u16 ec)
if (ll == 0x2)
pr_cont("during a linefill from L2.\n");
else if (ll == 0x1) {
- switch (r4) {
+ switch (R4(ec)) {
case R4_IRD:
pr_cont("Parity error during data load.\n");
break;
@@ -233,33 +328,67 @@ static bool k8_ic_mce(u16 ec)
return ret;
}
-static bool f14h_ic_mce(u16 ec)
+static bool cat_mc1_mce(u16 ec, u8 xec)
{
- u8 ll = ec & 0x3;
- u8 tt = (ec >> 2) & 0x3;
- u8 r4 = (ec >> 4) & 0xf;
+ u8 r4 = R4(ec);
bool ret = true;
- if (MEM_ERROR(ec)) {
- if (tt != 0 || ll != 1)
- ret = false;
+ if (!MEM_ERROR(ec))
+ return false;
- if (r4 == R4_IRD)
- pr_cont("Data/tag array parity error for a tag hit.\n");
- else if (r4 == R4_SNOOP)
- pr_cont("Tag error during snoop/victimization.\n");
- else
- ret = false;
+ if (TT(ec) != TT_INSTR)
+ return false;
+
+ if (r4 == R4_IRD)
+ pr_cont("Data/tag array parity error for a tag hit.\n");
+ else if (r4 == R4_SNOOP)
+ pr_cont("Tag error during snoop/victimization.\n");
+ else if (xec == 0x0)
+ pr_cont("Tag parity error from victim castout.\n");
+ else if (xec == 0x2)
+ pr_cont("Microcode patch RAM parity error.\n");
+ else
+ ret = false;
+
+ return ret;
+}
+
+static bool f15h_mc1_mce(u16 ec, u8 xec)
+{
+ bool ret = true;
+
+ if (!MEM_ERROR(ec))
+ return false;
+
+ switch (xec) {
+ case 0x0 ... 0xa:
+ pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
+ break;
+
+ case 0xd:
+ pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
+ break;
+
+ case 0x10:
+ pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
+ break;
+
+ case 0x11 ... 0x14:
+ pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
+ break;
+
+ default:
+ ret = false;
}
return ret;
}
-static void amd_decode_ic_mce(struct mce *m)
+static void decode_mc1_mce(struct mce *m)
{
- u16 ec = m->status & 0xffff;
- u8 xec = (m->status >> 16) & 0xf;
+ u16 ec = EC(m->status);
+ u8 xec = XEC(m->status, xec_mask);
- pr_emerg(HW_ERR "Instruction Cache Error: ");
+ pr_emerg(HW_ERR "MC1 Error: ");
if (TLB_ERROR(ec))
pr_cont("%s TLB %s.\n", LL_MSG(ec),
@@ -268,135 +397,188 @@ static void amd_decode_ic_mce(struct mce *m)
bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
- } else if (fam_ops->ic_mce(ec))
+ } else if (fam_ops->mc1_mce(ec, xec))
;
else
- pr_emerg(HW_ERR "Corrupted IC MCE info?\n");
+ pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
}
-static void amd_decode_bu_mce(struct mce *m)
+static bool k8_mc2_mce(u16 ec, u8 xec)
{
- u32 ec = m->status & 0xffff;
- u32 xec = (m->status >> 16) & 0xf;
-
- pr_emerg(HW_ERR "Bus Unit Error");
+ bool ret = true;
if (xec == 0x1)
pr_cont(" in the write data buffers.\n");
else if (xec == 0x3)
pr_cont(" in the victim data buffers.\n");
else if (xec == 0x2 && MEM_ERROR(ec))
- pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
+ pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
else if (xec == 0x0) {
if (TLB_ERROR(ec))
pr_cont(": %s error in a Page Descriptor Cache or "
"Guest TLB.\n", TT_MSG(ec));
else if (BUS_ERROR(ec))
pr_cont(": %s/ECC error in data read from NB: %s.\n",
- RRRR_MSG(ec), PP_MSG(ec));
+ R4_MSG(ec), PP_MSG(ec));
else if (MEM_ERROR(ec)) {
- u8 rrrr = (ec >> 4) & 0xf;
+ u8 r4 = R4(ec);
- if (rrrr >= 0x7)
+ if (r4 >= 0x7)
pr_cont(": %s error during data copyback.\n",
- RRRR_MSG(ec));
- else if (rrrr <= 0x1)
+ R4_MSG(ec));
+ else if (r4 <= 0x1)
pr_cont(": %s parity/ECC error during data "
- "access from L2.\n", RRRR_MSG(ec));
+ "access from L2.\n", R4_MSG(ec));
else
- goto wrong_bu_mce;
+ ret = false;
} else
- goto wrong_bu_mce;
+ ret = false;
} else
- goto wrong_bu_mce;
-
- return;
+ ret = false;
-wrong_bu_mce:
- pr_emerg(HW_ERR "Corrupted BU MCE info?\n");
+ return ret;
}
-static void amd_decode_ls_mce(struct mce *m)
+static bool f15h_mc2_mce(u16 ec, u8 xec)
{
- u16 ec = m->status & 0xffff;
- u8 xec = (m->status >> 16) & 0xf;
-
- if (boot_cpu_data.x86 == 0x14) {
- pr_emerg("You shouldn't be seeing an LS MCE on this cpu family,"
- " please report on LKML.\n");
- return;
- }
-
- pr_emerg(HW_ERR "Load Store Error");
+ bool ret = true;
- if (xec == 0x0) {
- u8 r4 = (ec >> 4) & 0xf;
+ if (TLB_ERROR(ec)) {
+ if (xec == 0x0)
+ pr_cont("Data parity TLB read error.\n");
+ else if (xec == 0x1)
+ pr_cont("Poison data provided for TLB fill.\n");
+ else
+ ret = false;
+ } else if (BUS_ERROR(ec)) {
+ if (xec > 2)
+ ret = false;
- if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
- goto wrong_ls_mce;
+ pr_cont("Error during attempted NB data read.\n");
+ } else if (MEM_ERROR(ec)) {
+ switch (xec) {
+ case 0x4 ... 0xc:
+ pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
+ break;
- pr_cont(" during %s.\n", RRRR_MSG(ec));
- } else
- goto wrong_ls_mce;
+ case 0x10 ... 0x14:
+ pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
+ break;
- return;
+ default:
+ ret = false;
+ }
+ }
-wrong_ls_mce:
- pr_emerg(HW_ERR "Corrupted LS MCE info?\n");
+ return ret;
}
-static bool k8_nb_mce(u16 ec, u8 xec)
+static bool f16h_mc2_mce(u16 ec, u8 xec)
{
- bool ret = true;
+ u8 r4 = R4(ec);
- switch (xec) {
- case 0x1:
- pr_cont("CRC error detected on HT link.\n");
- break;
+ if (!MEM_ERROR(ec))
+ return false;
- case 0x5:
- pr_cont("Invalid GART PTE entry during GART table walk.\n");
+ switch (xec) {
+ case 0x04 ... 0x05:
+ pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
break;
- case 0x6:
- pr_cont("Unsupported atomic RMW received from an IO link.\n");
+ case 0x09 ... 0x0b:
+ case 0x0d ... 0x0f:
+ pr_cont("ECC error in L2 tag (%s).\n",
+ ((r4 == R4_GEN) ? "BankReq" :
+ ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
break;
- case 0x0:
- case 0x8:
- if (boot_cpu_data.x86 == 0x11)
- return false;
-
- pr_cont("DRAM ECC error detected on the NB.\n");
+ case 0x10 ... 0x19:
+ case 0x1b:
+ pr_cont("ECC error in L2 data array (%s).\n",
+ (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
+ ((r4 == R4_GEN) ? "Attr" :
+ ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
break;
- case 0xd:
- pr_cont("Parity error on the DRAM addr/ctl signals.\n");
+ case 0x1c ... 0x1d:
+ case 0x1f:
+ pr_cont("Parity error in L2 attribute bits (%s).\n",
+ ((r4 == R4_RD) ? "Hit" :
+ ((r4 == R4_GEN) ? "Attr" : "Fill")));
break;
default:
- ret = false;
- break;
+ return false;
}
- return ret;
+ return true;
}
-static bool f10h_nb_mce(u16 ec, u8 xec)
+static void decode_mc2_mce(struct mce *m)
{
- bool ret = true;
+ u16 ec = EC(m->status);
+ u8 xec = XEC(m->status, xec_mask);
+
+ pr_emerg(HW_ERR "MC2 Error: ");
+
+ if (!fam_ops->mc2_mce(ec, xec))
+ pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
+}
+
+static void decode_mc3_mce(struct mce *m)
+{
+ u16 ec = EC(m->status);
+ u8 xec = XEC(m->status, xec_mask);
+
+ if (boot_cpu_data.x86 >= 0x14) {
+ pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
+ " please report on LKML.\n");
+ return;
+ }
+
+ pr_emerg(HW_ERR "MC3 Error");
+
+ if (xec == 0x0) {
+ u8 r4 = R4(ec);
+
+ if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
+ goto wrong_mc3_mce;
+
+ pr_cont(" during %s.\n", R4_MSG(ec));
+ } else
+ goto wrong_mc3_mce;
+
+ return;
+
+ wrong_mc3_mce:
+ pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
+}
+
+static void decode_mc4_mce(struct mce *m)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ int node_id = amd_get_nb_id(m->extcpu);
+ u16 ec = EC(m->status);
+ u8 xec = XEC(m->status, 0x1f);
u8 offset = 0;
- if (k8_nb_mce(ec, xec))
- return true;
+ pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
- switch(xec) {
- case 0xa ... 0xc:
- offset = 10;
- break;
+ switch (xec) {
+ case 0x0 ... 0xe:
+
+ /* special handling for DRAM ECCs */
+ if (xec == 0x0 || xec == 0x8) {
+ /* no ECCs on F11h */
+ if (c->x86 == 0x11)
+ goto wrong_mc4_mce;
- case 0xe:
- offset = 11;
+ pr_cont("%s.\n", mc4_mce_desc[xec]);
+
+ if (nb_bus_decoder)
+ nb_bus_decoder(node_id, m);
+ return;
+ }
break;
case 0xf:
@@ -405,126 +587,116 @@ static bool f10h_nb_mce(u16 ec, u8 xec)
else if (BUS_ERROR(ec))
pr_cont("DMA Exclusion Vector Table Walk error.\n");
else
- ret = false;
+ goto wrong_mc4_mce;
+ return;
- goto out;
- break;
+ case 0x19:
+ if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
+ pr_cont("Compute Unit Data Error.\n");
+ else
+ goto wrong_mc4_mce;
+ return;
case 0x1c ... 0x1f:
- offset = 24;
+ offset = 13;
break;
default:
- ret = false;
-
- goto out;
- break;
+ goto wrong_mc4_mce;
}
- pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]);
+ pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
+ return;
-out:
- return ret;
+ wrong_mc4_mce:
+ pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
}
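
Editor's note: decode_mc4_mce() maps two sparse extended-error ranges into one description table, rebasing the upper range by an offset (13) before indexing. A placeholder sketch of that lookup; the real strings live in mc4_mce_desc[] and are not reproduced here:

	#include <stdint.h>

	static const char *mc4_desc(uint8_t xec)
	{
		/* xec 0x0..0xe index directly; 0x1c..0x1f are shifted down
		 * by 13 so they follow immediately after. */
		static const char * const desc[0x13] = {
			[0x00] = "DRAM ECC error",		/* made-up text */
			/* ... */
			[0x0f ... 0x12] = "link/L3 error",	/* made-up text */
		};

		if (xec <= 0xe)
			return desc[xec];
		if (xec >= 0x1c && xec <= 0x1f)
			return desc[xec - 13];
		return NULL;	/* -> "Corrupted MC4 MCE info?" path */
	}
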
-static bool nb_noop_mce(u16 ec, u8 xec)
+static void decode_mc5_mce(struct mce *m)
{
- return false;
-}
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ u8 xec = XEC(m->status, xec_mask);
-void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg)
-{
- u8 xec = (m->status >> 16) & 0x1f;
- u16 ec = m->status & 0xffff;
- u32 nbsh = (u32)(m->status >> 32);
+ if (c->x86 == 0xf || c->x86 == 0x11)
+ goto wrong_mc5_mce;
- pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id);
+ pr_emerg(HW_ERR "MC5 Error: ");
- /*
- * F10h, revD can disable ErrCpu[3:0] so check that first and also the
- * value encoding has changed so interpret those differently
- */
- if ((boot_cpu_data.x86 == 0x10) &&
- (boot_cpu_data.x86_model > 7)) {
- if (nbsh & K8_NBSH_ERR_CPU_VAL)
- pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask));
- } else {
- u8 assoc_cpus = nbsh & nb_err_cpumask;
+ if (xec == 0x0 || xec == 0xc)
+ pr_cont("%s.\n", mc5_mce_desc[xec]);
+ else if (xec <= 0xd)
+ pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
+ else
+ goto wrong_mc5_mce;
- if (assoc_cpus > 0)
- pr_cont(", core: %d", fls(assoc_cpus) - 1);
- }
+ return;
+
+ wrong_mc5_mce:
+ pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
+}
+
+static void decode_mc6_mce(struct mce *m)
+{
+ u8 xec = XEC(m->status, xec_mask);
+
+ pr_emerg(HW_ERR "MC6 Error: ");
switch (xec) {
+ case 0x1:
+ pr_cont("Free List");
+ break;
+
case 0x2:
- pr_cont("Sync error (sync packets on HT link detected).\n");
- return;
+ pr_cont("Physical Register File");
+ break;
case 0x3:
- pr_cont("HT Master abort.\n");
- return;
+ pr_cont("Retire Queue");
+ break;
case 0x4:
- pr_cont("HT Target abort.\n");
- return;
-
- case 0x7:
- pr_cont("NB Watchdog timeout.\n");
- return;
+ pr_cont("Scheduler table");
+ break;
- case 0x9:
- pr_cont("SVM DMA Exclusion Vector error.\n");
- return;
+ case 0x5:
+ pr_cont("Status Register File");
+ break;
default:
+ goto wrong_mc6_mce;
break;
}
- if (!fam_ops->nb_mce(ec, xec))
- goto wrong_nb_mce;
-
- if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10)
- if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder)
- nb_bus_decoder(node_id, m, nbcfg);
+ pr_cont(" parity error.\n");
return;
-wrong_nb_mce:
- pr_emerg(HW_ERR "Corrupted NB MCE info?\n");
+ wrong_mc6_mce:
+ pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
}
-EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
-static void amd_decode_fr_mce(struct mce *m)
+static inline void amd_decode_err_code(u16 ec)
{
- if (boot_cpu_data.x86 == 0xf ||
- boot_cpu_data.x86 == 0x11)
- goto wrong_fr_mce;
-
- /* we have only one error signature so match all fields at once. */
- if ((m->status & 0xffff) == 0x0f0f) {
- pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n");
+ if (INT_ERROR(ec)) {
+ pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
return;
}
-wrong_fr_mce:
- pr_emerg(HW_ERR "Corrupted FR MCE info?\n");
-}
+ pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
-static inline void amd_decode_err_code(u16 ec)
-{
- if (TLB_ERROR(ec)) {
- pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n",
- TT_MSG(ec), LL_MSG(ec));
- } else if (MEM_ERROR(ec)) {
- pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n",
- RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
- } else if (BUS_ERROR(ec)) {
- pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, "
- "Participating Processor: %s\n",
- RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
- PP_MSG(ec));
- } else
- pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec);
+ if (BUS_ERROR(ec))
+ pr_cont(", mem/io: %s", II_MSG(ec));
+ else
+ pr_cont(", tx: %s", TT_MSG(ec));
+
+ if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
+ pr_cont(", mem-tx: %s", R4_MSG(ec));
+
+ if (BUS_ERROR(ec))
+ pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
+ }
+
+ pr_cont("\n");
}
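
Editor's note: the rewritten amd_decode_err_code() walks the fields shared by all error classes in a single pass instead of one branch per class. A stand-alone sketch of the same field walk, printing raw field values in place of the message tables:

	#include <stdint.h>
	#include <stdio.h>

	static void describe_ec(uint16_t ec)
	{
		int bus = (ec & 0xf800) == 0x0800;	/* BUS_ERROR() */
		int mem = (ec & 0xff00) == 0x0100;	/* MEM_ERROR() */

		printf("cache level: %u", ec & 0x3);		/* LL */
		if (bus)
			printf(", mem/io: %u", (ec >> 2) & 0x3);/* II */
		else
			printf(", tx: %u", (ec >> 2) & 0x3);	/* TT */
		if (mem || bus) {
			printf(", mem-tx: %u", (ec >> 4) & 0xf);/* R4 */
			if (bus)
				printf(", part-proc: %u (to: %u)",
				       (ec >> 9) & 0x3,		/* PP */
				       (ec >> 8) & 0x1);	/* TO */
		}
		printf("\n");
	}
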
/*
@@ -543,59 +715,95 @@ static bool amd_filter_mce(struct mce *m)
return false;
}
+static const char *decode_error_status(struct mce *m)
+{
+ if (m->status & MCI_STATUS_UC) {
+ if (m->status & MCI_STATUS_PCC)
+ return "System Fatal error.";
+ if (m->mcgstatus & MCG_STATUS_RIPV)
+ return "Uncorrected, software restartable error.";
+ return "Uncorrected, software containable error.";
+ }
+
+ if (m->status & MCI_STATUS_DEFERRED)
+ return "Deferred error.";
+
+ return "Corrected error, no action required.";
+}
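
Editor's note: decode_error_status() implements the severity triage for these families: uncorrected plus PCC is fatal, uncorrected with RIPV set is restartable, deferred errors sit in between, anything else is corrected. A sketch with the architectural bit positions spelled out (UC is bit 61, PCC bit 57, DEFERRED bit 44 per the header below, RIPV bit 0 of MCG_STATUS):

	#include <stdint.h>

	enum sev { CORRECTED, DEFER, UC_RESTARTABLE, UC_CONTAINABLE, FATAL };

	static enum sev classify(uint64_t status, uint64_t mcg_status)
	{
		if (status & (1ULL << 61)) {		/* MCI_STATUS_UC */
			if (status & (1ULL << 57))	/* MCI_STATUS_PCC */
				return FATAL;
			return (mcg_status & 0x1) ? UC_RESTARTABLE /* RIPV */
						  : UC_CONTAINABLE;
		}
		if (status & (1ULL << 44))		/* MCI_STATUS_DEFERRED */
			return DEFER;
		return CORRECTED;
	}
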
+
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{
struct mce *m = (struct mce *)data;
- int node, ecc;
+ struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
+ int ecc;
if (amd_filter_mce(m))
return NOTIFY_STOP;
- pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank);
+ pr_emerg(HW_ERR "%s\n", decode_error_status(m));
- pr_cont("%sorrected error, other errors lost: %s, "
- "CPU context corrupt: %s",
- ((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
- ((m->status & MCI_STATUS_OVER) ? "yes" : "no"),
- ((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
+ pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
+ m->extcpu,
+ c->x86, c->x86_model, c->x86_mask,
+ m->bank,
+ ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
+ ((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
+ ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
+ ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
+ ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
+
+ if (c->x86 == 0x15 || c->x86 == 0x16)
+ pr_cont("|%s|%s",
+ ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
+ ((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
/* do the two bits[14:13] together */
ecc = (m->status >> 45) & 0x3;
if (ecc)
- pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
+ pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
- pr_cont("\n");
+ pr_cont("]: 0x%016llx\n", m->status);
+
+ if (m->status & MCI_STATUS_ADDRV)
+ pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
+
+ if (!fam_ops)
+ goto err_code;
switch (m->bank) {
case 0:
- amd_decode_dc_mce(m);
+ decode_mc0_mce(m);
break;
case 1:
- amd_decode_ic_mce(m);
+ decode_mc1_mce(m);
break;
case 2:
- amd_decode_bu_mce(m);
+ decode_mc2_mce(m);
break;
case 3:
- amd_decode_ls_mce(m);
+ decode_mc3_mce(m);
break;
case 4:
- node = amd_get_nb_id(m->extcpu);
- amd_decode_nb_mce(node, m, 0);
+ decode_mc4_mce(m);
break;
case 5:
- amd_decode_fr_mce(m);
+ decode_mc5_mce(m);
+ break;
+
+ case 6:
+ decode_mc6_mce(m);
break;
default:
break;
}
+ err_code:
amd_decode_err_code(m->status & 0xffff);
return NOTIFY_STOP;
@@ -608,59 +816,69 @@ static struct notifier_block amd_mce_dec_nb = {
static int __init mce_amd_init(void)
{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
- return 0;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
- if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) &&
- (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf))
- return 0;
+ if (c->x86_vendor != X86_VENDOR_AMD)
+ return -ENODEV;
fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
if (!fam_ops)
return -ENOMEM;
- switch (boot_cpu_data.x86) {
+ switch (c->x86) {
case 0xf:
- fam_ops->dc_mce = k8_dc_mce;
- fam_ops->ic_mce = k8_ic_mce;
- fam_ops->nb_mce = k8_nb_mce;
+ fam_ops->mc0_mce = k8_mc0_mce;
+ fam_ops->mc1_mce = k8_mc1_mce;
+ fam_ops->mc2_mce = k8_mc2_mce;
break;
case 0x10:
- fam_ops->dc_mce = f10h_dc_mce;
- fam_ops->ic_mce = k8_ic_mce;
- fam_ops->nb_mce = f10h_nb_mce;
+ fam_ops->mc0_mce = f10h_mc0_mce;
+ fam_ops->mc1_mce = k8_mc1_mce;
+ fam_ops->mc2_mce = k8_mc2_mce;
break;
case 0x11:
- fam_ops->dc_mce = k8_dc_mce;
- fam_ops->ic_mce = k8_ic_mce;
- fam_ops->nb_mce = f10h_nb_mce;
+ fam_ops->mc0_mce = k8_mc0_mce;
+ fam_ops->mc1_mce = k8_mc1_mce;
+ fam_ops->mc2_mce = k8_mc2_mce;
break;
case 0x12:
- fam_ops->dc_mce = f12h_dc_mce;
- fam_ops->ic_mce = k8_ic_mce;
- fam_ops->nb_mce = nb_noop_mce;
+ fam_ops->mc0_mce = f12h_mc0_mce;
+ fam_ops->mc1_mce = k8_mc1_mce;
+ fam_ops->mc2_mce = k8_mc2_mce;
break;
case 0x14:
- nb_err_cpumask = 0x3;
- fam_ops->dc_mce = f14h_dc_mce;
- fam_ops->ic_mce = f14h_ic_mce;
- fam_ops->nb_mce = nb_noop_mce;
+ fam_ops->mc0_mce = cat_mc0_mce;
+ fam_ops->mc1_mce = cat_mc1_mce;
+ fam_ops->mc2_mce = k8_mc2_mce;
+ break;
+
+ case 0x15:
+ xec_mask = 0x1f;
+ fam_ops->mc0_mce = f15h_mc0_mce;
+ fam_ops->mc1_mce = f15h_mc1_mce;
+ fam_ops->mc2_mce = f15h_mc2_mce;
+ break;
+
+ case 0x16:
+ xec_mask = 0x1f;
+ fam_ops->mc0_mce = cat_mc0_mce;
+ fam_ops->mc1_mce = cat_mc1_mce;
+ fam_ops->mc2_mce = f16h_mc2_mce;
break;
default:
- printk(KERN_WARNING "Huh? What family is that: %d?!\n",
- boot_cpu_data.x86);
+ printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
kfree(fam_ops);
- return -EINVAL;
+ fam_ops = NULL;
}
pr_info("MCE: In-kernel MCE decoding enabled.\n");
- atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
+ mce_register_decode_chain(&amd_mce_dec_nb);
return 0;
}
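
Editor's note: for anyone wiring another decoder into the same path, mce_register_decode_chain() takes a plain notifier_block whose callback has the amd_decode_mce() signature above. A minimal hypothetical decoder, assuming the same includes as this file:

	static int my_decode(struct notifier_block *nb, unsigned long val,
			     void *data)
	{
		struct mce *m = data;

		pr_info("bank %d, status 0x%016llx\n", m->bank, m->status);
		return NOTIFY_STOP;	/* decoded; stop the chain */
	}

	static struct notifier_block my_dec_nb = { .notifier_call = my_decode };

	/* mce_register_decode_chain(&my_dec_nb) at init time,
	 * mce_unregister_decode_chain(&my_dec_nb) on exit. */
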
@@ -669,7 +887,7 @@ early_initcall(mce_amd_init);
#ifdef MODULE
static void __exit mce_amd_exit(void)
{
- atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
+ mce_unregister_decode_chain(&amd_mce_dec_nb);
kfree(fam_ops);
}
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h
index 35f6e0e3b29..51b7e3a36e3 100644
--- a/drivers/edac/mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -5,10 +5,8 @@
#include <asm/mce.h>
-#define BIT_64(n) (U64_C(1) << (n))
-
-#define ERROR_CODE(x) ((x) & 0xffff)
-#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f)
+#define EC(x) ((x) & 0xffff)
+#define XEC(x, mask) (((x) >> 16) & mask)
#define LOW_SYNDROME(x) (((x) >> 15) & 0xff)
#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff)
@@ -16,34 +14,28 @@
#define TLB_ERROR(x) (((x) & 0xFFF0) == 0x0010)
#define MEM_ERROR(x) (((x) & 0xFF00) == 0x0100)
#define BUS_ERROR(x) (((x) & 0xF800) == 0x0800)
+#define INT_ERROR(x) (((x) & 0xF4FF) == 0x0400)
#define TT(x) (((x) >> 2) & 0x3)
#define TT_MSG(x) tt_msgs[TT(x)]
#define II(x) (((x) >> 2) & 0x3)
#define II_MSG(x) ii_msgs[II(x)]
-#define LL(x) (((x) >> 0) & 0x3)
+#define LL(x) ((x) & 0x3)
#define LL_MSG(x) ll_msgs[LL(x)]
#define TO(x) (((x) >> 8) & 0x1)
#define TO_MSG(x) to_msgs[TO(x)]
#define PP(x) (((x) >> 9) & 0x3)
#define PP_MSG(x) pp_msgs[PP(x)]
+#define UU(x) (((x) >> 8) & 0x3)
+#define UU_MSG(x) uu_msgs[UU(x)]
-#define RRRR(x) (((x) >> 4) & 0xf)
-#define RRRR_MSG(x) ((RRRR(x) < 9) ? rrrr_msgs[RRRR(x)] : "Wrong R4!")
+#define R4(x) (((x) >> 4) & 0xf)
+#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!")
-#define K8_NBSH 0x4C
+#define MCI_STATUS_DEFERRED BIT_64(44)
+#define MCI_STATUS_POISON BIT_64(43)
-#define K8_NBSH_VALID_BIT BIT(31)
-#define K8_NBSH_OVERFLOW BIT(30)
-#define K8_NBSH_UC_ERR BIT(29)
-#define K8_NBSH_ERR_EN BIT(28)
-#define K8_NBSH_MISCV BIT(27)
-#define K8_NBSH_VALID_ERROR_ADDR BIT(26)
-#define K8_NBSH_PCC BIT(25)
-#define K8_NBSH_ERR_CPU_VAL BIT(24)
-#define K8_NBSH_CECC BIT(14)
-#define K8_NBSH_UECC BIT(13)
-#define K8_NBSH_ERR_SCRUBER BIT(8)
+extern const char * const pp_msgs[];
enum tt_ids {
TT_INSTR = 0,
@@ -78,37 +70,18 @@ enum rrrr_ids {
R4_SNOOP,
};
-extern const char *tt_msgs[];
-extern const char *ll_msgs[];
-extern const char *rrrr_msgs[];
-extern const char *pp_msgs[];
-extern const char *to_msgs[];
-extern const char *ii_msgs[];
-
-/*
- * relevant NB regs
- */
-struct err_regs {
- u32 nbcfg;
- u32 nbsh;
- u32 nbsl;
- u32 nbeah;
- u32 nbeal;
-};
-
/*
* per-family decoder ops
*/
struct amd_decoder_ops {
- bool (*dc_mce)(u16);
- bool (*ic_mce)(u16);
- bool (*nb_mce)(u16, u8);
+ bool (*mc0_mce)(u16, u8);
+ bool (*mc1_mce)(u16, u8);
+ bool (*mc2_mce)(u16, u8);
};
void amd_report_gart_errors(bool);
-void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32));
-void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32));
-void amd_decode_nb_mce(int, struct mce *, u32);
+void amd_register_ecc_decoder(void (*f)(int, struct mce *));
+void amd_unregister_ecc_decoder(void (*f)(int, struct mce *));
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
#endif /* _EDAC_MCE_AMD_H */
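
Editor's note: the class predicates above are pure mask tests on the 16-bit error code, so they can be sanity-checked in isolation. A tiny user-space check with a synthetic bus-error code (the value is made up to satisfy the masks, not taken from hardware):

	#include <assert.h>
	#include <stdint.h>

	#define TLB_ERROR(x) (((x) & 0xFFF0) == 0x0010)
	#define MEM_ERROR(x) (((x) & 0xFF00) == 0x0100)
	#define BUS_ERROR(x) (((x) & 0xF800) == 0x0800)
	#define INT_ERROR(x) (((x) & 0xF4FF) == 0x0400)

	int main(void)
	{
		uint16_t ec = 0x0813;	/* synthetic bus error: LL=3, R4=1 */

		assert(BUS_ERROR(ec));
		assert(!MEM_ERROR(ec) && !TLB_ERROR(ec) && !INT_ERROR(ec));
		assert((ec & 0x3) == 0x3);		/* LL */
		assert(((ec >> 4) & 0xf) == 0x1);	/* R4 */
		return 0;
	}
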
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
index 8d0688f36d4..5e46a9fea31 100644
--- a/drivers/edac/mce_amd_inj.c
+++ b/drivers/edac/mce_amd_inj.c
@@ -6,13 +6,14 @@
* This file may be distributed under the terms of the GNU General Public
* License version 2.
*
- * Copyright (c) 2010: Borislav Petkov <borislav.petkov@amd.com>
+ * Copyright (c) 2010: Borislav Petkov <bp@alien8.de>
* Advanced Micro Devices Inc.
*/
#include <linux/kobject.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
#include <linux/edac.h>
+#include <linux/module.h>
#include <asm/mce.h>
#include "mce_amd.h"
@@ -42,7 +43,7 @@ static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \
int ret = 0; \
unsigned long value; \
\
- ret = strict_strtoul(data, 16, &value); \
+ ret = kstrtoul(data, 16, &value); \
if (ret < 0) \
printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \
\
@@ -82,16 +83,17 @@ static ssize_t edac_inject_bank_store(struct kobject *kobj,
int ret = 0;
unsigned long value;
- ret = strict_strtoul(data, 10, &value);
+ ret = kstrtoul(data, 10, &value);
if (ret < 0) {
printk(KERN_ERR "Invalid bank value!\n");
return -EINVAL;
}
- if (value > 5) {
- printk(KERN_ERR "Non-existant MCE bank: %lu\n", value);
- return -EINVAL;
- }
+ if (value > 5)
+ if (boot_cpu_data.x86 != 0x15 || value > 6) {
+ printk(KERN_ERR "Non-existent MCE bank: %lu\n", value);
+ return -EINVAL;
+ }
i_mce.bank = value;
@@ -114,14 +116,14 @@ static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc,
static int __init edac_init_mce_inject(void)
{
- struct sysdev_class *edac_class = NULL;
+ struct bus_type *edac_subsys = NULL;
int i, err = 0;
- edac_class = edac_get_sysfs_class();
- if (!edac_class)
+ edac_subsys = edac_get_sysfs_subsys();
+ if (!edac_subsys)
return -EINVAL;
- mce_kobj = kobject_create_and_add("mce", &edac_class->kset.kobj);
+ mce_kobj = kobject_create_and_add("mce", &edac_subsys->dev_root->kobj);
if (!mce_kobj) {
printk(KERN_ERR "Error creating a mce kset.\n");
err = -ENOMEM;
@@ -139,13 +141,13 @@ static int __init edac_init_mce_inject(void)
return 0;
err_sysfs_create:
- while (i-- >= 0)
+ while (--i >= 0)
sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
kobject_del(mce_kobj);
err_mce_kobj:
- edac_put_sysfs_class();
+ edac_put_sysfs_subsys();
return err;
}
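
Editor's note: the loop change in the error path above is an off-by-one fix worth spelling out. On entry, i is the index whose sysfs_create_file() failed, so only attributes 0..i-1 exist; the old post-decrement test ran the body once too often:

	/* failure at i == 0, i.e. nothing was created:
	 *
	 *	while (i-- >= 0)	// tests 0 >= 0, body then sees i == -1
	 *		sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
	 *						// reads sysfs_attrs[-1]!
	 *	while (--i >= 0)	// tests -1 >= 0, body never runs
	 *		sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
	 *
	 * The pre-decrement form tears down exactly attrs[i-1]..attrs[0].
	 */
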
@@ -159,13 +161,13 @@ static void __exit edac_exit_mce_inject(void)
kobject_del(mce_kobj);
- edac_put_sysfs_class();
+ edac_put_sysfs_subsys();
}
module_init(edac_init_mce_inject);
module_exit(edac_exit_mce_inject);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Borislav Petkov <borislav.petkov@amd.com>");
+MODULE_AUTHOR("Borislav Petkov <bp@alien8.de>");
MODULE_AUTHOR("AMD Inc.");
MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding");
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index b123bb308a4..f4aec2e6ef5 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -1,6 +1,8 @@
/*
* Freescale MPC85xx Memory Controller kernel module
*
+ * Parts Copyrighted (c) 2013 by Freescale Semiconductor, Inc.
+ *
* Author: Dave Jiang <djiang@mvista.com>
*
* 2006-2007 (c) MontaVista Software, Inc. This file is licensed under
@@ -49,34 +51,45 @@ static u32 orig_hid1[2];
/************************ MC SYSFS parts ***********************************/
-static ssize_t mpc85xx_mc_inject_data_hi_show(struct mem_ctl_info *mci,
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+
+static ssize_t mpc85xx_mc_inject_data_hi_show(struct device *dev,
+ struct device_attribute *mattr,
char *data)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
return sprintf(data, "0x%08x",
in_be32(pdata->mc_vbase +
MPC85XX_MC_DATA_ERR_INJECT_HI));
}
-static ssize_t mpc85xx_mc_inject_data_lo_show(struct mem_ctl_info *mci,
+static ssize_t mpc85xx_mc_inject_data_lo_show(struct device *dev,
+ struct device_attribute *mattr,
char *data)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
return sprintf(data, "0x%08x",
in_be32(pdata->mc_vbase +
MPC85XX_MC_DATA_ERR_INJECT_LO));
}
-static ssize_t mpc85xx_mc_inject_ctrl_show(struct mem_ctl_info *mci, char *data)
+static ssize_t mpc85xx_mc_inject_ctrl_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *data)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
return sprintf(data, "0x%08x",
in_be32(pdata->mc_vbase + MPC85XX_MC_ECC_ERR_INJECT));
}
-static ssize_t mpc85xx_mc_inject_data_hi_store(struct mem_ctl_info *mci,
+static ssize_t mpc85xx_mc_inject_data_hi_store(struct device *dev,
+ struct device_attribute *mattr,
const char *data, size_t count)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
if (isdigit(*data)) {
out_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_HI,
@@ -86,9 +99,11 @@ static ssize_t mpc85xx_mc_inject_data_hi_store(struct mem_ctl_info *mci,
return 0;
}
-static ssize_t mpc85xx_mc_inject_data_lo_store(struct mem_ctl_info *mci,
+static ssize_t mpc85xx_mc_inject_data_lo_store(struct device *dev,
+ struct device_attribute *mattr,
const char *data, size_t count)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
if (isdigit(*data)) {
out_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_LO,
@@ -98,9 +113,11 @@ static ssize_t mpc85xx_mc_inject_data_lo_store(struct mem_ctl_info *mci,
return 0;
}
-static ssize_t mpc85xx_mc_inject_ctrl_store(struct mem_ctl_info *mci,
- const char *data, size_t count)
+static ssize_t mpc85xx_mc_inject_ctrl_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
{
+ struct mem_ctl_info *mci = to_mci(dev);
struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
if (isdigit(*data)) {
out_be32(pdata->mc_vbase + MPC85XX_MC_ECC_ERR_INJECT,
@@ -110,38 +127,35 @@ static ssize_t mpc85xx_mc_inject_ctrl_store(struct mem_ctl_info *mci,
return 0;
}
-static struct mcidev_sysfs_attribute mpc85xx_mc_sysfs_attributes[] = {
- {
- .attr = {
- .name = "inject_data_hi",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = mpc85xx_mc_inject_data_hi_show,
- .store = mpc85xx_mc_inject_data_hi_store},
- {
- .attr = {
- .name = "inject_data_lo",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = mpc85xx_mc_inject_data_lo_show,
- .store = mpc85xx_mc_inject_data_lo_store},
- {
- .attr = {
- .name = "inject_ctrl",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = mpc85xx_mc_inject_ctrl_show,
- .store = mpc85xx_mc_inject_ctrl_store},
+DEVICE_ATTR(inject_data_hi, S_IRUGO | S_IWUSR,
+ mpc85xx_mc_inject_data_hi_show, mpc85xx_mc_inject_data_hi_store);
+DEVICE_ATTR(inject_data_lo, S_IRUGO | S_IWUSR,
+ mpc85xx_mc_inject_data_lo_show, mpc85xx_mc_inject_data_lo_store);
+DEVICE_ATTR(inject_ctrl, S_IRUGO | S_IWUSR,
+ mpc85xx_mc_inject_ctrl_show, mpc85xx_mc_inject_ctrl_store);
- /* End of list */
- {
- .attr = {.name = NULL}
- }
-};
+static int mpc85xx_create_sysfs_attributes(struct mem_ctl_info *mci)
+{
+ int rc;
+
+ rc = device_create_file(&mci->dev, &dev_attr_inject_data_hi);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_inject_data_lo);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_inject_ctrl);
+ if (rc < 0)
+ return rc;
+
+ return 0;
+}
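
Editor's note: this conversion replaces the driver-private attribute table with stock driver-model attributes: DEVICE_ATTR() builds the dev_attr_* object and device_create_file()/device_remove_file() attach it to the mci device. A hypothetical read-only attribute following the same pattern (name and contents are illustrative only, not part of this driver):

	static ssize_t edac_idx_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
	{
		struct mem_ctl_info *mci = to_mci(dev);
		struct mpc85xx_mc_pdata *pdata = mci->pvt_info;

		return sprintf(buf, "%d\n", pdata->edac_idx);
	}
	static DEVICE_ATTR(edac_idx, S_IRUGO, edac_idx_show, NULL);

	/* device_create_file(&mci->dev, &dev_attr_edac_idx) in the probe
	 * helper; device_remove_file() in the remove helper. */
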
-static void mpc85xx_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
+static void mpc85xx_remove_sysfs_attributes(struct mem_ctl_info *mci)
{
- mci->mc_driver_sysfs_attributes = mpc85xx_mc_sysfs_attributes;
+ device_remove_file(&mci->dev, &dev_attr_inject_data_hi);
+ device_remove_file(&mci->dev, &dev_attr_inject_data_lo);
+ device_remove_file(&mci->dev, &dev_attr_inject_ctrl);
}
/**************************** PCI Err device ***************************/
@@ -184,6 +198,42 @@ static void mpc85xx_pci_check(struct edac_pci_ctl_info *pci)
edac_pci_handle_npe(pci, pci->ctl_name);
}
+static void mpc85xx_pcie_check(struct edac_pci_ctl_info *pci)
+{
+ struct mpc85xx_pci_pdata *pdata = pci->pvt_info;
+ u32 err_detect;
+
+ err_detect = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR);
+
+ pr_err("PCIe error(s) detected\n");
+ pr_err("PCIe ERR_DR register: 0x%08x\n", err_detect);
+ pr_err("PCIe ERR_CAP_STAT register: 0x%08x\n",
+ in_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR));
+ pr_err("PCIe ERR_CAP_R0 register: 0x%08x\n",
+ in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R0));
+ pr_err("PCIe ERR_CAP_R1 register: 0x%08x\n",
+ in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R1));
+ pr_err("PCIe ERR_CAP_R2 register: 0x%08x\n",
+ in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R2));
+ pr_err("PCIe ERR_CAP_R3 register: 0x%08x\n",
+ in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R3));
+
+ /* clear error bits */
+ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, err_detect);
+}
+
+static int mpc85xx_pcie_find_capability(struct device_node *np)
+{
+ struct pci_controller *hose;
+
+ if (!np)
+ return -EINVAL;
+
+ hose = pci_find_hose_for_OF_device(np);
+
+ return early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP);
+}
+
static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id)
{
struct edac_pci_ctl_info *pci = dev_id;
@@ -195,13 +245,15 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id)
if (!err_detect)
return IRQ_NONE;
- mpc85xx_pci_check(pci);
+ if (pdata->is_pcie)
+ mpc85xx_pcie_check(pci);
+ else
+ mpc85xx_pci_check(pci);
return IRQ_HANDLED;
}
-static int __devinit mpc85xx_pci_err_probe(struct platform_device *op,
- const struct of_device_id *match)
+int mpc85xx_pci_err_probe(struct platform_device *op)
{
struct edac_pci_ctl_info *pci;
struct mpc85xx_pci_pdata *pdata;
@@ -215,17 +267,35 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *op,
if (!pci)
return -ENOMEM;
+ /* make sure error reporting method is sane */
+ switch (edac_op_state) {
+ case EDAC_OPSTATE_POLL:
+ case EDAC_OPSTATE_INT:
+ break;
+ default:
+ edac_op_state = EDAC_OPSTATE_INT;
+ break;
+ }
+
pdata = pci->pvt_info;
pdata->name = "mpc85xx_pci_err";
pdata->irq = NO_IRQ;
+
+ if (mpc85xx_pcie_find_capability(op->dev.of_node) > 0)
+ pdata->is_pcie = true;
+
dev_set_drvdata(&op->dev, pci);
pci->dev = &op->dev;
pci->mod_name = EDAC_MOD_STR;
pci->ctl_name = pdata->name;
pci->dev_name = dev_name(&op->dev);
- if (edac_op_state == EDAC_OPSTATE_POLL)
- pci->edac_check = mpc85xx_pci_check;
+ if (edac_op_state == EDAC_OPSTATE_POLL) {
+ if (pdata->is_pcie)
+ pci->edac_check = mpc85xx_pcie_check;
+ else
+ pci->edac_check = mpc85xx_pci_check;
+ }
pdata->edac_idx = edac_pci_idx++;
@@ -254,33 +324,44 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *op,
goto err;
}
- orig_pci_err_cap_dr =
- in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR);
+ if (pdata->is_pcie) {
+ orig_pci_err_cap_dr =
+ in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR);
+ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, ~0);
+ orig_pci_err_en =
+ in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN);
+ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, 0);
+ } else {
+ orig_pci_err_cap_dr =
+ in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR);
- /* PCI master abort is expected during config cycles */
- out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, 0x40);
+ /* PCI master abort is expected during config cycles */
+ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, 0x40);
- orig_pci_err_en = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN);
+ orig_pci_err_en =
+ in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN);
- /* disable master abort reporting */
- out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0x40);
+ /* disable master abort reporting */
+ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0x40);
+ }
/* clear error bits */
out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, ~0);
if (edac_pci_add_device(pci, pdata->edac_idx) > 0) {
- debugf3("%s(): failed edac_pci_add_device()\n", __func__);
+ edac_dbg(3, "failed edac_pci_add_device()\n");
goto err;
}
if (edac_op_state == EDAC_OPSTATE_INT) {
pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0);
res = devm_request_irq(&op->dev, pdata->irq,
- mpc85xx_pci_isr, IRQF_DISABLED,
+ mpc85xx_pci_isr,
+ IRQF_SHARED,
"[EDAC] PCI err", pci);
if (res < 0) {
printk(KERN_ERR
- "%s: Unable to requiest irq %d for "
+ "%s: Unable to request irq %d for "
"MPC85xx PCI err\n", __func__, pdata->irq);
irq_dispose_mapping(pdata->irq);
res = -ENODEV;
@@ -291,8 +372,24 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *op,
pdata->irq);
}
+ if (pdata->is_pcie) {
+ /*
+ * Enable all PCIe error interrupts and error detection, except for
+ * the invalid PEX_CONFIG_ADDR/PEX_CONFIG_DATA access interrupt
+ * generation enable bit and its detection enable bit. The PCIe bus
+ * code issues some invalid PEX_CONFIG_ADDR/PEX_CONFIG_DATA accesses
+ * while initializing and configuring devices at boot, which would
+ * flood the log with spurious notices, so that detection stays
+ * disabled.
+ */
+ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0
+ & ~PEX_ERR_ICCAIE_EN_BIT);
+ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, 0
+ | PEX_ERR_ICCAD_DISR_BIT);
+ }
+
devres_remove_group(&op->dev, mpc85xx_pci_err_probe);
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n");
return 0;
@@ -304,49 +401,7 @@ err:
devres_release_group(&op->dev, mpc85xx_pci_err_probe);
return res;
}
-
-static int mpc85xx_pci_err_remove(struct platform_device *op)
-{
- struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev);
- struct mpc85xx_pci_pdata *pdata = pci->pvt_info;
-
- debugf0("%s()\n", __func__);
-
- out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR,
- orig_pci_err_cap_dr);
-
- out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, orig_pci_err_en);
-
- edac_pci_del_device(pci->dev);
-
- if (edac_op_state == EDAC_OPSTATE_INT)
- irq_dispose_mapping(pdata->irq);
-
- edac_pci_free_ctl_info(pci);
-
- return 0;
-}
-
-static struct of_device_id mpc85xx_pci_err_of_match[] = {
- {
- .compatible = "fsl,mpc8540-pcix",
- },
- {
- .compatible = "fsl,mpc8540-pci",
- },
- {},
-};
-MODULE_DEVICE_TABLE(of, mpc85xx_pci_err_of_match);
-
-static struct of_platform_driver mpc85xx_pci_err_driver = {
- .probe = mpc85xx_pci_err_probe,
- .remove = __devexit_p(mpc85xx_pci_err_remove),
- .driver = {
- .name = "mpc85xx_pci_err",
- .owner = THIS_MODULE,
- .of_match_table = mpc85xx_pci_err_of_match,
- },
-};
+EXPORT_SYMBOL(mpc85xx_pci_err_probe);
#endif /* CONFIG_PCI */
@@ -503,8 +558,7 @@ static irqreturn_t mpc85xx_l2_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static int __devinit mpc85xx_l2_err_probe(struct platform_device *op,
- const struct of_device_id *match)
+static int mpc85xx_l2_err_probe(struct platform_device *op)
{
struct edac_device_ctl_info *edac_dev;
struct mpc85xx_l2_pdata *pdata;
@@ -540,15 +594,15 @@ static int __devinit mpc85xx_l2_err_probe(struct platform_device *op,
/* we only need the error registers */
r.start += 0xe00;
- if (!devm_request_mem_region(&op->dev, r.start,
- r.end - r.start + 1, pdata->name)) {
+ if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r),
+ pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
res = -EBUSY;
goto err;
}
- pdata->l2_vbase = devm_ioremap(&op->dev, r.start, r.end - r.start + 1);
+ pdata->l2_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r));
if (!pdata->l2_vbase) {
printk(KERN_ERR "%s: Unable to setup L2 err regs\n", __func__);
res = -ENOMEM;
@@ -572,18 +626,18 @@ static int __devinit mpc85xx_l2_err_probe(struct platform_device *op,
pdata->edac_idx = edac_dev_idx++;
if (edac_device_add_device(edac_dev) > 0) {
- debugf3("%s(): failed edac_device_add_device()\n", __func__);
+ edac_dbg(3, "failed edac_device_add_device()\n");
goto err;
}
if (edac_op_state == EDAC_OPSTATE_INT) {
pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0);
res = devm_request_irq(&op->dev, pdata->irq,
- mpc85xx_l2_isr, IRQF_DISABLED,
+ mpc85xx_l2_isr, 0,
"[EDAC] L2 err", edac_dev);
if (res < 0) {
printk(KERN_ERR
- "%s: Unable to requiest irq %d for "
+ "%s: Unable to request irq %d for "
"MPC85xx L2 err\n", __func__, pdata->irq);
irq_dispose_mapping(pdata->irq);
res = -ENODEV;
@@ -600,7 +654,7 @@ static int __devinit mpc85xx_l2_err_probe(struct platform_device *op,
devres_remove_group(&op->dev, mpc85xx_l2_err_probe);
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
printk(KERN_INFO EDAC_MOD_STR " L2 err registered\n");
return 0;
@@ -618,7 +672,7 @@ static int mpc85xx_l2_err_remove(struct platform_device *op)
struct edac_device_ctl_info *edac_dev = dev_get_drvdata(&op->dev);
struct mpc85xx_l2_pdata *pdata = edac_dev->pvt_info;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
if (edac_op_state == EDAC_OPSTATE_INT) {
out_be32(pdata->l2_vbase + MPC85XX_L2_ERRINTEN, 0);
@@ -656,7 +710,7 @@ static struct of_device_id mpc85xx_l2_err_of_match[] = {
};
MODULE_DEVICE_TABLE(of, mpc85xx_l2_err_of_match);
-static struct of_platform_driver mpc85xx_l2_err_driver = {
+static struct platform_driver mpc85xx_l2_err_driver = {
.probe = mpc85xx_l2_err_probe,
.remove = mpc85xx_l2_err_remove,
.driver = {
@@ -815,7 +869,7 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci)
pfn = err_addr >> PAGE_SHIFT;
for (row_index = 0; row_index < mci->nr_csrows; row_index++) {
- csrow = &mci->csrows[row_index];
+ csrow = mci->csrows[row_index];
if ((pfn >= csrow->first_page) && (pfn <= csrow->last_page))
break;
}
@@ -856,12 +910,16 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci)
mpc85xx_mc_printk(mci, KERN_ERR, "PFN out of range!\n");
if (err_detect & DDR_EDE_SBE)
- edac_mc_handle_ce(mci, pfn, err_addr & PAGE_MASK,
- syndrome, row_index, 0, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ pfn, err_addr & ~PAGE_MASK, syndrome,
+ row_index, 0, -1,
+ mci->ctl_name, "");
if (err_detect & DDR_EDE_MBE)
- edac_mc_handle_ue(mci, pfn, err_addr & PAGE_MASK,
- row_index, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ pfn, err_addr & ~PAGE_MASK, syndrome,
+ row_index, 0, -1,
+ mci->ctl_name, "");
out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, err_detect);
}
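
Editor's note: the legacy edac_mc_handle_ce()/edac_mc_handle_ue() helpers collapse into one call here. For reference, the consolidated signature as used above, with argument roles annotated (-1 marks an unknown position in a declared layer):

	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
			     1,				/* error count */
			     pfn,			/* page frame number */
			     err_addr & ~PAGE_MASK,	/* offset within page */
			     syndrome,			/* ECC syndrome */
			     row_index, 0, -1,		/* top/mid/low layer */
			     mci->ctl_name, "");	/* msg, other detail */
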
@@ -881,10 +939,11 @@ static irqreturn_t mpc85xx_mc_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
+static void mpc85xx_init_csrows(struct mem_ctl_info *mci)
{
struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
struct csrow_info *csrow;
+ struct dimm_info *dimm;
u32 sdram_ctl;
u32 sdtype;
enum mem_type mtype;
@@ -930,7 +989,9 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
u32 start;
u32 end;
- csrow = &mci->csrows[index];
+ csrow = mci->csrows[index];
+ dimm = csrow->channels[0]->dimm;
+
cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 +
(index * MPC85XX_MC_CS_BNDS_OFS));
@@ -946,20 +1007,21 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
csrow->first_page = start;
csrow->last_page = end;
- csrow->nr_pages = end + 1 - start;
- csrow->grain = 8;
- csrow->mtype = mtype;
- csrow->dtype = DEV_UNKNOWN;
+
+ dimm->nr_pages = end + 1 - start;
+ dimm->grain = 8;
+ dimm->mtype = mtype;
+ dimm->dtype = DEV_UNKNOWN;
if (sdram_ctl & DSC_X32_EN)
- csrow->dtype = DEV_X32;
- csrow->edac_mode = EDAC_SECDED;
+ dimm->dtype = DEV_X32;
+ dimm->edac_mode = EDAC_SECDED;
}
}
-static int __devinit mpc85xx_mc_err_probe(struct platform_device *op,
- const struct of_device_id *match)
+static int mpc85xx_mc_err_probe(struct platform_device *op)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct mpc85xx_mc_pdata *pdata;
struct resource r;
u32 sdram_ctl;
@@ -968,7 +1030,14 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op,
if (!devres_open_group(&op->dev, mpc85xx_mc_err_probe, GFP_KERNEL))
return -ENOMEM;
- mci = edac_mc_alloc(sizeof(*pdata), 4, 1, edac_mc_idx);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = 4;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
+ sizeof(*pdata));
if (!mci) {
devres_release_group(&op->dev, mpc85xx_mc_err_probe);
return -ENOMEM;
@@ -977,9 +1046,9 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op,
pdata = mci->pvt_info;
pdata->name = "mpc85xx_mc_err";
pdata->irq = NO_IRQ;
- mci->dev = &op->dev;
+ mci->pdev = &op->dev;
pdata->edac_idx = edac_mc_idx++;
- dev_set_drvdata(mci->dev, mci);
+ dev_set_drvdata(mci->pdev, mci);
mci->ctl_name = pdata->name;
mci->dev_name = pdata->name;
@@ -990,15 +1059,15 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op,
goto err;
}
- if (!devm_request_mem_region(&op->dev, r.start,
- r.end - r.start + 1, pdata->name)) {
+ if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r),
+ pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
res = -EBUSY;
goto err;
}
- pdata->mc_vbase = devm_ioremap(&op->dev, r.start, r.end - r.start + 1);
+ pdata->mc_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r));
if (!pdata->mc_vbase) {
printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__);
res = -ENOMEM;
@@ -1013,7 +1082,7 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op,
goto err;
}
- debugf3("%s(): init mci\n", __func__);
+ edac_dbg(3, "init mci\n");
mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 |
MEM_FLAG_DDR | MEM_FLAG_DDR2;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
@@ -1028,8 +1097,6 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op,
mci->scrub_mode = SCRUB_SW_SRC;
- mpc85xx_set_mc_sysfs_attributes(mci);
-
mpc85xx_init_csrows(mci);
/* store the original error disable bits */
@@ -1041,7 +1108,13 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op,
out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, ~0);
if (edac_mc_add_mc(mci)) {
- debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "failed edac_mc_add_mc()\n");
+ goto err;
+ }
+
+ if (mpc85xx_create_sysfs_attributes(mci)) {
+ edac_mc_del_mc(mci->pdev);
+ edac_dbg(3, "failed edac_mc_add_mc()\n");
goto err;
}
@@ -1060,7 +1133,7 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op,
pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0);
res = devm_request_irq(&op->dev, pdata->irq,
mpc85xx_mc_isr,
- IRQF_DISABLED | IRQF_SHARED,
+ IRQF_SHARED,
"[EDAC] MC err", mci);
if (res < 0) {
printk(KERN_ERR "%s: Unable to request irq %d for "
@@ -1075,7 +1148,7 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op,
}
devres_remove_group(&op->dev, mpc85xx_mc_err_probe);
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
printk(KERN_INFO EDAC_MOD_STR " MC err registered\n");
return 0;
@@ -1093,7 +1166,7 @@ static int mpc85xx_mc_err_remove(struct platform_device *op)
struct mem_ctl_info *mci = dev_get_drvdata(&op->dev);
struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
if (edac_op_state == EDAC_OPSTATE_INT) {
out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_INT_EN, 0);
@@ -1104,6 +1177,7 @@ static int mpc85xx_mc_err_remove(struct platform_device *op)
orig_ddr_err_disable);
out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_SBE, orig_ddr_err_sbe);
+ mpc85xx_remove_sysfs_attributes(mci);
edac_mc_del_mc(&op->dev);
edac_mc_free(mci);
return 0;
@@ -1131,12 +1205,12 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = {
{ .compatible = "fsl,p1020-memory-controller", },
{ .compatible = "fsl,p1021-memory-controller", },
{ .compatible = "fsl,p2020-memory-controller", },
- { .compatible = "fsl,p4080-memory-controller", },
+ { .compatible = "fsl,qoriq-memory-controller", },
{},
};
MODULE_DEVICE_TABLE(of, mpc85xx_mc_err_of_match);
-static struct of_platform_driver mpc85xx_mc_err_driver = {
+static struct platform_driver mpc85xx_mc_err_driver = {
.probe = mpc85xx_mc_err_probe,
.remove = mpc85xx_mc_err_remove,
.driver = {
@@ -1150,13 +1224,14 @@ static struct of_platform_driver mpc85xx_mc_err_driver = {
static void __init mpc85xx_mc_clear_rfxe(void *data)
{
orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1);
- mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~0x20000));
+ mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~HID1_RFXE));
}
#endif
static int __init mpc85xx_mc_init(void)
{
int res = 0;
+ u32 pvr = 0;
printk(KERN_INFO "Freescale(R) MPC85xx EDAC driver, "
"(C) 2006 Montavista Software\n");
@@ -1171,27 +1246,26 @@ static int __init mpc85xx_mc_init(void)
break;
}
- res = of_register_platform_driver(&mpc85xx_mc_err_driver);
+ res = platform_driver_register(&mpc85xx_mc_err_driver);
if (res)
printk(KERN_WARNING EDAC_MOD_STR "MC fails to register\n");
- res = of_register_platform_driver(&mpc85xx_l2_err_driver);
+ res = platform_driver_register(&mpc85xx_l2_err_driver);
if (res)
printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n");
-#ifdef CONFIG_PCI
- res = of_register_platform_driver(&mpc85xx_pci_err_driver);
- if (res)
- printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
-#endif
-
#ifdef CONFIG_FSL_SOC_BOOKE
- /*
- * need to clear HID1[RFXE] to disable machine check int
- * so we can catch it
- */
- if (edac_op_state == EDAC_OPSTATE_INT)
- on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0);
+ pvr = mfspr(SPRN_PVR);
+
+ if ((PVR_VER(pvr) == PVR_VER_E500V1) ||
+ (PVR_VER(pvr) == PVR_VER_E500V2)) {
+ /*
+ * need to clear HID1[RFXE] to disable machine check int
+ * so we can catch it
+ */
+ if (edac_op_state == EDAC_OPSTATE_INT)
+ on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0);
+ }
#endif
return 0;
@@ -1209,13 +1283,15 @@ static void __exit mpc85xx_mc_restore_hid1(void *data)
static void __exit mpc85xx_mc_exit(void)
{
#ifdef CONFIG_FSL_SOC_BOOKE
- on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0);
-#endif
-#ifdef CONFIG_PCI
- of_unregister_platform_driver(&mpc85xx_pci_err_driver);
+ u32 pvr = mfspr(SPRN_PVR);
+
+ if ((PVR_VER(pvr) == PVR_VER_E500V1) ||
+ (PVR_VER(pvr) == PVR_VER_E500V2)) {
+ on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0);
+ }
#endif
- of_unregister_platform_driver(&mpc85xx_l2_err_driver);
- of_unregister_platform_driver(&mpc85xx_mc_err_driver);
+ platform_driver_unregister(&mpc85xx_l2_err_driver);
+ platform_driver_unregister(&mpc85xx_mc_err_driver);
}
module_exit(mpc85xx_mc_exit);
diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h
index cb24df83946..8c625643622 100644
--- a/drivers/edac/mpc85xx_edac.h
+++ b/drivers/edac/mpc85xx_edac.h
@@ -11,7 +11,7 @@
#ifndef _MPC85XX_EDAC_H_
#define _MPC85XX_EDAC_H_
-#define MPC85XX_REVISION " Ver: 2.0.0 " __DATE__
+#define MPC85XX_REVISION " Ver: 2.0.0"
#define EDAC_MOD_STR "MPC85xx_edac"
#define mpc85xx_printk(level, fmt, arg...) \
@@ -134,13 +134,19 @@
#define MPC85XX_PCI_ERR_DR 0x0000
#define MPC85XX_PCI_ERR_CAP_DR 0x0004
#define MPC85XX_PCI_ERR_EN 0x0008
+#define PEX_ERR_ICCAIE_EN_BIT 0x00020000
#define MPC85XX_PCI_ERR_ATTRIB 0x000c
#define MPC85XX_PCI_ERR_ADDR 0x0010
+#define PEX_ERR_ICCAD_DISR_BIT 0x00020000
#define MPC85XX_PCI_ERR_EXT_ADDR 0x0014
#define MPC85XX_PCI_ERR_DL 0x0018
#define MPC85XX_PCI_ERR_DH 0x001c
#define MPC85XX_PCI_GAS_TIMR 0x0020
#define MPC85XX_PCI_PCIX_TIMR 0x0024
+#define MPC85XX_PCIE_ERR_CAP_R0 0x0028
+#define MPC85XX_PCIE_ERR_CAP_R1 0x002c
+#define MPC85XX_PCIE_ERR_CAP_R2 0x0030
+#define MPC85XX_PCIE_ERR_CAP_R3 0x0034
struct mpc85xx_mc_pdata {
char *name;
@@ -158,6 +164,7 @@ struct mpc85xx_l2_pdata {
struct mpc85xx_pci_pdata {
char *name;
+ bool is_pcie;
int edac_idx;
void __iomem *pci_vbase;
int irq;
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index 7e5ff367705..542fad70e36 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -100,7 +100,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev)
return 0;
}
-static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
+static int mv64x60_pci_err_probe(struct platform_device *pdev)
{
struct edac_pci_ctl_info *pci;
struct mv64x60_pci_pdata *pdata;
@@ -169,7 +169,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
MV64X60_PCIx_ERR_MASK_VAL);
if (edac_pci_add_device(pci, pdata->edac_idx) > 0) {
- debugf3("%s(): failed edac_pci_add_device()\n", __func__);
+ edac_dbg(3, "failed edac_pci_add_device()\n");
goto err;
}
@@ -194,7 +194,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
devres_remove_group(&pdev->dev, mv64x60_pci_err_probe);
/* get this far and it's successful */
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
return 0;
@@ -210,7 +210,7 @@ static int mv64x60_pci_err_remove(struct platform_device *pdev)
{
struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev);
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
edac_pci_del_device(&pdev->dev);
@@ -221,7 +221,7 @@ static int mv64x60_pci_err_remove(struct platform_device *pdev)
static struct platform_driver mv64x60_pci_err_driver = {
.probe = mv64x60_pci_err_probe,
- .remove = __devexit_p(mv64x60_pci_err_remove),
+ .remove = mv64x60_pci_err_remove,
.driver = {
.name = "mv64x60_pci_err",
}
@@ -271,7 +271,7 @@ static irqreturn_t mv64x60_sram_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev)
+static int mv64x60_sram_err_probe(struct platform_device *pdev)
{
struct edac_device_ctl_info *edac_dev;
struct mv64x60_sram_pdata *pdata;
@@ -336,7 +336,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev)
pdata->edac_idx = edac_dev_idx++;
if (edac_device_add_device(edac_dev) > 0) {
- debugf3("%s(): failed edac_device_add_device()\n", __func__);
+ edac_dbg(3, "failed edac_device_add_device()\n");
goto err;
}
@@ -363,7 +363,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev)
devres_remove_group(&pdev->dev, mv64x60_sram_err_probe);
/* get this far and it's successful */
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
return 0;
@@ -379,7 +379,7 @@ static int mv64x60_sram_err_remove(struct platform_device *pdev)
{
struct edac_device_ctl_info *edac_dev = platform_get_drvdata(pdev);
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(edac_dev);
@@ -439,7 +439,7 @@ static irqreturn_t mv64x60_cpu_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
+static int mv64x60_cpu_err_probe(struct platform_device *pdev)
{
struct edac_device_ctl_info *edac_dev;
struct resource *r;
@@ -531,7 +531,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
pdata->edac_idx = edac_dev_idx++;
if (edac_device_add_device(edac_dev) > 0) {
- debugf3("%s(): failed edac_device_add_device()\n", __func__);
+ edac_dbg(3, "failed edac_device_add_device()\n");
goto err;
}
@@ -558,7 +558,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
devres_remove_group(&pdev->dev, mv64x60_cpu_err_probe);
/* get this far and it's successful */
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
return 0;
@@ -574,7 +574,7 @@ static int mv64x60_cpu_err_remove(struct platform_device *pdev)
{
struct edac_device_ctl_info *edac_dev = platform_get_drvdata(pdev);
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(edac_dev);
@@ -611,12 +611,17 @@ static void mv64x60_mc_check(struct mem_ctl_info *mci)
/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
if (!(reg & 0x1))
- edac_mc_handle_ce(mci, err_addr >> PAGE_SHIFT,
- err_addr & PAGE_MASK, syndrome, 0, 0,
- mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ err_addr >> PAGE_SHIFT,
+ err_addr & PAGE_MASK, syndrome,
+ 0, 0, -1,
+ mci->ctl_name, "");
else /* 2 bit error, UE */
- edac_mc_handle_ue(mci, err_addr >> PAGE_SHIFT,
- err_addr & PAGE_MASK, 0, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ err_addr >> PAGE_SHIFT,
+ err_addr & PAGE_MASK, 0,
+ 0, 0, -1,
+ mci->ctl_name, "");
/* clear the error */
out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
@@ -656,6 +661,8 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci,
struct mv64x60_mc_pdata *pdata)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
+
u32 devtype;
u32 ctl;
@@ -663,36 +670,37 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci,
ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
- csrow = &mci->csrows[0];
- csrow->first_page = 0;
- csrow->nr_pages = pdata->total_mem >> PAGE_SHIFT;
- csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
- csrow->grain = 8;
+ csrow = mci->csrows[0];
+ dimm = csrow->channels[0]->dimm;
+
+ dimm->nr_pages = pdata->total_mem >> PAGE_SHIFT;
+ dimm->grain = 8;
- csrow->mtype = (ctl & MV64X60_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR;
+ dimm->mtype = (ctl & MV64X60_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR;
devtype = (ctl >> 20) & 0x3;
switch (devtype) {
case 0x0:
- csrow->dtype = DEV_X32;
+ dimm->dtype = DEV_X32;
break;
case 0x2: /* could be X8 too, but no way to tell */
- csrow->dtype = DEV_X16;
+ dimm->dtype = DEV_X16;
break;
case 0x3:
- csrow->dtype = DEV_X4;
+ dimm->dtype = DEV_X4;
break;
default:
- csrow->dtype = DEV_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
break;
}
- csrow->edac_mode = EDAC_SECDED;
+ dimm->edac_mode = EDAC_SECDED;
}
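
Editor's note: the csrow-to-dimm relocation seen here recurs throughout this series: size and type fields now live on the dimm_info hanging off each channel, while the address range stays on the csrow. A sketch of filling one single-channel row, with nr_pages standing in for whatever size the driver computed:

	struct csrow_info *csrow = mci->csrows[0];
	struct dimm_info *dimm = csrow->channels[0]->dimm;

	csrow->first_page = 0;			/* range stays on the csrow */
	csrow->last_page  = nr_pages - 1;

	dimm->nr_pages	= nr_pages;		/* size/type move to the dimm */
	dimm->grain	= 8;
	dimm->mtype	= MEM_DDR;
	dimm->dtype	= DEV_X16;
	dimm->edac_mode	= EDAC_SECDED;
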
-static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
+static int mv64x60_mc_err_probe(struct platform_device *pdev)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct mv64x60_mc_pdata *pdata;
struct resource *r;
u32 ctl;
@@ -701,7 +709,14 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
if (!devres_open_group(&pdev->dev, mv64x60_mc_err_probe, GFP_KERNEL))
return -ENOMEM;
- mci = edac_mc_alloc(sizeof(struct mv64x60_mc_pdata), 1, 1, edac_mc_idx);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = 1;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = 1;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
+ sizeof(struct mv64x60_mc_pdata));
if (!mci) {
printk(KERN_ERR "%s: No memory for CPU err\n", __func__);
devres_release_group(&pdev->dev, mv64x60_mc_err_probe);
@@ -709,7 +724,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
}
pdata = mci->pvt_info;
- mci->dev = &pdev->dev;
+ mci->pdev = &pdev->dev;
platform_set_drvdata(pdev, mci);
pdata->name = "mv64x60_mc_err";
pdata->irq = NO_IRQ;
@@ -751,7 +766,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
goto err2;
}
- debugf3("%s(): init mci\n", __func__);
+ edac_dbg(3, "init mci\n");
mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
@@ -775,7 +790,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL, ctl);
if (edac_mc_add_mc(mci)) {
- debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "failed edac_mc_add_mc()\n");
goto err;
}
@@ -800,7 +815,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
}
/* get this far and it's successful */
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
return 0;
@@ -816,7 +831,7 @@ static int mv64x60_mc_err_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
diff --git a/drivers/edac/mv64x60_edac.h b/drivers/edac/mv64x60_edac.h
index e042e2daa8f..c7f209c92a1 100644
--- a/drivers/edac/mv64x60_edac.h
+++ b/drivers/edac/mv64x60_edac.h
@@ -12,7 +12,7 @@
#ifndef _MV64X60_EDAC_H_
#define _MV64X60_EDAC_H_
-#define MV64x60_REVISION " Ver: 2.0.0 " __DATE__
+#define MV64x60_REVISION " Ver: 2.0.0"
#define EDAC_MOD_STR "MV64x60_edac"
#define mv64x60_printk(level, fmt, arg...) \
diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c
new file mode 100644
index 00000000000..7e98084d364
--- /dev/null
+++ b/drivers/edac/octeon_edac-l2c.c
@@ -0,0 +1,208 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012 Cavium, Inc.
+ *
+ * Copyright (C) 2009 Wind River Systems,
+ * written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+
+#include <asm/octeon/cvmx.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
+#define EDAC_MOD_STR "octeon-l2c"
+
+static void octeon_l2c_poll_oct1(struct edac_device_ctl_info *l2c)
+{
+ union cvmx_l2t_err l2t_err, l2t_err_reset;
+ union cvmx_l2d_err l2d_err, l2d_err_reset;
+
+ l2t_err_reset.u64 = 0;
+ l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
+ if (l2t_err.s.sec_err) {
+ edac_device_handle_ce(l2c, 0, 0,
+ "Tag Single bit error (corrected)");
+ l2t_err_reset.s.sec_err = 1;
+ }
+ if (l2t_err.s.ded_err) {
+ edac_device_handle_ue(l2c, 0, 0,
+ "Tag Double bit error (detected)");
+ l2t_err_reset.s.ded_err = 1;
+ }
+ if (l2t_err_reset.u64)
+ cvmx_write_csr(CVMX_L2T_ERR, l2t_err_reset.u64);
+
+ l2d_err_reset.u64 = 0;
+ l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR);
+ if (l2d_err.s.sec_err) {
+ edac_device_handle_ce(l2c, 0, 1,
+ "Data Single bit error (corrected)");
+ l2d_err_reset.s.sec_err = 1;
+ }
+ if (l2d_err.s.ded_err) {
+ edac_device_handle_ue(l2c, 0, 1,
+ "Data Double bit error (detected)");
+ l2d_err_reset.s.ded_err = 1;
+ }
+ if (l2d_err_reset.u64)
+ cvmx_write_csr(CVMX_L2D_ERR, l2d_err_reset.u64);
+
+}
+
+static void _octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c, int tad)
+{
+ union cvmx_l2c_err_tdtx err_tdtx, err_tdtx_reset;
+ union cvmx_l2c_err_ttgx err_ttgx, err_ttgx_reset;
+ char buf1[64];
+ char buf2[80];
+
+ err_tdtx_reset.u64 = 0;
+ err_tdtx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TDTX(tad));
+ if (err_tdtx.s.dbe || err_tdtx.s.sbe ||
+ err_tdtx.s.vdbe || err_tdtx.s.vsbe)
+ snprintf(buf1, sizeof(buf1),
+ "type:%d, syn:0x%x, way:%d",
+ err_tdtx.s.type, err_tdtx.s.syn, err_tdtx.s.wayidx);
+
+ if (err_tdtx.s.dbe) {
+ snprintf(buf2, sizeof(buf2),
+ "L2D Double bit error (detected):%s", buf1);
+ err_tdtx_reset.s.dbe = 1;
+ edac_device_handle_ue(l2c, tad, 1, buf2);
+ }
+ if (err_tdtx.s.sbe) {
+ snprintf(buf2, sizeof(buf2),
+ "L2D Single bit error (corrected):%s", buf1);
+ err_tdtx_reset.s.sbe = 1;
+ edac_device_handle_ce(l2c, tad, 1, buf2);
+ }
+ if (err_tdtx.s.vdbe) {
+ snprintf(buf2, sizeof(buf2),
+ "VBF Double bit error (detected):%s", buf1);
+ err_tdtx_reset.s.vdbe = 1;
+ edac_device_handle_ue(l2c, tad, 1, buf2);
+ }
+ if (err_tdtx.s.vsbe) {
+ snprintf(buf2, sizeof(buf2),
+ "VBF Single bit error (corrected):%s", buf1);
+ err_tdtx_reset.s.vsbe = 1;
+ edac_device_handle_ce(l2c, tad, 1, buf2);
+ }
+ if (err_tdtx_reset.u64)
+ cvmx_write_csr(CVMX_L2C_ERR_TDTX(tad), err_tdtx_reset.u64);
+
+ err_ttgx_reset.u64 = 0;
+ err_ttgx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TTGX(tad));
+
+ if (err_ttgx.s.dbe || err_ttgx.s.sbe)
+ snprintf(buf1, sizeof(buf1),
+ "type:%d, syn:0x%x, way:%d",
+ err_ttgx.s.type, err_ttgx.s.syn, err_ttgx.s.wayidx);
+
+ if (err_ttgx.s.dbe) {
+ snprintf(buf2, sizeof(buf2),
+ "Tag Double bit error (detected):%s", buf1);
+ err_ttgx_reset.s.dbe = 1;
+ edac_device_handle_ue(l2c, tad, 0, buf2);
+ }
+ if (err_ttgx.s.sbe) {
+ snprintf(buf2, sizeof(buf2),
+ "Tag Single bit error (corrected):%s", buf1);
+ err_ttgx_reset.s.sbe = 1;
+ edac_device_handle_ce(l2c, tad, 0, buf2);
+ }
+ if (err_ttgx_reset.u64)
+ cvmx_write_csr(CVMX_L2C_ERR_TTGX(tad), err_ttgx_reset.u64);
+}
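
Editor's note: the edac_device CE/UE helpers used above take (ctl_info, instance, block, message); in this driver the instance is the TAD and blocks 0/1 are tags/data, so a corrected tag error on TAD 2 would be reported as:

	edac_device_handle_ce(l2c, 2, 0, "Tag Single bit error (corrected)");
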
+
+static void octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c)
+{
+ int i;
+ for (i = 0; i < l2c->nr_instances; i++)
+ _octeon_l2c_poll_oct2(l2c, i);
+}
+
+static int octeon_l2c_probe(struct platform_device *pdev)
+{
+ struct edac_device_ctl_info *l2c;
+
+ int num_tads = OCTEON_IS_MODEL(OCTEON_CN68XX) ? 4 : 1;
+
+ /* 'Tags' are block 0, 'Data' is block 1 */
+ l2c = edac_device_alloc_ctl_info(0, "l2c", num_tads, "l2c", 2, 0,
+ NULL, 0, edac_device_alloc_index());
+ if (!l2c)
+ return -ENOMEM;
+
+ l2c->dev = &pdev->dev;
+ platform_set_drvdata(pdev, l2c);
+ l2c->dev_name = dev_name(&pdev->dev);
+
+ l2c->mod_name = "octeon-l2c";
+ l2c->ctl_name = "octeon_l2c_err";
+
+
+ if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) {
+ union cvmx_l2t_err l2t_err;
+ union cvmx_l2d_err l2d_err;
+
+ l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
+ l2t_err.s.sec_intena = 0; /* We poll */
+ l2t_err.s.ded_intena = 0;
+ cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64);
+
+ l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR);
+ l2d_err.s.sec_intena = 0; /* We poll */
+ l2d_err.s.ded_intena = 0;
+ cvmx_write_csr(CVMX_L2D_ERR, l2d_err.u64);

+
+ l2c->edac_check = octeon_l2c_poll_oct1;
+ } else {
+ /* OCTEON II */
+ l2c->edac_check = octeon_l2c_poll_oct2;
+ }
+
+ if (edac_device_add_device(l2c) > 0) {
+ pr_err("%s: edac_device_add_device() failed\n", __func__);
+ goto err;
+ }
+
+
+ return 0;
+
+err:
+ edac_device_free_ctl_info(l2c);
+
+ return -ENXIO;
+}
+
+static int octeon_l2c_remove(struct platform_device *pdev)
+{
+ struct edac_device_ctl_info *l2c = platform_get_drvdata(pdev);
+
+ edac_device_del_device(&pdev->dev);
+ edac_device_free_ctl_info(l2c);
+
+ return 0;
+}
+
+static struct platform_driver octeon_l2c_driver = {
+ .probe = octeon_l2c_probe,
+ .remove = octeon_l2c_remove,
+ .driver = {
+ .name = "octeon_l2c_edac",
+ }
+};
+module_platform_driver(octeon_l2c_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c
new file mode 100644
index 00000000000..4bd10f94f06
--- /dev/null
+++ b/drivers/edac/octeon_edac-lmc.c
@@ -0,0 +1,353 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2009 Wind River Systems,
+ * written by Ralf Baechle <ralf@linux-mips.org>
+ *
+ * Copyright (c) 2013 by Cisco Systems, Inc.
+ * All rights reserved.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+#include <linux/ctype.h>
+
+#include <asm/octeon/octeon.h>
+#include <asm/octeon/cvmx-lmcx-defs.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
+#define OCTEON_MAX_MC 4
+
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+
+struct octeon_lmc_pvt {
+ unsigned long inject;
+ unsigned long error_type;
+ unsigned long dimm;
+ unsigned long rank;
+ unsigned long bank;
+ unsigned long row;
+ unsigned long col;
+};
+
+static void octeon_lmc_edac_poll(struct mem_ctl_info *mci)
+{
+ union cvmx_lmcx_mem_cfg0 cfg0;
+ bool do_clear = false;
+ char msg[64];
+
+ cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx));
+ if (cfg0.s.sec_err || cfg0.s.ded_err) {
+ union cvmx_lmcx_fadr fadr;
+ fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx));
+ snprintf(msg, sizeof(msg),
+ "DIMM %d rank %d bank %d row %d col %d",
+ fadr.cn30xx.fdimm, fadr.cn30xx.fbunk,
+ fadr.cn30xx.fbank, fadr.cn30xx.frow, fadr.cn30xx.fcol);
+ }
+
+ if (cfg0.s.sec_err) {
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, msg, "");
+ cfg0.s.sec_err = -1; /* Done, re-arm */
+ do_clear = true;
+ }
+
+ if (cfg0.s.ded_err) {
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, msg, "");
+ cfg0.s.ded_err = -1; /* Done, re-arm */
+ do_clear = true;
+ }
+ if (do_clear)
+ cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx), cfg0.u64);
+}
+
+static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci)
+{
+ struct octeon_lmc_pvt *pvt = mci->pvt_info;
+ union cvmx_lmcx_int int_reg;
+ bool do_clear = false;
+ char msg[64];
+
+	if (!pvt->inject) {
+		int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx));
+	} else {
+		int_reg.u64 = 0;	/* stale bits would otherwise leak through */
+		if (pvt->error_type == 1)
+			int_reg.s.sec_err = 1;
+		if (pvt->error_type == 2)
+			int_reg.s.ded_err = 1;
+	}
+
+ if (int_reg.s.sec_err || int_reg.s.ded_err) {
+ union cvmx_lmcx_fadr fadr;
+ if (likely(!pvt->inject))
+ fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx));
+ else {
+ fadr.cn61xx.fdimm = pvt->dimm;
+ fadr.cn61xx.fbunk = pvt->rank;
+ fadr.cn61xx.fbank = pvt->bank;
+ fadr.cn61xx.frow = pvt->row;
+ fadr.cn61xx.fcol = pvt->col;
+ }
+ snprintf(msg, sizeof(msg),
+ "DIMM %d rank %d bank %d row %d col %d",
+ fadr.cn61xx.fdimm, fadr.cn61xx.fbunk,
+ fadr.cn61xx.fbank, fadr.cn61xx.frow, fadr.cn61xx.fcol);
+ }
+
+ if (int_reg.s.sec_err) {
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, msg, "");
+ int_reg.s.sec_err = -1; /* Done, re-arm */
+ do_clear = true;
+ }
+
+ if (int_reg.s.ded_err) {
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, msg, "");
+ int_reg.s.ded_err = -1; /* Done, re-arm */
+ do_clear = true;
+ }
+
+ if (do_clear) {
+ if (likely(!pvt->inject))
+ cvmx_write_csr(CVMX_LMCX_INT(mci->mc_idx), int_reg.u64);
+ else
+ pvt->inject = 0;
+ }
+}
+
+/************************ MC SYSFS parts ***********************************/
+
+/* Only a couple naming differences per template, so very similar */
+#define TEMPLATE_SHOW(reg) \
+static ssize_t octeon_mc_inject_##reg##_show(struct device *dev, \
+ struct device_attribute *attr, \
+ char *data) \
+{ \
+ struct mem_ctl_info *mci = to_mci(dev); \
+ struct octeon_lmc_pvt *pvt = mci->pvt_info; \
+ return sprintf(data, "%016llu\n", (u64)pvt->reg); \
+}
+
+#define TEMPLATE_STORE(reg) \
+static ssize_t octeon_mc_inject_##reg##_store(struct device *dev, \
+ struct device_attribute *attr, \
+ const char *data, size_t count) \
+{ \
+ struct mem_ctl_info *mci = to_mci(dev); \
+ struct octeon_lmc_pvt *pvt = mci->pvt_info; \
+ if (isdigit(*data)) { \
+ if (!kstrtoul(data, 0, &pvt->reg)) \
+ return count; \
+ } \
+	return -EINVAL;						\
+}
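+
+/*
+ * Usage sketch for the injection attributes (illustrative paths; the
+ * exact sysfs location depends on the EDAC core version):
+ *
+ *	echo 1      > /sys/.../mc0/dimm		# fake FADR fields
+ *	echo single > /sys/.../mc0/error_type	# "single" or "double"
+ *	echo 1      > /sys/.../mc0/inject	# arm the injection
+ *
+ * The next octeon_lmc_edac_poll_o2() run reports the simulated error
+ * using these fields instead of reading CVMX_LMCX_FADR.
+ */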
+
+TEMPLATE_SHOW(inject);
+TEMPLATE_STORE(inject);
+TEMPLATE_SHOW(dimm);
+TEMPLATE_STORE(dimm);
+TEMPLATE_SHOW(bank);
+TEMPLATE_STORE(bank);
+TEMPLATE_SHOW(rank);
+TEMPLATE_STORE(rank);
+TEMPLATE_SHOW(row);
+TEMPLATE_STORE(row);
+TEMPLATE_SHOW(col);
+TEMPLATE_STORE(col);
+
+static ssize_t octeon_mc_inject_error_type_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *data,
+ size_t count)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct octeon_lmc_pvt *pvt = mci->pvt_info;
+
+	if (!strncmp(data, "single", 6))
+		pvt->error_type = 1;
+	else if (!strncmp(data, "double", 6))
+		pvt->error_type = 2;
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+static ssize_t octeon_mc_inject_error_type_show(struct device *dev,
+ struct device_attribute *attr,
+ char *data)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct octeon_lmc_pvt *pvt = mci->pvt_info;
+	if (pvt->error_type == 1)
+		return sprintf(data, "single\n");
+	else if (pvt->error_type == 2)
+		return sprintf(data, "double\n");
+
+ return 0;
+}
+
+static DEVICE_ATTR(inject, S_IRUGO | S_IWUSR,
+ octeon_mc_inject_inject_show, octeon_mc_inject_inject_store);
+static DEVICE_ATTR(error_type, S_IRUGO | S_IWUSR,
+ octeon_mc_inject_error_type_show, octeon_mc_inject_error_type_store);
+static DEVICE_ATTR(dimm, S_IRUGO | S_IWUSR,
+ octeon_mc_inject_dimm_show, octeon_mc_inject_dimm_store);
+static DEVICE_ATTR(rank, S_IRUGO | S_IWUSR,
+ octeon_mc_inject_rank_show, octeon_mc_inject_rank_store);
+static DEVICE_ATTR(bank, S_IRUGO | S_IWUSR,
+ octeon_mc_inject_bank_show, octeon_mc_inject_bank_store);
+static DEVICE_ATTR(row, S_IRUGO | S_IWUSR,
+ octeon_mc_inject_row_show, octeon_mc_inject_row_store);
+static DEVICE_ATTR(col, S_IRUGO | S_IWUSR,
+ octeon_mc_inject_col_show, octeon_mc_inject_col_store);
+
+static int octeon_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
+{
+ int rc;
+
+ rc = device_create_file(&mci->dev, &dev_attr_inject);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_error_type);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_dimm);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_rank);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_bank);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_row);
+ if (rc < 0)
+ return rc;
+ rc = device_create_file(&mci->dev, &dev_attr_col);
+ if (rc < 0)
+ return rc;
+
+ return 0;
+}
+
+static int octeon_lmc_edac_probe(struct platform_device *pdev)
+{
+ struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[1];
+ int mc = pdev->id;
+
+ opstate_init();
+
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = 1;
+ layers[0].is_virt_csrow = false;
+
+ if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) {
+ union cvmx_lmcx_mem_cfg0 cfg0;
+
+ cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0));
+ if (!cfg0.s.ecc_ena) {
+ dev_info(&pdev->dev, "Disabled (ECC not enabled)\n");
+ return 0;
+ }
+
+ mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct octeon_lmc_pvt));
+ if (!mci)
+ return -ENXIO;
+
+ mci->pdev = &pdev->dev;
+ mci->dev_name = dev_name(&pdev->dev);
+
+ mci->mod_name = "octeon-lmc";
+ mci->ctl_name = "octeon-lmc-err";
+ mci->edac_check = octeon_lmc_edac_poll;
+
+ if (edac_mc_add_mc(mci)) {
+ dev_err(&pdev->dev, "edac_mc_add_mc() failed\n");
+ edac_mc_free(mci);
+ return -ENXIO;
+ }
+
+ if (octeon_set_mc_sysfs_attributes(mci)) {
+ dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n");
+ return -ENXIO;
+ }
+
+ cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc));
+ cfg0.s.intr_ded_ena = 0; /* We poll */
+ cfg0.s.intr_sec_ena = 0;
+ cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mc), cfg0.u64);
+ } else {
+ /* OCTEON II */
+ union cvmx_lmcx_int_en en;
+ union cvmx_lmcx_config config;
+
+ config.u64 = cvmx_read_csr(CVMX_LMCX_CONFIG(0));
+ if (!config.s.ecc_ena) {
+ dev_info(&pdev->dev, "Disabled (ECC not enabled)\n");
+ return 0;
+ }
+
+ mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct octeon_lmc_pvt));
+ if (!mci)
+ return -ENXIO;
+
+ mci->pdev = &pdev->dev;
+ mci->dev_name = dev_name(&pdev->dev);
+
+ mci->mod_name = "octeon-lmc";
+		mci->ctl_name = "octeon-lmc-err";
+ mci->edac_check = octeon_lmc_edac_poll_o2;
+
+ if (edac_mc_add_mc(mci)) {
+ dev_err(&pdev->dev, "edac_mc_add_mc() failed\n");
+ edac_mc_free(mci);
+ return -ENXIO;
+ }
+
+ if (octeon_set_mc_sysfs_attributes(mci)) {
+ dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n");
+ return -ENXIO;
+ }
+
+		en.u64 = cvmx_read_csr(CVMX_LMCX_INT_EN(mc));
+		en.s.intr_ded_ena = 0;	/* We poll */
+		en.s.intr_sec_ena = 0;
+		cvmx_write_csr(CVMX_LMCX_INT_EN(mc), en.u64);
+ }
+ platform_set_drvdata(pdev, mci);
+
+ return 0;
+}
+
+static int octeon_lmc_edac_remove(struct platform_device *pdev)
+{
+ struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+ edac_mc_del_mc(&pdev->dev);
+ edac_mc_free(mci);
+ return 0;
+}
+
+static struct platform_driver octeon_lmc_edac_driver = {
+ .probe = octeon_lmc_edac_probe,
+ .remove = octeon_lmc_edac_remove,
+ .driver = {
+ .name = "octeon_lmc_edac",
+ }
+};
+module_platform_driver(octeon_lmc_edac_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c
new file mode 100644
index 00000000000..0f83c33a7d1
--- /dev/null
+++ b/drivers/edac/octeon_edac-pc.c
@@ -0,0 +1,143 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012 Cavium, Inc.
+ *
+ * Copyright (C) 2009 Wind River Systems,
+ * written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
+#include <asm/octeon/cvmx.h>
+#include <asm/mipsregs.h>
+
+extern int register_co_cache_error_notifier(struct notifier_block *nb);
+extern int unregister_co_cache_error_notifier(struct notifier_block *nb);
+
+extern unsigned long long cache_err_dcache[NR_CPUS];
+
+struct co_cache_error {
+ struct notifier_block notifier;
+ struct edac_device_ctl_info *ed;
+};
+
+/**
+ * co_cache_error_event - EDAC CPU cache error callback
+ * @this:  registered notifier block
+ * @event: non-zero if the error is unrecoverable
+ * @ptr:   unused
+ */
+static int co_cache_error_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct co_cache_error *p = container_of(this, struct co_cache_error,
+ notifier);
+
+ unsigned int core = cvmx_get_core_num();
+ unsigned int cpu = smp_processor_id();
+ u64 icache_err = read_octeon_c0_icacheerr();
+ u64 dcache_err;
+
+ if (event) {
+ dcache_err = cache_err_dcache[core];
+ cache_err_dcache[core] = 0;
+ } else {
+ dcache_err = read_octeon_c0_dcacheerr();
+ }
+
+ if (icache_err & 1) {
+ edac_device_printk(p->ed, KERN_ERR,
+ "CacheErr (Icache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n",
+ (unsigned long long)icache_err, core, cpu,
+ read_c0_errorepc());
+ write_octeon_c0_icacheerr(0);
+ edac_device_handle_ce(p->ed, cpu, 1, "icache");
+ }
+ if (dcache_err & 1) {
+ edac_device_printk(p->ed, KERN_ERR,
+ "CacheErr (Dcache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n",
+ (unsigned long long)dcache_err, core, cpu,
+ read_c0_errorepc());
+ if (event)
+ edac_device_handle_ue(p->ed, cpu, 0, "dcache");
+ else
+ edac_device_handle_ce(p->ed, cpu, 0, "dcache");
+
+ /* Clear the error indication */
+ if (OCTEON_IS_MODEL(OCTEON_FAM_2))
+ write_octeon_c0_dcacheerr(1);
+ else
+ write_octeon_c0_dcacheerr(0);
+ }
+
+ return NOTIFY_STOP;
+}
+
+static int co_cache_error_probe(struct platform_device *pdev)
+{
+ struct co_cache_error *p = devm_kzalloc(&pdev->dev, sizeof(*p),
+ GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ p->notifier.notifier_call = co_cache_error_event;
+ platform_set_drvdata(pdev, p);
+
+ p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(),
+ "cache", 2, 0, NULL, 0,
+ edac_device_alloc_index());
+ if (!p->ed)
+ goto err;
+
+ p->ed->dev = &pdev->dev;
+
+ p->ed->dev_name = dev_name(&pdev->dev);
+
+ p->ed->mod_name = "octeon-cpu";
+ p->ed->ctl_name = "cache";
+
+ if (edac_device_add_device(p->ed)) {
+ pr_err("%s: edac_device_add_device() failed\n", __func__);
+ goto err1;
+ }
+
+ register_co_cache_error_notifier(&p->notifier);
+
+ return 0;
+
+err1:
+ edac_device_free_ctl_info(p->ed);
+err:
+ return -ENXIO;
+}
+
+static int co_cache_error_remove(struct platform_device *pdev)
+{
+ struct co_cache_error *p = platform_get_drvdata(pdev);
+
+ unregister_co_cache_error_notifier(&p->notifier);
+ edac_device_del_device(&pdev->dev);
+ edac_device_free_ctl_info(p->ed);
+ return 0;
+}
+
+static struct platform_driver co_cache_error_driver = {
+ .probe = co_cache_error_probe,
+ .remove = co_cache_error_remove,
+ .driver = {
+ .name = "octeon_pc_edac",
+ }
+};
+module_platform_driver(co_cache_error_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
diff --git a/drivers/edac/octeon_edac-pci.c b/drivers/edac/octeon_edac-pci.c
new file mode 100644
index 00000000000..9ca73cec74e
--- /dev/null
+++ b/drivers/edac/octeon_edac-pci.c
@@ -0,0 +1,111 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012 Cavium, Inc.
+ * Copyright (C) 2009 Wind River Systems,
+ * written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+
+#include <asm/octeon/cvmx.h>
+#include <asm/octeon/cvmx-npi-defs.h>
+#include <asm/octeon/cvmx-pci-defs.h>
+#include <asm/octeon/octeon.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
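+/*
+ * The PCI status bits tested below are write-one-to-clear, so each
+ * handled error bit is written back as 1 (the "Reset" comments) to
+ * re-arm it.
+ */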
+static void octeon_pci_poll(struct edac_pci_ctl_info *pci)
+{
+ union cvmx_pci_cfg01 cfg01;
+
+ cfg01.u32 = octeon_npi_read32(CVMX_NPI_PCI_CFG01);
+ if (cfg01.s.dpe) { /* Detected parity error */
+ edac_pci_handle_pe(pci, pci->ctl_name);
+ cfg01.s.dpe = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+ if (cfg01.s.sse) {
+ edac_pci_handle_npe(pci, "Signaled System Error");
+ cfg01.s.sse = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+ if (cfg01.s.rma) {
+ edac_pci_handle_npe(pci, "Received Master Abort");
+ cfg01.s.rma = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+ if (cfg01.s.rta) {
+ edac_pci_handle_npe(pci, "Received Target Abort");
+ cfg01.s.rta = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+ if (cfg01.s.sta) {
+ edac_pci_handle_npe(pci, "Signaled Target Abort");
+ cfg01.s.sta = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+ if (cfg01.s.mdpe) {
+ edac_pci_handle_npe(pci, "Master Data Parity Error");
+ cfg01.s.mdpe = 1; /* Reset */
+ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32);
+ }
+}
+
+static int octeon_pci_probe(struct platform_device *pdev)
+{
+ struct edac_pci_ctl_info *pci;
+
+ pci = edac_pci_alloc_ctl_info(0, "octeon_pci_err");
+ if (!pci)
+ return -ENOMEM;
+
+ pci->dev = &pdev->dev;
+ platform_set_drvdata(pdev, pci);
+ pci->dev_name = dev_name(&pdev->dev);
+
+ pci->mod_name = "octeon-pci";
+ pci->ctl_name = "octeon_pci_err";
+ pci->edac_check = octeon_pci_poll;
+
+ if (edac_pci_add_device(pci, 0) > 0) {
+ pr_err("%s: edac_pci_add_device() failed\n", __func__);
+ goto err;
+ }
+
+ return 0;
+
+err:
+ edac_pci_free_ctl_info(pci);
+
+	return -ENXIO;
+}
+
+static int octeon_pci_remove(struct platform_device *pdev)
+{
+ struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev);
+
+ edac_pci_del_device(&pdev->dev);
+ edac_pci_free_ctl_info(pci);
+
+ return 0;
+}
+
+static struct platform_driver octeon_pci_driver = {
+ .probe = octeon_pci_probe,
+ .remove = octeon_pci_remove,
+ .driver = {
+ .name = "octeon_pci_edac",
+ }
+};
+module_platform_driver(octeon_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c
index 7f71ee43674..9c971b57553 100644
--- a/drivers/edac/pasemi_edac.c
+++ b/drivers/edac/pasemi_edac.c
@@ -74,7 +74,7 @@ static int system_mmc_id;
static u32 pasemi_edac_get_error_info(struct mem_ctl_info *mci)
{
- struct pci_dev *pdev = to_pci_dev(mci->dev);
+ struct pci_dev *pdev = to_pci_dev(mci->pdev);
u32 tmp;
pci_read_config_dword(pdev, MCDEBUG_ERRSTA,
@@ -95,7 +95,7 @@ static u32 pasemi_edac_get_error_info(struct mem_ctl_info *mci)
static void pasemi_edac_process_error_info(struct mem_ctl_info *mci, u32 errsta)
{
- struct pci_dev *pdev = to_pci_dev(mci->dev);
+ struct pci_dev *pdev = to_pci_dev(mci->pdev);
u32 errlog1a;
u32 cs;
@@ -110,15 +110,16 @@ static void pasemi_edac_process_error_info(struct mem_ctl_info *mci, u32 errsta)
/* uncorrectable/multi-bit errors */
if (errsta & (MCDEBUG_ERRSTA_MBE_STATUS |
MCDEBUG_ERRSTA_RFL_STATUS)) {
- edac_mc_handle_ue(mci, mci->csrows[cs].first_page, 0,
- cs, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ mci->csrows[cs]->first_page, 0, 0,
+ cs, 0, -1, mci->ctl_name, "");
}
/* correctable/single-bit errors */
- if (errsta & MCDEBUG_ERRSTA_SBE_STATUS) {
- edac_mc_handle_ce(mci, mci->csrows[cs].first_page, 0,
- 0, cs, 0, mci->ctl_name);
- }
+ if (errsta & MCDEBUG_ERRSTA_SBE_STATUS)
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ mci->csrows[cs]->first_page, 0, 0,
+ cs, 0, -1, mci->ctl_name, "");
}
static void pasemi_edac_check(struct mem_ctl_info *mci)
@@ -135,11 +136,13 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci,
enum edac_type edac_mode)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
u32 rankcfg;
int index;
for (index = 0; index < mci->nr_csrows; index++) {
- csrow = &mci->csrows[index];
+ csrow = mci->csrows[index];
+ dimm = csrow->channels[0]->dimm;
pci_read_config_dword(pdev,
MCDRAM_RANKCFG + (index * 12),
@@ -151,20 +154,20 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci,
switch ((rankcfg & MCDRAM_RANKCFG_TYPE_SIZE_M) >>
MCDRAM_RANKCFG_TYPE_SIZE_S) {
case 0:
- csrow->nr_pages = 128 << (20 - PAGE_SHIFT);
+ dimm->nr_pages = 128 << (20 - PAGE_SHIFT);
break;
case 1:
- csrow->nr_pages = 256 << (20 - PAGE_SHIFT);
+ dimm->nr_pages = 256 << (20 - PAGE_SHIFT);
break;
case 2:
case 3:
- csrow->nr_pages = 512 << (20 - PAGE_SHIFT);
+ dimm->nr_pages = 512 << (20 - PAGE_SHIFT);
break;
case 4:
- csrow->nr_pages = 1024 << (20 - PAGE_SHIFT);
+ dimm->nr_pages = 1024 << (20 - PAGE_SHIFT);
break;
case 5:
- csrow->nr_pages = 2048 << (20 - PAGE_SHIFT);
+ dimm->nr_pages = 2048 << (20 - PAGE_SHIFT);
break;
default:
edac_mc_printk(mci, KERN_ERR,
@@ -174,21 +177,22 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci,
}
csrow->first_page = last_page_in_mmc;
- csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
- last_page_in_mmc += csrow->nr_pages;
+ csrow->last_page = csrow->first_page + dimm->nr_pages - 1;
+ last_page_in_mmc += dimm->nr_pages;
csrow->page_mask = 0;
- csrow->grain = PASEMI_EDAC_ERROR_GRAIN;
- csrow->mtype = MEM_DDR;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = edac_mode;
+ dimm->grain = PASEMI_EDAC_ERROR_GRAIN;
+ dimm->mtype = MEM_DDR;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = edac_mode;
}
return 0;
}
-static int __devinit pasemi_edac_probe(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int pasemi_edac_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
{
struct mem_ctl_info *mci = NULL;
+ struct edac_mc_layer layers[2];
u32 errctl1, errcor, scrub, mcen;
pci_read_config_dword(pdev, MCCFG_MCEN, &mcen);
@@ -205,9 +209,14 @@ static int __devinit pasemi_edac_probe(struct pci_dev *pdev,
MCDEBUG_ERRCTL1_RFL_LOG_EN;
pci_write_config_dword(pdev, MCDEBUG_ERRCTL1, errctl1);
- mci = edac_mc_alloc(0, PASEMI_EDAC_NR_CSROWS, PASEMI_EDAC_NR_CHANS,
- system_mmc_id++);
-
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = PASEMI_EDAC_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = PASEMI_EDAC_NR_CHANS;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(system_mmc_id++, ARRAY_SIZE(layers), layers,
+ 0);
if (mci == NULL)
return -ENOMEM;
@@ -216,7 +225,7 @@ static int __devinit pasemi_edac_probe(struct pci_dev *pdev,
MCCFG_ERRCOR_ECC_GEN_EN |
MCCFG_ERRCOR_ECC_CRR_EN;
- mci->dev = &pdev->dev;
+ mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
mci->edac_cap = (errcor & MCCFG_ERRCOR_ECC_GEN_EN) ?
@@ -257,7 +266,7 @@ fail:
return -ENODEV;
}
-static void __devexit pasemi_edac_remove(struct pci_dev *pdev)
+static void pasemi_edac_remove(struct pci_dev *pdev)
{
struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
@@ -278,7 +287,7 @@ MODULE_DEVICE_TABLE(pci, pasemi_edac_pci_tbl);
static struct pci_driver pasemi_edac_driver = {
.name = MODULE_NAME,
.probe = pasemi_edac_probe,
- .remove = __devexit_p(pasemi_edac_remove),
+ .remove = pasemi_edac_remove,
.id_table = pasemi_edac_pci_tbl,
};
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
index 070cea41b66..ef6b7e08f48 100644
--- a/drivers/edac/ppc4xx_edac.c
+++ b/drivers/edac/ppc4xx_edac.c
@@ -113,7 +113,7 @@
#define EDAC_OPSTATE_UNKNOWN_STR "unknown"
#define PPC4XX_EDAC_MODULE_NAME "ppc4xx_edac"
-#define PPC4XX_EDAC_MODULE_REVISION "v1.0.0 " __DATE__
+#define PPC4XX_EDAC_MODULE_REVISION "v1.0.0"
#define PPC4XX_EDAC_MESSAGE_SIZE 256
@@ -142,7 +142,7 @@
/*
* The ibm,sdram-4xx-ddr2 Device Control Registers (DCRs) are
- * indirectly acccessed and have a base and length defined by the
+ * indirectly accessed and have a base and length defined by the
* device tree. The base can be anything; however, we expect the
* length to be precisely two registers, the first for the address
* window and the second for the data window.
@@ -184,8 +184,7 @@ struct ppc4xx_ecc_status {
/* Function Prototypes */
-static int ppc4xx_edac_probe(struct platform_device *device,
- const struct of_device_id *device_id);
+static int ppc4xx_edac_probe(struct platform_device *device);
static int ppc4xx_edac_remove(struct platform_device *device);
/* Global Variables */
@@ -201,12 +200,12 @@ static struct of_device_id ppc4xx_edac_match[] = {
{ }
};
-static struct of_platform_driver ppc4xx_edac_driver = {
+static struct platform_driver ppc4xx_edac_driver = {
.probe = ppc4xx_edac_probe,
.remove = ppc4xx_edac_remove,
.driver = {
.owner = THIS_MODULE,
- .name = PPC4XX_EDAC_MODULE_NAME
+ .name = PPC4XX_EDAC_MODULE_NAME,
.of_match_table = ppc4xx_edac_match,
},
};
@@ -728,7 +727,10 @@ ppc4xx_edac_handle_ce(struct mem_ctl_info *mci,
for (row = 0; row < mci->nr_csrows; row++)
if (ppc4xx_edac_check_bank_error(status, row))
- edac_mc_handle_ce_no_info(mci, message);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ 0, 0, 0,
+ row, 0, -1,
+ message, "");
}
/**
@@ -756,7 +758,10 @@ ppc4xx_edac_handle_ue(struct mem_ctl_info *mci,
for (row = 0; row < mci->nr_csrows; row++)
if (ppc4xx_edac_check_bank_error(status, row))
- edac_mc_handle_ue(mci, page, offset, row, message);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ page, offset, 0,
+ row, 0, -1,
+ message, "");
}
/**
@@ -833,8 +838,7 @@ ppc4xx_edac_isr(int irq, void *dev_id)
*
* Returns a device type width enumeration.
*/
-static enum dev_type __devinit
-ppc4xx_edac_get_dtype(u32 mcopt1)
+static enum dev_type ppc4xx_edac_get_dtype(u32 mcopt1)
{
switch (mcopt1 & SDRAM_MCOPT1_WDTH_MASK) {
case SDRAM_MCOPT1_WDTH_16:
@@ -857,8 +861,7 @@ ppc4xx_edac_get_dtype(u32 mcopt1)
*
* Returns a memory type enumeration.
*/
-static enum mem_type __devinit
-ppc4xx_edac_get_mtype(u32 mcopt1)
+static enum mem_type ppc4xx_edac_get_mtype(u32 mcopt1)
{
bool rden = ((mcopt1 & SDRAM_MCOPT1_RDEN_MASK) == SDRAM_MCOPT1_RDEN);
@@ -873,7 +876,7 @@ ppc4xx_edac_get_mtype(u32 mcopt1)
}
/**
- * ppc4xx_edac_init_csrows - intialize driver instance rows
+ * ppc4xx_edac_init_csrows - initialize driver instance rows
* @mci: A pointer to the EDAC memory controller instance
* associated with the ibm,sdram-4xx-ddr2 controller for which
* the csrows (i.e. banks/ranks) are being initialized.
@@ -881,24 +884,22 @@ ppc4xx_edac_get_mtype(u32 mcopt1)
* currently set for the controller, from which bank width
* and memory type information is derived.
*
- * This routine intializes the virtual "chip select rows" associated
+ * This routine initializes the virtual "chip select rows" associated
* with the EDAC memory controller instance. An ibm,sdram-4xx-ddr2
* controller bank/rank is mapped to a row.
*
* Returns 0 if OK; otherwise, -EINVAL if the memory bank size
* configuration cannot be determined.
*/
-static int __devinit
-ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
+static int ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
{
const struct ppc4xx_edac_pdata *pdata = mci->pvt_info;
int status = 0;
enum mem_type mtype;
enum dev_type dtype;
enum edac_type edac_mode;
- int row;
- u32 mbxcf, size;
- static u32 ppc4xx_last_page;
+ int row, j;
+ u32 mbxcf, size, nr_pages;
/* Establish the memory type and width */
@@ -949,7 +950,7 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
case SDRAM_MBCF_SZ_2GB:
case SDRAM_MBCF_SZ_4GB:
case SDRAM_MBCF_SZ_8GB:
- csi->nr_pages = SDRAM_MBCF_SZ_TO_PAGES(size);
+ nr_pages = SDRAM_MBCF_SZ_TO_PAGES(size);
break;
default:
ppc4xx_edac_mc_printk(KERN_ERR, mci,
@@ -960,10 +961,6 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
goto done;
}
- csi->first_page = ppc4xx_last_page;
- csi->last_page = csi->first_page + csi->nr_pages - 1;
- csi->page_mask = 0;
-
/*
* It's unclear exactly what grain should be set to
* here. The SDRAM_ECCES register allows resolution of
@@ -976,15 +973,17 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
* possible values would be the PLB width (16), the
* page size (PAGE_SIZE) or the memory width (2 or 4).
*/
+ for (j = 0; j < csi->nr_channels; j++) {
+ struct dimm_info *dimm = csi->channels[j].dimm;
- csi->grain = 1;
-
- csi->mtype = mtype;
- csi->dtype = dtype;
+ dimm->nr_pages = nr_pages / csi->nr_channels;
+ dimm->grain = 1;
- csi->edac_mode = edac_mode;
+ dimm->mtype = mtype;
+ dimm->dtype = dtype;
- ppc4xx_last_page += csi->nr_pages;
+ dimm->edac_mode = edac_mode;
+ }
}
done:
@@ -992,14 +991,11 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
}
/**
- * ppc4xx_edac_mc_init - intialize driver instance
+ * ppc4xx_edac_mc_init - initialize driver instance
* @mci: A pointer to the EDAC memory controller instance being
* initialized.
* @op: A pointer to the OpenFirmware device tree node associated
* with the controller this EDAC instance is bound to.
- * @match: A pointer to the OpenFirmware device tree match
- * information associated with the controller this EDAC instance
- * is bound to.
* @dcr_host: A pointer to the DCR data containing the DCR mapping
* for this controller instance.
* @mcopt1: The 32-bit Memory Controller Option 1 register value
@@ -1012,26 +1008,23 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
*
* Returns 0 if OK; otherwise, < 0 on error.
*/
-static int __devinit
-ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
- struct platform_device *op,
- const struct of_device_id *match,
- const dcr_host_t *dcr_host,
- u32 mcopt1)
+static int ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
+ struct platform_device *op,
+ const dcr_host_t *dcr_host, u32 mcopt1)
{
int status = 0;
const u32 memcheck = (mcopt1 & SDRAM_MCOPT1_MCHK_MASK);
struct ppc4xx_edac_pdata *pdata = NULL;
const struct device_node *np = op->dev.of_node;
- if (match == NULL)
+ if (of_match_device(ppc4xx_edac_match, &op->dev) == NULL)
return -EINVAL;
/* Initial driver pointers and private data */
- mci->dev = &op->dev;
+ mci->pdev = &op->dev;
- dev_set_drvdata(mci->dev, mci);
+ dev_set_drvdata(mci->pdev, mci);
pdata = mci->pvt_info;
@@ -1073,7 +1066,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
mci->mod_name = PPC4XX_EDAC_MODULE_NAME;
mci->mod_ver = PPC4XX_EDAC_MODULE_REVISION;
- mci->ctl_name = match->compatible,
+	mci->ctl_name = ppc4xx_edac_match->compatible;
mci->dev_name = np->full_name;
/* Initialize callbacks */
@@ -1107,8 +1100,8 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
* Returns 0 if OK; otherwise, -ENODEV if the interrupts could not be
* mapped and assigned.
*/
-static int __devinit
-ppc4xx_edac_register_irq(struct platform_device *op, struct mem_ctl_info *mci)
+static int ppc4xx_edac_register_irq(struct platform_device *op,
+ struct mem_ctl_info *mci)
{
int status = 0;
int ded_irq, sec_irq;
@@ -1185,8 +1178,8 @@ ppc4xx_edac_register_irq(struct platform_device *op, struct mem_ctl_info *mci)
* Returns 0 if the DCRs were successfully mapped; otherwise, < 0 on
* error.
*/
-static int __devinit
-ppc4xx_edac_map_dcrs(const struct device_node *np, dcr_host_t *dcr_host)
+static int ppc4xx_edac_map_dcrs(const struct device_node *np,
+ dcr_host_t *dcr_host)
{
unsigned int dcr_base, dcr_len;
@@ -1227,9 +1220,6 @@ ppc4xx_edac_map_dcrs(const struct device_node *np, dcr_host_t *dcr_host)
* ppc4xx_edac_probe - check controller and bind driver
* @op: A pointer to the OpenFirmware device tree node associated
* with the controller being probed for driver binding.
- * @match: A pointer to the OpenFirmware device tree match
- * information associated with the controller being probed
- * for driver binding.
*
* This routine probes a specific ibm,sdram-4xx-ddr2 controller
* instance for binding with the driver.
@@ -1237,14 +1227,14 @@ ppc4xx_edac_map_dcrs(const struct device_node *np, dcr_host_t *dcr_host)
* Returns 0 if the controller instance was successfully bound to the
* driver; otherwise, < 0 on error.
*/
-static int __devinit
-ppc4xx_edac_probe(struct platform_device *op, const struct of_device_id *match)
+static int ppc4xx_edac_probe(struct platform_device *op)
{
int status = 0;
u32 mcopt1, memcheck;
dcr_host_t dcr_host;
const struct device_node *np = op->dev.of_node;
struct mem_ctl_info *mci = NULL;
+ struct edac_mc_layer layers[2];
static int ppc4xx_edac_instance;
/*
@@ -1290,12 +1280,14 @@ ppc4xx_edac_probe(struct platform_device *op, const struct of_device_id *match)
* controller instance and perform the appropriate
* initialization.
*/
-
- mci = edac_mc_alloc(sizeof(struct ppc4xx_edac_pdata),
- ppc4xx_edac_nr_csrows,
- ppc4xx_edac_nr_chans,
- ppc4xx_edac_instance);
-
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = ppc4xx_edac_nr_csrows;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = ppc4xx_edac_nr_chans;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(ppc4xx_edac_instance, ARRAY_SIZE(layers), layers,
+ sizeof(struct ppc4xx_edac_pdata));
if (mci == NULL) {
ppc4xx_edac_printk(KERN_ERR, "%s: "
"Failed to allocate EDAC MC instance!\n",
@@ -1304,7 +1296,7 @@ ppc4xx_edac_probe(struct platform_device *op, const struct of_device_id *match)
goto done;
}
- status = ppc4xx_edac_mc_init(mci, op, match, &dcr_host, mcopt1);
+ status = ppc4xx_edac_mc_init(mci, op, &dcr_host, mcopt1);
if (status) {
ppc4xx_edac_mc_printk(KERN_ERR, mci,
@@ -1337,7 +1329,7 @@ ppc4xx_edac_probe(struct platform_device *op, const struct of_device_id *match)
return 0;
fail1:
- edac_mc_del_mc(mci->dev);
+ edac_mc_del_mc(mci->pdev);
fail:
edac_mc_free(mci);
@@ -1371,7 +1363,7 @@ ppc4xx_edac_remove(struct platform_device *op)
dcr_unmap(pdata->dcr_host, SDRAM_DCR_RESOURCE_LEN);
- edac_mc_del_mc(mci->dev);
+ edac_mc_del_mc(mci->pdev);
edac_mc_free(mci);
return 0;
@@ -1421,7 +1413,7 @@ ppc4xx_edac_init(void)
ppc4xx_edac_opstate_init();
- return of_register_platform_driver(&ppc4xx_edac_driver);
+ return platform_driver_register(&ppc4xx_edac_driver);
}
/**
@@ -1434,7 +1426,7 @@ ppc4xx_edac_init(void)
static void __exit
ppc4xx_edac_exit(void)
{
- of_unregister_platform_driver(&ppc4xx_edac_driver);
+ platform_driver_unregister(&ppc4xx_edac_driver);
}
module_init(ppc4xx_edac_init);
diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c
index 6a822c631ef..8f936bc7a01 100644
--- a/drivers/edac/r82600_edac.c
+++ b/drivers/edac/r82600_edac.c
@@ -22,7 +22,7 @@
#include <linux/edac.h>
#include "edac_core.h"
-#define R82600_REVISION " Ver: 2.0.2 " __DATE__
+#define R82600_REVISION " Ver: 2.0.2"
#define EDAC_MOD_STR "r82600_edac"
#define r82600_printk(level, fmt, arg...) \
@@ -120,7 +120,7 @@
* write 0=NOP
*/
-#define R82600_DRBA 0x60 /* + 0x60..0x63 SDRAM Row Boundry Address
+#define R82600_DRBA 0x60 /* + 0x60..0x63 SDRAM Row Boundary Address
* Registers
*
* 7:0 Address lines 30:24 - upper limit of
@@ -131,7 +131,7 @@ struct r82600_error_info {
u32 eapr;
};
-static unsigned int disable_hardware_scrub;
+static bool disable_hardware_scrub;
static struct edac_pci_ctl_info *r82600_pci;
@@ -140,7 +140,7 @@ static void r82600_get_error_info(struct mem_ctl_info *mci,
{
struct pci_dev *pdev;
- pdev = to_pci_dev(mci->dev);
+ pdev = to_pci_dev(mci->pdev);
pci_read_config_dword(pdev, R82600_EAP, &info->eapr);
if (info->eapr & BIT(0))
@@ -179,10 +179,11 @@ static int r82600_process_error_info(struct mem_ctl_info *mci,
error_found = 1;
if (handle_errors)
- edac_mc_handle_ce(mci, page, 0, /* not avail */
- syndrome,
- edac_mc_find_csrow_by_page(mci, page),
- 0, mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ page, 0, syndrome,
+ edac_mc_find_csrow_by_page(mci, page),
+ 0, -1,
+ mci->ctl_name, "");
}
if (info->eapr & BIT(1)) { /* UE? */
@@ -190,9 +191,11 @@ static int r82600_process_error_info(struct mem_ctl_info *mci,
if (handle_errors)
/* 82600 doesn't give enough info */
- edac_mc_handle_ue(mci, page, 0,
- edac_mc_find_csrow_by_page(mci, page),
- mci->ctl_name);
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ page, 0, 0,
+ edac_mc_find_csrow_by_page(mci, page),
+ 0, -1,
+ mci->ctl_name, "");
}
return error_found;
@@ -202,7 +205,7 @@ static void r82600_check(struct mem_ctl_info *mci)
{
struct r82600_error_info info;
- debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+ edac_dbg(1, "MC%d\n", mci->mc_idx);
r82600_get_error_info(mci, &info);
r82600_process_error_info(mci, &info, 1);
}
@@ -216,8 +219,9 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
u8 dramcr)
{
struct csrow_info *csrow;
+ struct dimm_info *dimm;
int index;
- u8 drbar; /* SDRAM Row Boundry Address Register */
+ u8 drbar; /* SDRAM Row Boundary Address Register */
u32 row_high_limit, row_high_limit_last;
u32 reg_sdram, ecc_on, row_base;
@@ -226,18 +230,19 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
row_high_limit_last = 0;
for (index = 0; index < mci->nr_csrows; index++) {
- csrow = &mci->csrows[index];
+ csrow = mci->csrows[index];
+ dimm = csrow->channels[0]->dimm;
/* find the DRAM Chip Select Base address and mask */
pci_read_config_byte(pdev, R82600_DRBA + index, &drbar);
- debugf1("%s() Row=%d DRBA = %#0x\n", __func__, index, drbar);
+ edac_dbg(1, "Row=%d DRBA = %#0x\n", index, drbar);
row_high_limit = ((u32) drbar << 24);
/* row_high_limit = ((u32)drbar << 24) | 0xffffffUL; */
- debugf1("%s() Row=%d, Boundry Address=%#0x, Last = %#0x\n",
- __func__, index, row_high_limit, row_high_limit_last);
+ edac_dbg(1, "Row=%d, Boundary Address=%#0x, Last = %#0x\n",
+ index, row_high_limit, row_high_limit_last);
/* Empty row [p.57] */
if (row_high_limit == row_high_limit_last)
@@ -247,16 +252,17 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
csrow->first_page = row_base >> PAGE_SHIFT;
csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1;
- csrow->nr_pages = csrow->last_page - csrow->first_page + 1;
+
+ dimm->nr_pages = csrow->last_page - csrow->first_page + 1;
/* Error address is top 19 bits - so granularity is *
* 14 bits */
- csrow->grain = 1 << 14;
- csrow->mtype = reg_sdram ? MEM_RDDR : MEM_DDR;
+ dimm->grain = 1 << 14;
+ dimm->mtype = reg_sdram ? MEM_RDDR : MEM_DDR;
/* FIXME - check that this is unknowable with this chipset */
- csrow->dtype = DEV_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
/* Mode is global on 82600 */
- csrow->edac_mode = ecc_on ? EDAC_SECDED : EDAC_NONE;
+ dimm->edac_mode = ecc_on ? EDAC_SECDED : EDAC_NONE;
row_high_limit_last = row_high_limit;
}
}
@@ -264,27 +270,32 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
static int r82600_probe1(struct pci_dev *pdev, int dev_idx)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
u8 dramcr;
u32 eapr;
u32 scrub_disabled;
u32 sdram_refresh_rate;
struct r82600_error_info discard;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
pci_read_config_byte(pdev, R82600_DRAMC, &dramcr);
pci_read_config_dword(pdev, R82600_EAP, &eapr);
scrub_disabled = eapr & BIT(31);
sdram_refresh_rate = dramcr & (BIT(0) | BIT(1));
- debugf2("%s(): sdram refresh rate = %#0x\n", __func__,
- sdram_refresh_rate);
- debugf2("%s(): DRAMC register = %#0x\n", __func__, dramcr);
- mci = edac_mc_alloc(0, R82600_NR_CSROWS, R82600_NR_CHANS, 0);
-
+ edac_dbg(2, "sdram refresh rate = %#0x\n", sdram_refresh_rate);
+ edac_dbg(2, "DRAMC register = %#0x\n", dramcr);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = R82600_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = R82600_NR_CHANS;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (mci == NULL)
return -ENOMEM;
- debugf0("%s(): mci = %p\n", __func__, mci);
- mci->dev = &pdev->dev;
+ edac_dbg(0, "mci = %p\n", mci);
+ mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
/* FIXME try to work out if the chip leads have been used for COM2
@@ -299,8 +310,8 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx)
if (ecc_enabled(dramcr)) {
if (scrub_disabled)
- debugf3("%s(): mci = %p - Scrubbing disabled! EAP: "
- "%#0x\n", __func__, mci, eapr);
+ edac_dbg(3, "mci = %p - Scrubbing disabled! EAP: %#0x\n",
+ mci, eapr);
} else
mci->edac_cap = EDAC_FLAG_NONE;
@@ -317,15 +328,14 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx)
* type of memory controller. The ID is therefore hardcoded to 0.
*/
if (edac_mc_add_mc(mci)) {
- debugf3("%s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "failed edac_mc_add_mc()\n");
goto fail;
}
/* get this far and it's successful */
if (disable_hardware_scrub) {
- debugf3("%s(): Disabling Hardware Scrub (scrub on error)\n",
- __func__);
+ edac_dbg(3, "Disabling Hardware Scrub (scrub on error)\n");
pci_write_bits32(pdev, R82600_EAP, BIT(31), BIT(31));
}
@@ -340,7 +350,7 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx)
__func__);
}
- debugf3("%s(): success\n", __func__);
+ edac_dbg(3, "success\n");
return 0;
fail:
@@ -349,20 +359,20 @@ fail:
}
/* returns count (>= 0), or negative on error */
-static int __devinit r82600_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int r82600_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
{
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
/* don't need to call pci_enable_device() */
return r82600_probe1(pdev, ent->driver_data);
}
-static void __devexit r82600_remove_one(struct pci_dev *pdev)
+static void r82600_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
if (r82600_pci)
edac_pci_release_generic_ctl(r82600_pci);
@@ -373,7 +383,7 @@ static void __devexit r82600_remove_one(struct pci_dev *pdev)
edac_mc_free(mci);
}
-static const struct pci_device_id r82600_pci_tbl[] __devinitdata = {
+static const struct pci_device_id r82600_pci_tbl[] = {
{
PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID)
},
@@ -387,7 +397,7 @@ MODULE_DEVICE_TABLE(pci, r82600_pci_tbl);
static struct pci_driver r82600_driver = {
.name = EDAC_MOD_STR,
.probe = r82600_init_one,
- .remove = __devexit_p(r82600_remove_one),
+ .remove = r82600_remove_one,
.id_table = r82600_pci_tbl,
};
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
new file mode 100644
index 00000000000..deea0dc9999
--- /dev/null
+++ b/drivers/edac/sb_edac.c
@@ -0,0 +1,2189 @@
+/* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module
+ *
+ * This driver supports the memory controllers found on the Intel
+ * processor family Sandy Bridge.
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License version 2 only.
+ *
+ * Copyright (c) 2011 by:
+ * Mauro Carvalho Chehab
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/edac.h>
+#include <linux/mmzone.h>
+#include <linux/smp.h>
+#include <linux/bitmap.h>
+#include <linux/math64.h>
+#include <asm/processor.h>
+#include <asm/mce.h>
+
+#include "edac_core.h"
+
+/* Static vars */
+static LIST_HEAD(sbridge_edac_list);
+static DEFINE_MUTEX(sbridge_edac_lock);
+static int probed;
+
+/*
+ * Alter this version for the module when modifications are made
+ */
+#define SBRIDGE_REVISION " Ver: 1.1.0 "
+#define EDAC_MOD_STR "sbridge_edac"
+
+/*
+ * Debug macros
+ */
+#define sbridge_printk(level, fmt, arg...) \
+ edac_printk(level, "sbridge", fmt, ##arg)
+
+#define sbridge_mc_printk(mci, level, fmt, arg...) \
+ edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##arg)
+
+/*
+ * Get a bit field at register value <v>, from bit <lo> to bit <hi>
+ */
+#define GET_BITFIELD(v, lo, hi) \
+ (((v) & GENMASK_ULL(hi, lo)) >> (lo))
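+
+/*
+ * Example: GET_BITFIELD(0xdeadbeef, 4, 7) == 0xe: GENMASK_ULL(7, 4)
+ * selects bits 7..4 (mask 0xf0) and the shift right-aligns the field.
+ */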
+
+/*
+ * sbridge Memory Controller Registers
+ */
+
+/*
+ * FIXME: For now, let's order by device function, as it makes the
+ * driver development process easier. This table should be
+ * moved to pci_ids.h when the driver is submitted upstream.
+ */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0 0x3ca0 /* 14.0 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA 0x3ca8 /* 15.0 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS 0x3c71 /* 15.1 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0 0x3caa /* 15.2 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1 0x3cab /* 15.3 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2 0x3cac /* 15.4 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3 0x3cad /* 15.5 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO 0x3cb8 /* 17.0 */
+
+/*
+ * Currently unused, but will be needed by future implementations,
+ * as these devices hold the error counters.
+ */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0 0x3c72 /* 16.2 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1 0x3c73 /* 16.3 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2 0x3c76 /* 16.6 */
+#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3 0x3c77 /* 16.7 */
+
+/* Devices 12 Function 6, Offsets 0x80 to 0xcc */
+static const u32 sbridge_dram_rule[] = {
+ 0x80, 0x88, 0x90, 0x98, 0xa0,
+ 0xa8, 0xb0, 0xb8, 0xc0, 0xc8,
+};
+
+static const u32 ibridge_dram_rule[] = {
+ 0x60, 0x68, 0x70, 0x78, 0x80,
+ 0x88, 0x90, 0x98, 0xa0, 0xa8,
+ 0xb0, 0xb8, 0xc0, 0xc8, 0xd0,
+ 0xd8, 0xe0, 0xe8, 0xf0, 0xf8,
+};
+
+#define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff)
+#define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3)
+#define INTERLEAVE_MODE(reg) GET_BITFIELD(reg, 1, 1)
+#define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
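+
+/*
+ * SAD_LIMIT holds address bits 45:26 of the inclusive upper bound of a
+ * DRAM rule; OR-ing in 0x3ffffff fills the low 26 bits with ones, so a
+ * raw field value of 1 decodes to a limit of 0x7ffffff (128 MiB - 1).
+ */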
+
+static char *get_dram_attr(u32 reg)
+{
+	switch (DRAM_ATTR(reg)) {
+ case 0:
+ return "DRAM";
+ case 1:
+ return "MMCFG";
+ case 2:
+ return "NXM";
+ default:
+ return "unknown";
+ }
+}
+
+static const u32 sbridge_interleave_list[] = {
+ 0x84, 0x8c, 0x94, 0x9c, 0xa4,
+ 0xac, 0xb4, 0xbc, 0xc4, 0xcc,
+};
+
+static const u32 ibridge_interleave_list[] = {
+ 0x64, 0x6c, 0x74, 0x7c, 0x84,
+ 0x8c, 0x94, 0x9c, 0xa4, 0xac,
+ 0xb4, 0xbc, 0xc4, 0xcc, 0xd4,
+ 0xdc, 0xe4, 0xec, 0xf4, 0xfc,
+};
+
+struct interleave_pkg {
+ unsigned char start;
+ unsigned char end;
+};
+
+static const struct interleave_pkg sbridge_interleave_pkg[] = {
+ { 0, 2 },
+ { 3, 5 },
+ { 8, 10 },
+ { 11, 13 },
+ { 16, 18 },
+ { 19, 21 },
+ { 24, 26 },
+ { 27, 29 },
+};
+
+static const struct interleave_pkg ibridge_interleave_pkg[] = {
+ { 0, 3 },
+ { 4, 7 },
+ { 8, 11 },
+ { 12, 15 },
+ { 16, 19 },
+ { 20, 23 },
+ { 24, 27 },
+ { 28, 31 },
+};
+
+static inline int sad_pkg(const struct interleave_pkg *table, u32 reg,
+ int interleave)
+{
+ return GET_BITFIELD(reg, table[interleave].start,
+ table[interleave].end);
+}
+
+/* Devices 12 Function 7 */
+
+#define TOLM 0x80
+#define TOHM 0x84
+
+#define GET_TOLM(reg) ((GET_BITFIELD(reg, 0, 3) << 28) | 0x3ffffff)
+#define GET_TOHM(reg) ((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff)
+
+/* Device 13 Function 6 */
+
+#define SAD_TARGET 0xf0
+
+#define SOURCE_ID(reg) GET_BITFIELD(reg, 9, 11)
+
+#define SAD_CONTROL 0xf4
+
+#define NODE_ID(reg) GET_BITFIELD(reg, 0, 2)
+
+/* Device 14 function 0 */
+
+static const u32 tad_dram_rule[] = {
+ 0x40, 0x44, 0x48, 0x4c,
+ 0x50, 0x54, 0x58, 0x5c,
+ 0x60, 0x64, 0x68, 0x6c,
+};
+#define MAX_TAD ARRAY_SIZE(tad_dram_rule)
+
+#define TAD_LIMIT(reg) ((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff)
+#define TAD_SOCK(reg) GET_BITFIELD(reg, 10, 11)
+#define TAD_CH(reg) GET_BITFIELD(reg, 8, 9)
+#define TAD_TGT3(reg) GET_BITFIELD(reg, 6, 7)
+#define TAD_TGT2(reg) GET_BITFIELD(reg, 4, 5)
+#define TAD_TGT1(reg) GET_BITFIELD(reg, 2, 3)
+#define TAD_TGT0(reg) GET_BITFIELD(reg, 0, 1)
+
+/* Device 15, function 0 */
+
+#define MCMTR 0x7c
+
+#define IS_ECC_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 2, 2)
+#define IS_LOCKSTEP_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 1, 1)
+#define IS_CLOSE_PG(mcmtr) GET_BITFIELD(mcmtr, 0, 0)
+
+/* Device 15, function 1 */
+
+#define RASENABLES 0xac
+#define IS_MIRROR_ENABLED(reg) GET_BITFIELD(reg, 0, 0)
+
+/* Device 15, functions 2-5 */
+
+static const int mtr_regs[] = {
+ 0x80, 0x84, 0x88,
+};
+
+#define RANK_DISABLE(mtr) GET_BITFIELD(mtr, 16, 19)
+#define IS_DIMM_PRESENT(mtr) GET_BITFIELD(mtr, 14, 14)
+#define RANK_CNT_BITS(mtr) GET_BITFIELD(mtr, 12, 13)
+#define RANK_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 2, 4)
+#define COL_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 0, 1)
+
+static const u32 tad_ch_nilv_offset[] = {
+ 0x90, 0x94, 0x98, 0x9c,
+ 0xa0, 0xa4, 0xa8, 0xac,
+ 0xb0, 0xb4, 0xb8, 0xbc,
+};
+#define CHN_IDX_OFFSET(reg) GET_BITFIELD(reg, 28, 29)
+#define TAD_OFFSET(reg) (GET_BITFIELD(reg, 6, 25) << 26)
+
+static const u32 rir_way_limit[] = {
+ 0x108, 0x10c, 0x110, 0x114, 0x118,
+};
+#define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit)
+
+#define IS_RIR_VALID(reg) GET_BITFIELD(reg, 31, 31)
+#define RIR_WAY(reg) GET_BITFIELD(reg, 28, 29)
+#define RIR_LIMIT(reg)		((GET_BITFIELD(reg,  1, 10) << 29) | 0x1fffffff)
+
+#define MAX_RIR_WAY 8
+
+static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
+ { 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c },
+ { 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c },
+ { 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c },
+ { 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c },
+ { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc },
+};
+
+#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19)
+#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14)
+
+/* Device 16, functions 2-7 */
+
+/*
+ * FIXME: Implement the error count reads directly
+ */
+
+static const u32 correrrcnt[] = {
+ 0x104, 0x108, 0x10c, 0x110,
+};
+
+#define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31)
+#define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30)
+#define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15)
+#define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14)
+
+static const u32 correrrthrsld[] = {
+ 0x11c, 0x120, 0x124, 0x128,
+};
+
+#define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30)
+#define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14)
+
+
+
+#define SB_RANK_CFG_A 0x0328
+
+#define IB_RANK_CFG_A 0x0320
+
+#define IS_RDIMM_ENABLED(reg) GET_BITFIELD(reg, 11, 11)
+
+/*
+ * sbridge structs
+ */
+
+#define NUM_CHANNELS 4
+#define MAX_DIMMS 3 /* Max DIMMS per channel */
+
+enum type {
+ SANDY_BRIDGE,
+ IVY_BRIDGE,
+};
+
+struct sbridge_pvt;
+struct sbridge_info {
+ enum type type;
+ u32 mcmtr;
+ u32 rankcfgr;
+ u64 (*get_tolm)(struct sbridge_pvt *pvt);
+ u64 (*get_tohm)(struct sbridge_pvt *pvt);
+ const u32 *dram_rule;
+ const u32 *interleave_list;
+ const struct interleave_pkg *interleave_pkg;
+ u8 max_sad;
+ u8 max_interleave;
+};
+
+struct sbridge_channel {
+ u32 ranks;
+ u32 dimms;
+};
+
+struct pci_id_descr {
+ int dev;
+ int func;
+ int dev_id;
+ int optional;
+};
+
+struct pci_id_table {
+ const struct pci_id_descr *descr;
+ int n_devs;
+};
+
+struct sbridge_dev {
+ struct list_head list;
+ u8 bus, mc;
+ u8 node_id, source_id;
+ struct pci_dev **pdev;
+ int n_devs;
+ struct mem_ctl_info *mci;
+};
+
+struct sbridge_pvt {
+ struct pci_dev *pci_ta, *pci_ddrio, *pci_ras;
+ struct pci_dev *pci_sad0, *pci_sad1;
+ struct pci_dev *pci_ha0, *pci_ha1;
+ struct pci_dev *pci_br0, *pci_br1;
+ struct pci_dev *pci_tad[NUM_CHANNELS];
+
+ struct sbridge_dev *sbridge_dev;
+
+ struct sbridge_info info;
+ struct sbridge_channel channel[NUM_CHANNELS];
+
+ /* Memory type detection */
+ bool is_mirrored, is_lockstep, is_close_pg;
+
+ /* Fifo double buffers */
+ struct mce mce_entry[MCE_LOG_LEN];
+ struct mce mce_outentry[MCE_LOG_LEN];
+
+ /* Fifo in/out counters */
+ unsigned mce_in, mce_out;
+
+ /* Count indicator to show errors not got */
+ unsigned mce_overrun;
+
+ /* Memory description */
+ u64 tolm, tohm;
+};
+
+#define PCI_DESCR(device, function, device_id, opt) \
+ .dev = (device), \
+ .func = (function), \
+ .dev_id = (device_id), \
+ .optional = opt
+
+static const struct pci_id_descr pci_dev_descr_sbridge[] = {
+ /* Processor Home Agent */
+ { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0) },
+
+ /* Memory controller */
+ { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0) },
+ { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0) },
+ { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0) },
+ { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0) },
+ { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0) },
+ { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0) },
+ { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1) },
+
+ /* System Address Decoder */
+ { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0) },
+ { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0) },
+
+ /* Broadcast Registers */
+ { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) },
+};
+
+#define PCI_ID_TABLE_ENTRY(A) { .descr = A, .n_devs = ARRAY_SIZE(A) }
+static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
+ PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge),
+ {0,} /* 0 terminated list. */
+};
+
+/*
+ * The DDRIO0 device ID depends on whether the part has one or two
+ * home agents (1HA or 2HA):
+ * 1HA:
+ *	0x0eb8 (17.0) is DDRIO0
+ * 2HA:
+ *	0x0ebc (17.4) is DDRIO0
+ */
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0 0x0eb8
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0 0x0ebc
+
+/* pci ids */
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0 0x0ea0
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA 0x0ea8
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS 0x0e71
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0 0x0eaa
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1 0x0eab
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2 0x0eac
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3 0x0ead
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_SAD 0x0ec8
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_BR0 0x0ec9
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_BR1 0x0eca
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1 0x0e60
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA 0x0e68
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS 0x0e79
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 0x0e6a
+#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1 0x0e6b
+
+static const struct pci_id_descr pci_dev_descr_ibridge[] = {
+ /* Processor Home Agent */
+ { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0) },
+
+ /* Memory controller */
+ { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0) },
+ { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0) },
+ { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0) },
+ { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0) },
+ { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0) },
+ { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0) },
+
+ /* System Address Decoder */
+ { PCI_DESCR(22, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0) },
+
+ /* Broadcast Registers */
+ { PCI_DESCR(22, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1) },
+ { PCI_DESCR(22, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0) },
+
+ /* Optional, mode 2HA */
+ { PCI_DESCR(28, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1) },
+#if 0
+ { PCI_DESCR(29, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1) },
+ { PCI_DESCR(29, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1) },
+#endif
+ { PCI_DESCR(29, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1) },
+ { PCI_DESCR(29, 3, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1) },
+
+ { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1) },
+ { PCI_DESCR(17, 4, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1) },
+};
+
+static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
+ PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge),
+ {0,} /* 0 terminated list. */
+};
+
+/*
+ * pci_device_id table for which devices we are looking for
+ */
+static const struct pci_device_id sbridge_pci_tbl[] = {
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)},
+ {0,} /* 0 terminated list. */
+};
+
+/****************************************************************************
+ Ancillary status routines
+ ****************************************************************************/
+
+static inline int numrank(u32 mtr)
+{
+ int ranks = (1 << RANK_CNT_BITS(mtr));
+
+ if (ranks > 4) {
+ edac_dbg(0, "Invalid number of ranks: %d (max = 4) raw value = %x (%04x)\n",
+ ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr);
+ return -EINVAL;
+ }
+
+ return ranks;
+}
+
+static inline int numrow(u32 mtr)
+{
+ int rows = (RANK_WIDTH_BITS(mtr) + 12);
+
+ if (rows < 13 || rows > 18) {
+		edac_dbg(0, "Invalid number of rows: %d (should be between 13 and 18) raw value = %x (%04x)\n",
+ rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr);
+ return -EINVAL;
+ }
+
+ return 1 << rows;
+}
+
+static inline int numcol(u32 mtr)
+{
+ int cols = (COL_WIDTH_BITS(mtr) + 10);
+
+ if (cols > 12) {
+		edac_dbg(0, "Invalid number of cols: %d (max = 12) raw value = %x (%04x)\n",
+ cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr);
+ return -EINVAL;
+ }
+
+ return 1 << cols;
+}
+
+static struct sbridge_dev *get_sbridge_dev(u8 bus)
+{
+ struct sbridge_dev *sbridge_dev;
+
+ list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
+ if (sbridge_dev->bus == bus)
+ return sbridge_dev;
+ }
+
+ return NULL;
+}
+
+static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
+ const struct pci_id_table *table)
+{
+ struct sbridge_dev *sbridge_dev;
+
+ sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL);
+ if (!sbridge_dev)
+ return NULL;
+
+	sbridge_dev->pdev = kcalloc(table->n_devs, sizeof(*sbridge_dev->pdev),
+				    GFP_KERNEL);
+ if (!sbridge_dev->pdev) {
+ kfree(sbridge_dev);
+ return NULL;
+ }
+
+ sbridge_dev->bus = bus;
+ sbridge_dev->n_devs = table->n_devs;
+ list_add_tail(&sbridge_dev->list, &sbridge_edac_list);
+
+ return sbridge_dev;
+}
+
+static void free_sbridge_dev(struct sbridge_dev *sbridge_dev)
+{
+ list_del(&sbridge_dev->list);
+ kfree(sbridge_dev->pdev);
+ kfree(sbridge_dev);
+}
+
+static u64 sbridge_get_tolm(struct sbridge_pvt *pvt)
+{
+ u32 reg;
+
+	/* Address range is 31:28 */
+ pci_read_config_dword(pvt->pci_sad1, TOLM, &reg);
+ return GET_TOLM(reg);
+}
+
+static u64 sbridge_get_tohm(struct sbridge_pvt *pvt)
+{
+ u32 reg;
+
+ pci_read_config_dword(pvt->pci_sad1, TOHM, &reg);
+ return GET_TOHM(reg);
+}
+
+static u64 ibridge_get_tolm(struct sbridge_pvt *pvt)
+{
+ u32 reg;
+
+ pci_read_config_dword(pvt->pci_br1, TOLM, &reg);
+
+ return GET_TOLM(reg);
+}
+
+static u64 ibridge_get_tohm(struct sbridge_pvt *pvt)
+{
+ u32 reg;
+
+ pci_read_config_dword(pvt->pci_br1, TOHM, &reg);
+
+ return GET_TOHM(reg);
+}
+
+static inline u8 sad_pkg_socket(u8 pkg)
+{
+ /* on Ivy Bridge, nodeID is SASS, where A is HA and S is node id */
+ return (pkg >> 3) | (pkg & 0x3);
+}
+
+static inline u8 sad_pkg_ha(u8 pkg)
+{
+ return (pkg >> 2) & 0x1;
+}
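+
+/*
+ * Worked example: pkg = 0b1110 (SASS: S = 1, A = 1, SS = 0b10) gives
+ * sad_pkg_socket() = (0b1110 >> 3) | (0b1110 & 0x3) = 0b01 | 0b10 = 3
+ * and sad_pkg_ha() = (0b1110 >> 2) & 1 = 1.
+ */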
+
+/****************************************************************************
+ Memory check routines
+ ****************************************************************************/
+static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
+ unsigned func)
+{
+ struct sbridge_dev *sbridge_dev = get_sbridge_dev(bus);
+ int i;
+
+ if (!sbridge_dev)
+ return NULL;
+
+ for (i = 0; i < sbridge_dev->n_devs; i++) {
+ if (!sbridge_dev->pdev[i])
+ continue;
+
+ if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot &&
+ PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) {
+ edac_dbg(1, "Associated %02x.%02x.%d with %p\n",
+ bus, slot, func, sbridge_dev->pdev[i]);
+ return sbridge_dev->pdev[i];
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * check_if_ecc_is_active() - Checks if ECC is active
+ * @bus:	Device bus
+ */
+static int check_if_ecc_is_active(const u8 bus)
+{
+ struct pci_dev *pdev = NULL;
+ u32 mcmtr;
+
+ pdev = get_pdev_slot_func(bus, 15, 0);
+ if (!pdev) {
+		sbridge_printk(KERN_ERR,
+			       "Couldn't find PCI device %02x:%02d.%d!\n",
+			       bus, 15, 0);
+ return -ENODEV;
+ }
+
+ pci_read_config_dword(pdev, MCMTR, &mcmtr);
+ if (!IS_ECC_ENABLED(mcmtr)) {
+ sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
+ return -ENODEV;
+ }
+ return 0;
+}
+
+static int get_dimm_config(struct mem_ctl_info *mci)
+{
+ struct sbridge_pvt *pvt = mci->pvt_info;
+ struct dimm_info *dimm;
+ unsigned i, j, banks, ranks, rows, cols, npages;
+ u64 size;
+ u32 reg;
+ enum edac_type mode;
+ enum mem_type mtype;
+
+ pci_read_config_dword(pvt->pci_br0, SAD_TARGET, &reg);
+ pvt->sbridge_dev->source_id = SOURCE_ID(reg);
+
+ pci_read_config_dword(pvt->pci_br0, SAD_CONTROL, &reg);
+ pvt->sbridge_dev->node_id = NODE_ID(reg);
+ edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
+ pvt->sbridge_dev->mc,
+ pvt->sbridge_dev->node_id,
+ pvt->sbridge_dev->source_id);
+
+ pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
+ if (IS_MIRROR_ENABLED(reg)) {
+ edac_dbg(0, "Memory mirror is enabled\n");
+ pvt->is_mirrored = true;
+ } else {
+ edac_dbg(0, "Memory mirror is disabled\n");
+ pvt->is_mirrored = false;
+ }
+
+ pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
+ if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
+ edac_dbg(0, "Lockstep is enabled\n");
+ mode = EDAC_S8ECD8ED;
+ pvt->is_lockstep = true;
+ } else {
+ edac_dbg(0, "Lockstep is disabled\n");
+ mode = EDAC_S4ECD4ED;
+ pvt->is_lockstep = false;
+ }
+ if (IS_CLOSE_PG(pvt->info.mcmtr)) {
+ edac_dbg(0, "address map is on closed page mode\n");
+ pvt->is_close_pg = true;
+ } else {
+ edac_dbg(0, "address map is on open page mode\n");
+ pvt->is_close_pg = false;
+ }
+
+ if (pvt->pci_ddrio) {
+ pci_read_config_dword(pvt->pci_ddrio, pvt->info.rankcfgr,
+ &reg);
+ if (IS_RDIMM_ENABLED(reg)) {
+ /* FIXME: Can also be LRDIMM */
+ edac_dbg(0, "Memory is registered\n");
+ mtype = MEM_RDDR3;
+ } else {
+ edac_dbg(0, "Memory is unregistered\n");
+ mtype = MEM_DDR3;
+ }
+ } else {
+ edac_dbg(0, "Cannot determine memory type\n");
+ mtype = MEM_UNKNOWN;
+ }
+
+ /* On all supported DDR3 DIMM types, there are 8 banks available */
+ banks = 8;
+
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ u32 mtr;
+
+ for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+ i, j, 0);
+ pci_read_config_dword(pvt->pci_tad[i],
+ mtr_regs[j], &mtr);
+ edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr);
+ if (IS_DIMM_PRESENT(mtr)) {
+ pvt->channel[i].dimms++;
+
+ ranks = numrank(mtr);
+ rows = numrow(mtr);
+ cols = numcol(mtr);
+
+ /* DDR3 has 8 I/O banks */
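+				/*
+				 * Illustrative arithmetic: rows * cols * banks
+				 * * ranks counts 8-byte locations, so shifting
+				 * by (20 - 3) converts straight to MiB. E.g. a
+				 * hypothetical 2-rank DIMM with 2^15 rows and
+				 * 2^10 columns gives 2^29 locations = 4096 MiB.
+				 */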
+ size = ((u64)rows * cols * banks * ranks) >> (20 - 3);
+ npages = MiB_TO_PAGES(size);
+
+ edac_dbg(0, "mc#%d: channel %d, dimm %d, %Ld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
+ pvt->sbridge_dev->mc, i, j,
+ size, npages,
+ banks, ranks, rows, cols);
+
+ dimm->nr_pages = npages;
+ dimm->grain = 32;
+ dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
+ dimm->mtype = mtype;
+ dimm->edac_mode = mode;
+ snprintf(dimm->label, sizeof(dimm->label),
+ "CPU_SrcID#%u_Channel#%u_DIMM#%u",
+ pvt->sbridge_dev->source_id, i, j);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void get_memory_layout(const struct mem_ctl_info *mci)
+{
+ struct sbridge_pvt *pvt = mci->pvt_info;
+ int i, j, k, n_sads, n_tads, sad_interl;
+ u32 reg;
+ u64 limit, prv = 0;
+ u64 tmp_mb;
+ u32 mb, kb;
+ u32 rir_way;
+
+ /*
+ * Step 1) Get TOLM/TOHM ranges
+ */
+
+ pvt->tolm = pvt->info.get_tolm(pvt);
+ tmp_mb = (1 + pvt->tolm) >> 20;
+
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm);
+
+ /* Address range is already 45:25 */
+ pvt->tohm = pvt->info.get_tohm(pvt);
+ tmp_mb = (1 + pvt->tohm) >> 20;
+
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm);
+
+ /*
+ * Step 2) Get SAD range and SAD Interleave list
+ * TAD registers contain the interleave wayness. However, it
+ * seems simpler to just discover it indirectly, with the
+	 * algorithm below.
+ */
+ prv = 0;
+ for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) {
+ /* SAD_LIMIT Address range is 45:26 */
+ pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads],
+ &reg);
+ limit = SAD_LIMIT(reg);
+
+ if (!DRAM_RULE_ENABLE(reg))
+ continue;
+
+ if (limit <= prv)
+ break;
+
+ tmp_mb = (limit + 1) >> 20;
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n",
+ n_sads,
+ get_dram_attr(reg),
+ mb, kb,
+ ((u64)tmp_mb) << 20L,
+ INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]",
+ reg);
+ prv = limit;
+
+ pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads],
+ &reg);
+ sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0);
+ for (j = 0; j < 8; j++) {
+ u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, j);
+ if (j > 0 && sad_interl == pkg)
+ break;
+
+ edac_dbg(0, "SAD#%d, interleave #%d: %d\n",
+ n_sads, j, pkg);
+ }
+ }
+
+ /*
+ * Step 3) Get TAD range
+ */
+ prv = 0;
+ for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
+ pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
+ &reg);
+ limit = TAD_LIMIT(reg);
+ if (limit <= prv)
+ break;
+ tmp_mb = (limit + 1) >> 20;
+
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
+ n_tads, mb, kb,
+ ((u64)tmp_mb) << 20L,
+ (u32)TAD_SOCK(reg),
+ (u32)TAD_CH(reg),
+ (u32)TAD_TGT0(reg),
+ (u32)TAD_TGT1(reg),
+ (u32)TAD_TGT2(reg),
+ (u32)TAD_TGT3(reg),
+ reg);
+ prv = limit;
+ }
+
+ /*
+	 * Step 4) Get TAD offsets, for each channel
+ */
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (!pvt->channel[i].dimms)
+ continue;
+ for (j = 0; j < n_tads; j++) {
+ pci_read_config_dword(pvt->pci_tad[i],
+ tad_ch_nilv_offset[j],
+ &reg);
+ tmp_mb = TAD_OFFSET(reg) >> 20;
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n",
+ i, j,
+ mb, kb,
+ ((u64)tmp_mb) << 20L,
+ reg);
+ }
+ }
+
+ /*
+	 * Step 5) Get RIR Wayness/Limit, for each channel
+ */
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (!pvt->channel[i].dimms)
+ continue;
+ for (j = 0; j < MAX_RIR_RANGES; j++) {
+ pci_read_config_dword(pvt->pci_tad[i],
+ rir_way_limit[j],
+ &reg);
+
+ if (!IS_RIR_VALID(reg))
+ continue;
+
+ tmp_mb = RIR_LIMIT(reg) >> 20;
+ rir_way = 1 << RIR_WAY(reg);
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n",
+ i, j,
+ mb, kb,
+ ((u64)tmp_mb) << 20L,
+ rir_way,
+ reg);
+
+ for (k = 0; k < rir_way; k++) {
+ pci_read_config_dword(pvt->pci_tad[i],
+ rir_offset[j][k],
+ &reg);
+ tmp_mb = RIR_OFFSET(reg) << 6;
+
+ mb = div_u64_rem(tmp_mb, 1000, &kb);
+ edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
+ i, j, k,
+ mb, kb,
+ ((u64)tmp_mb) << 20L,
+ (u32)RIR_RNK_TGT(reg),
+ reg);
+ }
+ }
+ }
+}
+
+static struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
+{
+ struct sbridge_dev *sbridge_dev;
+
+ list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
+ if (sbridge_dev->node_id == node_id)
+ return sbridge_dev->mci;
+ }
+ return NULL;
+}
+
+static int get_memory_error_data(struct mem_ctl_info *mci,
+ u64 addr,
+ u8 *socket,
+ long *channel_mask,
+ u8 *rank,
+ char **area_type, char *msg)
+{
+ struct mem_ctl_info *new_mci;
+ struct sbridge_pvt *pvt = mci->pvt_info;
+ struct pci_dev *pci_ha;
+ int n_rir, n_sads, n_tads, sad_way, sck_xch;
+ int sad_interl, idx, base_ch;
+ int interleave_mode;
+ unsigned sad_interleave[pvt->info.max_interleave];
+ u32 reg;
+ u8 ch_way, sck_way, pkg, sad_ha = 0;
+ u32 tad_offset;
+ u32 rir_way;
+ u32 mb, kb;
+ u64 ch_addr, offset, limit = 0, prv = 0;
+
+
+ /*
+ * Step 0) Check if the address is at special memory ranges
+	 * The check below is probably enough to cover all cases where
+	 * the error is not inside memory, except for the legacy
+	 * range (e.g. VGA addresses). It is unlikely, however, that the
+ * memory controller would generate an error on that range.
+ */
+ if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
+ sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
+ return -EINVAL;
+ }
+ if (addr >= (u64)pvt->tohm) {
+ sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
+ return -EINVAL;
+ }
+
+ /*
+ * Step 1) Get socket
+ */
+ for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) {
+ pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads],
+ &reg);
+
+ if (!DRAM_RULE_ENABLE(reg))
+ continue;
+
+ limit = SAD_LIMIT(reg);
+ if (limit <= prv) {
+ sprintf(msg, "Can't discover the memory socket");
+ return -EINVAL;
+ }
+ if (addr <= limit)
+ break;
+ prv = limit;
+ }
+ if (n_sads == pvt->info.max_sad) {
+ sprintf(msg, "Can't discover the memory socket");
+ return -EINVAL;
+ }
+ *area_type = get_dram_attr(reg);
+ interleave_mode = INTERLEAVE_MODE(reg);
+
+ pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads],
+ &reg);
+
+ if (pvt->info.type == SANDY_BRIDGE) {
+ sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0);
+ for (sad_way = 0; sad_way < 8; sad_way++) {
+ u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, sad_way);
+ if (sad_way > 0 && sad_interl == pkg)
+ break;
+ sad_interleave[sad_way] = pkg;
+ edac_dbg(0, "SAD interleave #%d: %d\n",
+ sad_way, sad_interleave[sad_way]);
+ }
+ edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n",
+ pvt->sbridge_dev->mc,
+ n_sads,
+ addr,
+ limit,
+ sad_way + 7,
+ !interleave_mode ? "" : "XOR[18:16]");
+ if (interleave_mode)
+ idx = ((addr >> 6) ^ (addr >> 16)) & 7;
+ else
+ idx = (addr >> 6) & 7;
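+		/*
+		 * Fold the 3-bit interleave index down to the configured
+		 * wayness: e.g. with 2-way interleave only the lowest bit
+		 * of idx selects between the two package targets.
+		 */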
+ switch (sad_way) {
+ case 1:
+ idx = 0;
+ break;
+ case 2:
+ idx = idx & 1;
+ break;
+ case 4:
+ idx = idx & 3;
+ break;
+ case 8:
+ break;
+ default:
+ sprintf(msg, "Can't discover socket interleave");
+ return -EINVAL;
+ }
+ *socket = sad_interleave[idx];
+ edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n",
+ idx, sad_way, *socket);
+ } else {
+ /* Ivy Bridge's SAD mode doesn't support XOR interleave mode */
+ idx = (addr >> 6) & 7;
+ pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx);
+ *socket = sad_pkg_socket(pkg);
+ sad_ha = sad_pkg_ha(pkg);
+ edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n",
+ idx, *socket, sad_ha);
+ }
+
+ /*
+ * Move to the proper node structure, in order to access the
+ * right PCI registers
+ */
+ new_mci = get_mci_for_node_id(*socket);
+ if (!new_mci) {
+ sprintf(msg, "Struct for socket #%u wasn't initialized",
+ *socket);
+ return -EINVAL;
+ }
+ mci = new_mci;
+ pvt = mci->pvt_info;
+
+ /*
+ * Step 2) Get memory channel
+ */
+ prv = 0;
+ if (pvt->info.type == SANDY_BRIDGE)
+ pci_ha = pvt->pci_ha0;
+ else {
+ if (sad_ha)
+ pci_ha = pvt->pci_ha1;
+ else
+ pci_ha = pvt->pci_ha0;
+ }
+ for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
+ pci_read_config_dword(pci_ha, tad_dram_rule[n_tads], &reg);
+ limit = TAD_LIMIT(reg);
+ if (limit <= prv) {
+ sprintf(msg, "Can't discover the memory channel");
+ return -EINVAL;
+ }
+ if (addr <= limit)
+ break;
+ prv = limit;
+ }
+ if (n_tads == MAX_TAD) {
+ sprintf(msg, "Can't discover the memory channel");
+ return -EINVAL;
+ }
+
+ ch_way = TAD_CH(reg) + 1;
+ sck_way = TAD_SOCK(reg) + 1;
+
+ if (ch_way == 3)
+ idx = addr >> 6;
+ else
+ idx = addr >> (6 + sck_way);
+ idx = idx % ch_way;
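+	/*
+	 * Roughly: the channel index comes from the system address bits just
+	 * above the 64-byte cache line offset and the socket interleave
+	 * bits, so consecutive cache lines rotate across the interleaved
+	 * channels.
+	 */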
+
+ /*
+ * FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ???
+ */
+ switch (idx) {
+ case 0:
+ base_ch = TAD_TGT0(reg);
+ break;
+ case 1:
+ base_ch = TAD_TGT1(reg);
+ break;
+ case 2:
+ base_ch = TAD_TGT2(reg);
+ break;
+ case 3:
+ base_ch = TAD_TGT3(reg);
+ break;
+ default:
+ sprintf(msg, "Can't discover the TAD target");
+ return -EINVAL;
+ }
+ *channel_mask = 1 << base_ch;
+
+ pci_read_config_dword(pvt->pci_tad[base_ch],
+ tad_ch_nilv_offset[n_tads],
+ &tad_offset);
+
+ if (pvt->is_mirrored) {
+ *channel_mask |= 1 << ((base_ch + 2) % 4);
+ switch(ch_way) {
+ case 2:
+ case 4:
+ sck_xch = 1 << sck_way * (ch_way >> 1);
+ break;
+ default:
+ sprintf(msg, "Invalid mirror set. Can't decode addr");
+ return -EINVAL;
+ }
+ } else
+ sck_xch = (1 << sck_way) * ch_way;
+
+ if (pvt->is_lockstep)
+ *channel_mask |= 1 << ((base_ch + 1) % 4);
+
+ offset = TAD_OFFSET(tad_offset);
+
+ edac_dbg(0, "TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n",
+ n_tads,
+ addr,
+ limit,
+ (u32)TAD_SOCK(reg),
+ ch_way,
+ offset,
+ idx,
+ base_ch,
+ *channel_mask);
+
+ /* Calculate channel address */
+ /* Remove the TAD offset */
+
+ if (offset > addr) {
+ sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
+ offset, addr);
+ return -EINVAL;
+ }
+ addr -= offset;
+ /* Store the low bits [0:6] of the addr */
+ ch_addr = addr & 0x7f;
+ /* Remove socket wayness and remove 6 bits */
+ addr >>= 6;
+ addr = div_u64(addr, sck_xch);
+#if 0
+ /* Divide by channel way */
+ addr = addr / ch_way;
+#endif
+ /* Recover the last 6 bits */
+ ch_addr |= addr << 6;
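+	/*
+	 * At this point ch_addr holds the address as seen by the channel:
+	 * the low bits are kept verbatim while the socket/channel interleave
+	 * wayness has been divided out of the bits above them.
+	 */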
+
+ /*
+ * Step 3) Decode rank
+ */
+ for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
+ pci_read_config_dword(pvt->pci_tad[base_ch],
+ rir_way_limit[n_rir],
+ &reg);
+
+ if (!IS_RIR_VALID(reg))
+ continue;
+
+ limit = RIR_LIMIT(reg);
+ mb = div_u64_rem(limit >> 20, 1000, &kb);
+ edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
+ n_rir,
+ mb, kb,
+ limit,
+ 1 << RIR_WAY(reg));
+ if (ch_addr <= limit)
+ break;
+ }
+ if (n_rir == MAX_RIR_RANGES) {
+ sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
+ ch_addr);
+ return -EINVAL;
+ }
+ rir_way = RIR_WAY(reg);
+ if (pvt->is_close_pg)
+ idx = (ch_addr >> 6);
+ else
+ idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */
+ idx %= 1 << rir_way;
+
+ pci_read_config_dword(pvt->pci_tad[base_ch],
+ rir_offset[n_rir][idx],
+ &reg);
+ *rank = RIR_RNK_TGT(reg);
+
+ edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
+ n_rir,
+ ch_addr,
+ limit,
+ rir_way,
+ idx);
+
+ return 0;
+}
+
+/****************************************************************************
+ Device initialization routines: put/get, init/exit
+ ****************************************************************************/
+
+/*
+ *	sbridge_put_devices	'put' all the devices that we have
+ *				reserved via 'get' for one sbridge_dev
+ */
+static void sbridge_put_devices(struct sbridge_dev *sbridge_dev)
+{
+ int i;
+
+ edac_dbg(0, "\n");
+ for (i = 0; i < sbridge_dev->n_devs; i++) {
+ struct pci_dev *pdev = sbridge_dev->pdev[i];
+ if (!pdev)
+ continue;
+ edac_dbg(0, "Removing dev %02x:%02x.%d\n",
+ pdev->bus->number,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ pci_dev_put(pdev);
+ }
+}
+
+static void sbridge_put_all_devices(void)
+{
+ struct sbridge_dev *sbridge_dev, *tmp;
+
+ list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) {
+ sbridge_put_devices(sbridge_dev);
+ free_sbridge_dev(sbridge_dev);
+ }
+}
+
+static int sbridge_get_onedevice(struct pci_dev **prev,
+ u8 *num_mc,
+ const struct pci_id_table *table,
+ const unsigned devno)
+{
+ struct sbridge_dev *sbridge_dev;
+ const struct pci_id_descr *dev_descr = &table->descr[devno];
+
+ struct pci_dev *pdev = NULL;
+ u8 bus = 0;
+
+ sbridge_printk(KERN_DEBUG,
+ "Seeking for: dev %02x.%d PCI ID %04x:%04x\n",
+ dev_descr->dev, dev_descr->func,
+ PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+
+ pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ dev_descr->dev_id, *prev);
+
+ if (!pdev) {
+ if (*prev) {
+ *prev = pdev;
+ return 0;
+ }
+
+ if (dev_descr->optional)
+ return 0;
+
+ if (devno == 0)
+ return -ENODEV;
+
+ sbridge_printk(KERN_INFO,
+ "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
+ dev_descr->dev, dev_descr->func,
+ PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+
+ /* End of list, leave */
+ return -ENODEV;
+ }
+ bus = pdev->bus->number;
+
+ sbridge_dev = get_sbridge_dev(bus);
+ if (!sbridge_dev) {
+ sbridge_dev = alloc_sbridge_dev(bus, table);
+ if (!sbridge_dev) {
+ pci_dev_put(pdev);
+ return -ENOMEM;
+ }
+ (*num_mc)++;
+ }
+
+ if (sbridge_dev->pdev[devno]) {
+ sbridge_printk(KERN_ERR,
+ "Duplicated device for "
+ "dev %02x:%d.%d PCI ID %04x:%04x\n",
+ bus, dev_descr->dev, dev_descr->func,
+ PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+ pci_dev_put(pdev);
+ return -ENODEV;
+ }
+
+ sbridge_dev->pdev[devno] = pdev;
+
+ /* Sanity check */
+ if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
+ PCI_FUNC(pdev->devfn) != dev_descr->func)) {
+ sbridge_printk(KERN_ERR,
+ "Device PCI ID %04x:%04x "
+ "has dev %02x:%d.%d instead of dev %02x:%02x.%d\n",
+ PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
+ bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+ bus, dev_descr->dev, dev_descr->func);
+ return -ENODEV;
+ }
+
+ /* Be sure that the device is enabled */
+ if (unlikely(pci_enable_device(pdev) < 0)) {
+ sbridge_printk(KERN_ERR,
+ "Couldn't enable "
+ "dev %02x:%d.%d PCI ID %04x:%04x\n",
+ bus, dev_descr->dev, dev_descr->func,
+ PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+ return -ENODEV;
+ }
+
+ edac_dbg(0, "Detected dev %02x:%d.%d PCI ID %04x:%04x\n",
+ bus, dev_descr->dev, dev_descr->func,
+ PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+
+ /*
+	 * As stated in drivers/pci/search.c, the reference count for
+	 * @from is always decremented if it is not %NULL. Since we walk
+	 * the device list until pci_get_device() returns NULL, take an
+	 * extra reference here so the device stays pinned.
+ */
+ pci_dev_get(pdev);
+
+ *prev = pdev;
+
+ return 0;
+}
+
+/*
+ * sbridge_get_all_devices - Find and perform 'get' operation on the MCH's
+ * device/functions we want to reference for this driver.
+ * Need to 'get' device 16 func 1 and func 2.
+ * @num_mc: pointer to the memory controllers count, to be incremented in case
+ * of success.
+ * @table: model specific table
+ *
+ * Returns 0 on success or an error code otherwise.
+ */
+static int sbridge_get_all_devices(u8 *num_mc,
+ const struct pci_id_table *table)
+{
+ int i, rc;
+ struct pci_dev *pdev = NULL;
+
+ while (table && table->descr) {
+ for (i = 0; i < table->n_devs; i++) {
+ pdev = NULL;
+ do {
+ rc = sbridge_get_onedevice(&pdev, num_mc,
+ table, i);
+ if (rc < 0) {
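+					/*
+					 * A missing first device means this
+					 * PCI ID table doesn't match the
+					 * platform; skip to the next table
+					 * rather than failing the probe.
+					 */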
+ if (i == 0) {
+ i = table->n_devs;
+ break;
+ }
+ sbridge_put_all_devices();
+ return -ENODEV;
+ }
+ } while (pdev);
+ }
+ table++;
+ }
+
+ return 0;
+}
+
+static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
+ struct sbridge_dev *sbridge_dev)
+{
+ struct sbridge_pvt *pvt = mci->pvt_info;
+ struct pci_dev *pdev;
+ int i, func, slot;
+
+ for (i = 0; i < sbridge_dev->n_devs; i++) {
+ pdev = sbridge_dev->pdev[i];
+ if (!pdev)
+ continue;
+ slot = PCI_SLOT(pdev->devfn);
+ func = PCI_FUNC(pdev->devfn);
+ switch (slot) {
+ case 12:
+ switch (func) {
+ case 6:
+ pvt->pci_sad0 = pdev;
+ break;
+ case 7:
+ pvt->pci_sad1 = pdev;
+ break;
+ default:
+ goto error;
+ }
+ break;
+ case 13:
+ switch (func) {
+ case 6:
+ pvt->pci_br0 = pdev;
+ break;
+ default:
+ goto error;
+ }
+ break;
+ case 14:
+ switch (func) {
+ case 0:
+ pvt->pci_ha0 = pdev;
+ break;
+ default:
+ goto error;
+ }
+ break;
+ case 15:
+ switch (func) {
+ case 0:
+ pvt->pci_ta = pdev;
+ break;
+ case 1:
+ pvt->pci_ras = pdev;
+ break;
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ pvt->pci_tad[func - 2] = pdev;
+ break;
+ default:
+ goto error;
+ }
+ break;
+ case 17:
+ switch (func) {
+ case 0:
+ pvt->pci_ddrio = pdev;
+ break;
+ default:
+ goto error;
+ }
+ break;
+ default:
+ goto error;
+ }
+
+ edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n",
+ sbridge_dev->bus,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+ pdev);
+ }
+
+	/* Check if everything was registered */
+	if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
+	    !pvt->pci_tad || !pvt->pci_ras || !pvt->pci_ta)
+ goto enodev;
+
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (!pvt->pci_tad[i])
+ goto enodev;
+ }
+ return 0;
+
+enodev:
+ sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
+ return -ENODEV;
+
+error:
+ sbridge_printk(KERN_ERR, "Device %d, function %d "
+ "is out of the expected range\n",
+ slot, func);
+ return -EINVAL;
+}
+
+static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
+ struct sbridge_dev *sbridge_dev)
+{
+ struct sbridge_pvt *pvt = mci->pvt_info;
+ struct pci_dev *pdev, *tmp;
+ int i, func, slot;
+ bool mode_2ha = false;
+
+ tmp = pci_get_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, NULL);
+ if (tmp) {
+ mode_2ha = true;
+ pci_dev_put(tmp);
+ }
+
+ for (i = 0; i < sbridge_dev->n_devs; i++) {
+ pdev = sbridge_dev->pdev[i];
+ if (!pdev)
+ continue;
+ slot = PCI_SLOT(pdev->devfn);
+ func = PCI_FUNC(pdev->devfn);
+
+ switch (slot) {
+ case 14:
+ if (func == 0) {
+ pvt->pci_ha0 = pdev;
+ break;
+ }
+ goto error;
+ case 15:
+ switch (func) {
+ case 0:
+ pvt->pci_ta = pdev;
+ break;
+ case 1:
+ pvt->pci_ras = pdev;
+ break;
+ case 4:
+ case 5:
+ /* if we have 2 HAs active, channels 2 and 3
+			 * are in another device */
+ if (mode_2ha)
+ break;
+ /* fall through */
+ case 2:
+ case 3:
+ pvt->pci_tad[func - 2] = pdev;
+ break;
+ default:
+ goto error;
+ }
+ break;
+ case 17:
+ if (func == 4) {
+ pvt->pci_ddrio = pdev;
+ break;
+ } else if (func == 0) {
+ if (!mode_2ha)
+ pvt->pci_ddrio = pdev;
+ break;
+ }
+ goto error;
+ case 22:
+ switch (func) {
+ case 0:
+ pvt->pci_sad0 = pdev;
+ break;
+ case 1:
+ pvt->pci_br0 = pdev;
+ break;
+ case 2:
+ pvt->pci_br1 = pdev;
+ break;
+ default:
+ goto error;
+ }
+ break;
+ case 28:
+ if (func == 0) {
+ pvt->pci_ha1 = pdev;
+ break;
+ }
+ goto error;
+ case 29:
+ /* we shouldn't have this device if we have just one
+ * HA present */
+ WARN_ON(!mode_2ha);
+ if (func == 2 || func == 3) {
+ pvt->pci_tad[func] = pdev;
+ break;
+ }
+ goto error;
+ default:
+ goto error;
+ }
+
+ edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n",
+ sbridge_dev->bus,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+ pdev);
+ }
+
+	/* Check if everything was registered */
+ if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 ||
+ !pvt->pci_br1 || !pvt->pci_tad || !pvt->pci_ras ||
+ !pvt->pci_ta)
+ goto enodev;
+
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (!pvt->pci_tad[i])
+ goto enodev;
+ }
+ return 0;
+
+enodev:
+ sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
+ return -ENODEV;
+
+error:
+ sbridge_printk(KERN_ERR,
+ "Device %d, function %d is out of the expected range\n",
+ slot, func);
+ return -EINVAL;
+}
+
+/****************************************************************************
+ Error check routines
+ ****************************************************************************/
+
+/*
+ * While Sandy Bridge has error count registers, the SMI BIOS reads values
+ * from and resets those counters, so they are not reliable for the OS to
+ * read. Thus, we have no option but to just trust whatever the MCE is
+ * telling us about the errors.
+ */
+static void sbridge_mce_output_error(struct mem_ctl_info *mci,
+ const struct mce *m)
+{
+ struct mem_ctl_info *new_mci;
+ struct sbridge_pvt *pvt = mci->pvt_info;
+ enum hw_event_mc_err_type tp_event;
+ char *type, *optype, msg[256];
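+	/*
+	 * The extractions below follow the architectural IA32_MCi_STATUS
+	 * layout (Intel SDM vol. 3): bits 0-15 MCA error code, 16-31
+	 * model-specific error code, 38-52 corrected error count,
+	 * bit 58 ADDRV, bit 61 UC and bit 62 OVER.
+	 */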
+ bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
+ bool overflow = GET_BITFIELD(m->status, 62, 62);
+ bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
+ bool recoverable;
+ u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
+ u32 mscod = GET_BITFIELD(m->status, 16, 31);
+ u32 errcode = GET_BITFIELD(m->status, 0, 15);
+ u32 channel = GET_BITFIELD(m->status, 0, 3);
+ u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+ long channel_mask, first_channel;
+ u8 rank, socket;
+ int rc, dimm;
+ char *area_type = NULL;
+
+ if (pvt->info.type == IVY_BRIDGE)
+ recoverable = true;
+ else
+ recoverable = GET_BITFIELD(m->status, 56, 56);
+
+ if (uncorrected_error) {
+ if (ripv) {
+ type = "FATAL";
+ tp_event = HW_EVENT_ERR_FATAL;
+ } else {
+ type = "NON_FATAL";
+ tp_event = HW_EVENT_ERR_UNCORRECTED;
+ }
+ } else {
+ type = "CORRECTED";
+ tp_event = HW_EVENT_ERR_CORRECTED;
+ }
+
+ /*
+	 * According to Table 15-9 of the Intel Architecture spec vol 3A,
+ * memory errors should fit in this mask:
+ * 000f 0000 1mmm cccc (binary)
+ * where:
+ * f = Correction Report Filtering Bit. If 1, subsequent errors
+ * won't be shown
+ * mmm = error type
+ * cccc = channel
+ * If the mask doesn't match, report an error to the parsing logic
+ */
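+	/*
+	 * Worked example with a hypothetical errcode of 0x0091: the mask
+	 * check passes (0x0091 & 0xef80 == 0x0080), mmm == 1 decodes as a
+	 * memory read error and cccc == 1 points at channel 1.
+	 */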
+	if ((errcode & 0xef80) != 0x80) {
+ optype = "Can't parse: it is not a mem";
+ } else {
+ switch (optypenum) {
+ case 0:
+ optype = "generic undef request error";
+ break;
+ case 1:
+ optype = "memory read error";
+ break;
+ case 2:
+ optype = "memory write error";
+ break;
+ case 3:
+ optype = "addr/cmd error";
+ break;
+ case 4:
+ optype = "memory scrubbing error";
+ break;
+ default:
+ optype = "reserved";
+ break;
+ }
+ }
+
+	/* Only decode errors with a valid address (ADDRV) */
+ if (!GET_BITFIELD(m->status, 58, 58))
+ return;
+
+ rc = get_memory_error_data(mci, m->addr, &socket,
+ &channel_mask, &rank, &area_type, msg);
+ if (rc < 0)
+ goto err_parsing;
+ new_mci = get_mci_for_node_id(socket);
+ if (!new_mci) {
+ strcpy(msg, "Error: socket got corrupted!");
+ goto err_parsing;
+ }
+ mci = new_mci;
+ pvt = mci->pvt_info;
+
+ first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
+
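+	/*
+	 * Ranks are assumed to be grouped four per DIMM slot:
+	 * ranks 0-3 -> DIMM 0, ranks 4-7 -> DIMM 1, ranks 8+ -> DIMM 2.
+	 */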
+ if (rank < 4)
+ dimm = 0;
+ else if (rank < 8)
+ dimm = 1;
+ else
+ dimm = 2;
+
+
+ /*
+ * FIXME: On some memory configurations (mirror, lockstep), the
+ * Memory Controller can't point the error to a single DIMM. The
+ * EDAC core should be handling the channel mask, in order to point
+	 * to the group of DIMMs where the error may be happening.
+ */
+ snprintf(msg, sizeof(msg),
+ "%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d",
+ overflow ? " OVERFLOW" : "",
+ (uncorrected_error && recoverable) ? " recoverable" : "",
+ area_type,
+ mscod, errcode,
+ socket,
+ channel_mask,
+ rank);
+
+ edac_dbg(0, "%s\n", msg);
+
+ /* FIXME: need support for channel mask */
+
+ /* Call the helper to output message */
+ edac_mc_handle_error(tp_event, mci, core_err_cnt,
+ m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
+ channel, dimm, -1,
+ optype, msg);
+ return;
+err_parsing:
+ edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
+ -1, -1, -1,
+ msg, "");
+
+}
+
+/*
+ * sbridge_check_error Retrieve and process errors reported by the
+ * hardware. Called by the Core module.
+ */
+static void sbridge_check_error(struct mem_ctl_info *mci)
+{
+ struct sbridge_pvt *pvt = mci->pvt_info;
+ int i;
+ unsigned count = 0;
+ struct mce *m;
+
+ /*
+ * MCE first step: Copy all mce errors into a temporary buffer
+	 * We use double buffering here to reduce the risk of
+	 * losing an error.
+ */
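+	/*
+	 * The NMI-time notifier produces entries at mce_out; this side
+	 * consumes from mce_in, copying in at most two chunks when the
+	 * pending window wraps past the end of the ring.
+	 */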
+ smp_rmb();
+ count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
+ % MCE_LOG_LEN;
+ if (!count)
+ return;
+
+ m = pvt->mce_outentry;
+ if (pvt->mce_in + count > MCE_LOG_LEN) {
+ unsigned l = MCE_LOG_LEN - pvt->mce_in;
+
+ memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
+ smp_wmb();
+ pvt->mce_in = 0;
+ count -= l;
+ m += l;
+ }
+ memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
+ smp_wmb();
+ pvt->mce_in += count;
+
+ smp_rmb();
+ if (pvt->mce_overrun) {
+ sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
+ pvt->mce_overrun);
+ smp_wmb();
+ pvt->mce_overrun = 0;
+ }
+
+ /*
+ * MCE second step: parse errors and display
+ */
+ for (i = 0; i < count; i++)
+ sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
+}
+
+/*
+ * sbridge_mce_check_error Replicates mcelog routine to get errors
+ *			  This routine simply queues mcelog errors, and
+ *			  returns. The error itself should be handled later
+ * by sbridge_check_error.
+ * WARNING: As this routine should be called at NMI time, extra care should
+ * be taken to avoid deadlocks, and to be as fast as possible.
+ */
+static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct mce *mce = (struct mce *)data;
+ struct mem_ctl_info *mci;
+ struct sbridge_pvt *pvt;
+ char *type;
+
+ if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+ return NOTIFY_DONE;
+
+ mci = get_mci_for_node_id(mce->socketid);
+ if (!mci)
+ return NOTIFY_BAD;
+ pvt = mci->pvt_info;
+
+ /*
+ * Just let mcelog handle it if the error is
+ * outside the memory controller. A memory error
+	 * is indicated by bit 7 = 1 and bits 8-11 and 13-15 = 0.
+	 * Bit 12 has a special meaning.
+ */
+ if ((mce->status & 0xefff) >> 7 != 1)
+ return NOTIFY_DONE;
+
+ if (mce->mcgstatus & MCG_STATUS_MCIP)
+ type = "Exception";
+ else
+ type = "Event";
+
+ sbridge_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");
+
+ sbridge_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx "
+ "Bank %d: %016Lx\n", mce->extcpu, type,
+ mce->mcgstatus, mce->bank, mce->status);
+ sbridge_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc);
+ sbridge_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr);
+ sbridge_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc);
+
+ sbridge_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET "
+ "%u APIC %x\n", mce->cpuvendor, mce->cpuid,
+ mce->time, mce->socketid, mce->apicid);
+
+ /* Only handle if it is the right mc controller */
+ if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc)
+ return NOTIFY_DONE;
+
+ smp_rmb();
+ if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
+ smp_wmb();
+ pvt->mce_overrun++;
+ return NOTIFY_DONE;
+ }
+
+ /* Copy memory error at the ringbuffer */
+ memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
+ smp_wmb();
+ pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
+
+ /* Handle fatal errors immediately */
+ if (mce->mcgstatus & 1)
+ sbridge_check_error(mci);
+
+	/* Advise mcelog that the error was handled */
+ return NOTIFY_STOP;
+}
+
+static struct notifier_block sbridge_mce_dec = {
+ .notifier_call = sbridge_mce_check_error,
+};
+
+/****************************************************************************
+ EDAC register/unregister logic
+ ****************************************************************************/
+
+static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
+{
+ struct mem_ctl_info *mci = sbridge_dev->mci;
+ struct sbridge_pvt *pvt;
+
+ if (unlikely(!mci || !mci->pvt_info)) {
+ edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev);
+
+ sbridge_printk(KERN_ERR, "Couldn't find mci handler\n");
+ return;
+ }
+
+ pvt = mci->pvt_info;
+
+ edac_dbg(0, "MC: mci = %p, dev = %p\n",
+ mci, &sbridge_dev->pdev[0]->dev);
+
+ /* Remove MC sysfs nodes */
+ edac_mc_del_mc(mci->pdev);
+
+ edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
+ kfree(mci->ctl_name);
+ edac_mc_free(mci);
+ sbridge_dev->mci = NULL;
+}
+
+static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
+{
+ struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
+ struct sbridge_pvt *pvt;
+ struct pci_dev *pdev = sbridge_dev->pdev[0];
+ int rc;
+
+ /* Check the number of active and not disabled channels */
+ rc = check_if_ecc_is_active(sbridge_dev->bus);
+ if (unlikely(rc < 0))
+ return rc;
+
+ /* allocate a new MC control structure */
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = NUM_CHANNELS;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_SLOT;
+ layers[1].size = MAX_DIMMS;
+ layers[1].is_virt_csrow = true;
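+	/*
+	 * Marking the slot layer as a virtual csrow lets the EDAC core
+	 * emulate the legacy csrow-based sysfs layout on top of the
+	 * channel/slot hierarchy.
+	 */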
+ mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
+ sizeof(*pvt));
+
+ if (unlikely(!mci))
+ return -ENOMEM;
+
+ edac_dbg(0, "MC: mci = %p, dev = %p\n",
+ mci, &pdev->dev);
+
+ pvt = mci->pvt_info;
+ memset(pvt, 0, sizeof(*pvt));
+
+ /* Associate sbridge_dev and mci for future usage */
+ pvt->sbridge_dev = sbridge_dev;
+ sbridge_dev->mci = mci;
+
+ mci->mtype_cap = MEM_FLAG_DDR3;
+ mci->edac_ctl_cap = EDAC_FLAG_NONE;
+ mci->edac_cap = EDAC_FLAG_NONE;
+ mci->mod_name = "sbridge_edac.c";
+ mci->mod_ver = SBRIDGE_REVISION;
+ mci->dev_name = pci_name(pdev);
+ mci->ctl_page_to_phys = NULL;
+
+ /* Set the function pointer to an actual operation function */
+ mci->edac_check = sbridge_check_error;
+
+ pvt->info.type = type;
+ if (type == IVY_BRIDGE) {
+ pvt->info.rankcfgr = IB_RANK_CFG_A;
+ pvt->info.get_tolm = ibridge_get_tolm;
+ pvt->info.get_tohm = ibridge_get_tohm;
+ pvt->info.dram_rule = ibridge_dram_rule;
+ pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule);
+ pvt->info.interleave_list = ibridge_interleave_list;
+ pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
+ pvt->info.interleave_pkg = ibridge_interleave_pkg;
+ mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx);
+
+ /* Store pci devices at mci for faster access */
+ rc = ibridge_mci_bind_devs(mci, sbridge_dev);
+ if (unlikely(rc < 0))
+ goto fail0;
+ } else {
+ pvt->info.rankcfgr = SB_RANK_CFG_A;
+ pvt->info.get_tolm = sbridge_get_tolm;
+ pvt->info.get_tohm = sbridge_get_tohm;
+ pvt->info.dram_rule = sbridge_dram_rule;
+ pvt->info.max_sad = ARRAY_SIZE(sbridge_dram_rule);
+ pvt->info.interleave_list = sbridge_interleave_list;
+ pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list);
+ pvt->info.interleave_pkg = sbridge_interleave_pkg;
+ mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
+
+ /* Store pci devices at mci for faster access */
+ rc = sbridge_mci_bind_devs(mci, sbridge_dev);
+ if (unlikely(rc < 0))
+ goto fail0;
+ }
+
+
+ /* Get dimm basic config and the memory layout */
+ get_dimm_config(mci);
+ get_memory_layout(mci);
+
+ /* record ptr to the generic device */
+ mci->pdev = &pdev->dev;
+
+ /* add this new MC control structure to EDAC's list of MCs */
+ if (unlikely(edac_mc_add_mc(mci))) {
+ edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+ rc = -EINVAL;
+ goto fail0;
+ }
+
+ return 0;
+
+fail0:
+ kfree(mci->ctl_name);
+ edac_mc_free(mci);
+ sbridge_dev->mci = NULL;
+ return rc;
+}
+
+/*
+ * sbridge_probe Probe for ONE instance of device to see if it is
+ * present.
+ * return:
+ *		0 if a device was found
+ *		< 0 for an error code
+ */
+
+static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ int rc;
+ u8 mc, num_mc = 0;
+ struct sbridge_dev *sbridge_dev;
+ enum type type;
+
+ /* get the pci devices we want to reserve for our use */
+ mutex_lock(&sbridge_edac_lock);
+
+ /*
+ * All memory controllers are allocated at the first pass.
+ */
+ if (unlikely(probed >= 1)) {
+ mutex_unlock(&sbridge_edac_lock);
+ return -ENODEV;
+ }
+ probed++;
+
+ if (pdev->device == PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA) {
+ rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_ibridge_table);
+ type = IVY_BRIDGE;
+ } else {
+ rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_sbridge_table);
+ type = SANDY_BRIDGE;
+ }
+ if (unlikely(rc < 0))
+ goto fail0;
+ mc = 0;
+
+ list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
+ edac_dbg(0, "Registering MC#%d (%d of %d)\n",
+ mc, mc + 1, num_mc);
+ sbridge_dev->mc = mc++;
+ rc = sbridge_register_mci(sbridge_dev, type);
+ if (unlikely(rc < 0))
+ goto fail1;
+ }
+
+ sbridge_printk(KERN_INFO, "Driver loaded.\n");
+
+ mutex_unlock(&sbridge_edac_lock);
+ return 0;
+
+fail1:
+ list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
+ sbridge_unregister_mci(sbridge_dev);
+
+ sbridge_put_all_devices();
+fail0:
+ mutex_unlock(&sbridge_edac_lock);
+ return rc;
+}
+
+/*
+ * sbridge_remove destructor for one instance of device
+ *
+ */
+static void sbridge_remove(struct pci_dev *pdev)
+{
+ struct sbridge_dev *sbridge_dev;
+
+ edac_dbg(0, "\n");
+
+ /*
+	 * we have a problem here: the pdev value for removal will be wrong,
+	 * since it will point to the X58 register used to detect that the
+	 * machine is a Nehalem or later design. However, due to the way
+	 * several PCI devices are grouped together to provide MC
+	 * functionality, we need to use a different method for releasing
+	 * the devices.
+ */
+
+ mutex_lock(&sbridge_edac_lock);
+
+ if (unlikely(!probed)) {
+ mutex_unlock(&sbridge_edac_lock);
+ return;
+ }
+
+ list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
+ sbridge_unregister_mci(sbridge_dev);
+
+ /* Release PCI resources */
+ sbridge_put_all_devices();
+
+ probed--;
+
+ mutex_unlock(&sbridge_edac_lock);
+}
+
+MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
+
+/*
+ * sbridge_driver pci_driver structure for this module
+ *
+ */
+static struct pci_driver sbridge_driver = {
+ .name = "sbridge_edac",
+ .probe = sbridge_probe,
+ .remove = sbridge_remove,
+ .id_table = sbridge_pci_tbl,
+};
+
+/*
+ * sbridge_init Module entry function
+ * Try to initialize this module for its devices
+ */
+static int __init sbridge_init(void)
+{
+ int pci_rc;
+
+ edac_dbg(2, "\n");
+
+ /* Ensure that the OPSTATE is set correctly for POLL or NMI */
+ opstate_init();
+
+ pci_rc = pci_register_driver(&sbridge_driver);
+ if (pci_rc >= 0) {
+ mce_register_decode_chain(&sbridge_mce_dec);
+ if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+ sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
+ return 0;
+ }
+
+ sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
+ pci_rc);
+
+ return pci_rc;
+}
+
+/*
+ * sbridge_exit() Module exit function
+ * Unregister the driver
+ */
+static void __exit sbridge_exit(void)
+{
+ edac_dbg(2, "\n");
+ pci_unregister_driver(&sbridge_driver);
+ mce_unregister_decode_chain(&sbridge_mce_dec);
+}
+
+module_init(sbridge_init);
+module_exit(sbridge_exit);
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mauro Carvalho Chehab");
+MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
+MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers - "
+ SBRIDGE_REVISION);
diff --git a/drivers/edac/tile_edac.c b/drivers/edac/tile_edac.c
new file mode 100644
index 00000000000..578f915ee19
--- /dev/null
+++ b/drivers/edac/tile_edac.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Tilera-specific EDAC driver.
+ *
+ * This source code is derived from the following driver:
+ *
+ * Cell MIC driver for ECC counting
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ * <benh@kernel.crashing.org>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/edac.h>
+#include <hv/hypervisor.h>
+#include <hv/drv_mshim_intf.h>
+
+#include "edac_core.h"
+
+#define DRV_NAME "tile-edac"
+
+/* Number of cs_rows needed per memory controller on TILEPro. */
+#define TILE_EDAC_NR_CSROWS 1
+
+/* Number of channels per memory controller on TILEPro. */
+#define TILE_EDAC_NR_CHANS 1
+
+/* Granularity of reported error in bytes on TILEPro. */
+#define TILE_EDAC_ERROR_GRAIN 8
+
+/* TILE processor has multiple independent memory controllers. */
+struct platform_device *mshim_pdev[TILE_MAX_MSHIMS];
+
+struct tile_edac_priv {
+ int hv_devhdl; /* Hypervisor device handle. */
+ int node; /* Memory controller instance #. */
+ unsigned int ce_count; /*
+ * Correctable-error counter
+ * kept by the driver.
+ */
+};
+
+static void tile_edac_check(struct mem_ctl_info *mci)
+{
+ struct tile_edac_priv *priv = mci->pvt_info;
+ struct mshim_mem_error mem_error;
+
+ if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_error,
+ sizeof(struct mshim_mem_error), MSHIM_MEM_ERROR_OFF) !=
+ sizeof(struct mshim_mem_error)) {
+ pr_err(DRV_NAME ": MSHIM_MEM_ERROR_OFF pread failure.\n");
+ return;
+ }
+
+ /* Check if the current error count is different from the saved one. */
+ if (mem_error.sbe_count != priv->ce_count) {
+ dev_dbg(mci->pdev, "ECC CE err on node %d\n", priv->node);
+ priv->ce_count = mem_error.sbe_count;
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ 0, 0, 0,
+ 0, 0, -1,
+ mci->ctl_name, "");
+ }
+}
+
+/*
+ * Initialize the 'csrows' table within the mci control structure with the
+ * addressing of memory.
+ */
+static int tile_edac_init_csrows(struct mem_ctl_info *mci)
+{
+ struct csrow_info *csrow = mci->csrows[0];
+ struct tile_edac_priv *priv = mci->pvt_info;
+ struct mshim_mem_info mem_info;
+ struct dimm_info *dimm = csrow->channels[0]->dimm;
+
+ if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_info,
+ sizeof(struct mshim_mem_info), MSHIM_MEM_INFO_OFF) !=
+ sizeof(struct mshim_mem_info)) {
+ pr_err(DRV_NAME ": MSHIM_MEM_INFO_OFF pread failure.\n");
+ return -1;
+ }
+
+ if (mem_info.mem_ecc)
+ dimm->edac_mode = EDAC_SECDED;
+ else
+ dimm->edac_mode = EDAC_NONE;
+ switch (mem_info.mem_type) {
+ case DDR2:
+ dimm->mtype = MEM_DDR2;
+ break;
+
+ case DDR3:
+ dimm->mtype = MEM_DDR3;
+ break;
+
+ default:
+ return -1;
+ }
+
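+	/*
+	 * mem_size is in bytes; illustratively, with 4 KiB pages an 8 GiB
+	 * controller reports 2^21 pages.
+	 */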
+ dimm->nr_pages = mem_info.mem_size >> PAGE_SHIFT;
+ dimm->grain = TILE_EDAC_ERROR_GRAIN;
+ dimm->dtype = DEV_UNKNOWN;
+
+ return 0;
+}
+
+static int tile_edac_mc_probe(struct platform_device *pdev)
+{
+ char hv_file[32];
+ int hv_devhdl;
+ struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
+ struct tile_edac_priv *priv;
+ int rc;
+
+ sprintf(hv_file, "mshim/%d", pdev->id);
+ hv_devhdl = hv_dev_open((HV_VirtAddr)hv_file, 0);
+ if (hv_devhdl < 0)
+ return -EINVAL;
+
+ /* A TILE MC has a single channel and one chip-select row. */
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = TILE_EDAC_NR_CSROWS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = TILE_EDAC_NR_CHANS;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers,
+ sizeof(struct tile_edac_priv));
+ if (mci == NULL)
+ return -ENOMEM;
+ priv = mci->pvt_info;
+ priv->node = pdev->id;
+ priv->hv_devhdl = hv_devhdl;
+
+ mci->pdev = &pdev->dev;
+ mci->mtype_cap = MEM_FLAG_DDR2;
+ mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+
+ mci->mod_name = DRV_NAME;
+#ifdef __tilegx__
+ mci->ctl_name = "TILEGx_Memory_Controller";
+#else
+ mci->ctl_name = "TILEPro_Memory_Controller";
+#endif
+ mci->dev_name = dev_name(&pdev->dev);
+ mci->edac_check = tile_edac_check;
+
+ /*
+ * Initialize the MC control structure 'csrows' table
+ * with the mapping and control information.
+ */
+ if (tile_edac_init_csrows(mci)) {
+ /* No csrows found. */
+ mci->edac_cap = EDAC_FLAG_NONE;
+ } else {
+ mci->edac_cap = EDAC_FLAG_SECDED;
+ }
+
+ platform_set_drvdata(pdev, mci);
+
+ /* Register with EDAC core */
+ rc = edac_mc_add_mc(mci);
+ if (rc) {
+ dev_err(&pdev->dev, "failed to register with EDAC core\n");
+ edac_mc_free(mci);
+ return rc;
+ }
+
+ return 0;
+}
+
+static int tile_edac_mc_remove(struct platform_device *pdev)
+{
+ struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+ edac_mc_del_mc(&pdev->dev);
+ if (mci)
+ edac_mc_free(mci);
+ return 0;
+}
+
+static struct platform_driver tile_edac_mc_driver = {
+ .driver = {
+ .name = DRV_NAME,
+ .owner = THIS_MODULE,
+ },
+ .probe = tile_edac_mc_probe,
+ .remove = tile_edac_mc_remove,
+};
+
+/*
+ * Driver init routine.
+ */
+static int __init tile_edac_init(void)
+{
+ char hv_file[32];
+ struct platform_device *pdev;
+ int i, err, num = 0;
+
+ /* Only support POLL mode. */
+ edac_op_state = EDAC_OPSTATE_POLL;
+
+ err = platform_driver_register(&tile_edac_mc_driver);
+ if (err)
+ return err;
+
+ for (i = 0; i < TILE_MAX_MSHIMS; i++) {
+ /*
+		 * Not all memory controllers are configured, e.g. in the
+ * case of a simulator. So we register only those mshims
+ * that are configured by the hypervisor.
+ */
+ sprintf(hv_file, "mshim/%d", i);
+ if (hv_dev_open((HV_VirtAddr)hv_file, 0) < 0)
+ continue;
+
+ pdev = platform_device_register_simple(DRV_NAME, i, NULL, 0);
+ if (IS_ERR(pdev))
+ continue;
+ mshim_pdev[i] = pdev;
+ num++;
+ }
+
+ if (num == 0) {
+ platform_driver_unregister(&tile_edac_mc_driver);
+ return -ENODEV;
+ }
+ return 0;
+}
+
+/*
+ * Driver cleanup routine.
+ */
+static void __exit tile_edac_exit(void)
+{
+ int i;
+
+ for (i = 0; i < TILE_MAX_MSHIMS; i++) {
+ struct platform_device *pdev = mshim_pdev[i];
+ if (!pdev)
+ continue;
+
+ platform_device_unregister(pdev);
+ }
+ platform_driver_unregister(&tile_edac_mc_driver);
+}
+
+module_init(tile_edac_init);
+module_exit(tile_edac_exit);
diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c
index b6f47de152f..4891b450830 100644
--- a/drivers/edac/x38_edac.c
+++ b/drivers/edac/x38_edac.c
@@ -103,10 +103,10 @@ static int how_many_channel(struct pci_dev *pdev)
pci_read_config_byte(pdev, X38_CAPID0 + 8, &capid0_8b);
if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
- debugf0("In single channel mode.\n");
+ edac_dbg(0, "In single channel mode\n");
x38_channel_num = 1;
} else {
- debugf0("In dual channel mode.\n");
+ edac_dbg(0, "In dual channel mode\n");
x38_channel_num = 2;
}
@@ -151,7 +151,7 @@ static void x38_clear_error_info(struct mem_ctl_info *mci)
{
struct pci_dev *pdev;
- pdev = to_pci_dev(mci->dev);
+ pdev = to_pci_dev(mci->pdev);
/*
* Clear any error bits.
@@ -172,7 +172,7 @@ static void x38_get_and_clear_error_info(struct mem_ctl_info *mci,
struct pci_dev *pdev;
void __iomem *window = mci->pvt_info;
- pdev = to_pci_dev(mci->dev);
+ pdev = to_pci_dev(mci->pdev);
/*
* This is a mess because there is no atomic way to read all the
@@ -215,19 +215,26 @@ static void x38_process_error_info(struct mem_ctl_info *mci,
return;
if ((info->errsts ^ info->errsts2) & X38_ERRSTS_BITS) {
- edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1,
+ "UE overwrote CE", "");
info->errsts = info->errsts2;
}
for (channel = 0; channel < x38_channel_num; channel++) {
log = info->eccerrlog[channel];
if (log & X38_ECCERRLOG_UE) {
- edac_mc_handle_ue(mci, 0, 0,
- eccerrlog_row(channel, log), "x38 UE");
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ 0, 0, 0,
+ eccerrlog_row(channel, log),
+ -1, -1,
+ "x38 UE", "");
} else if (log & X38_ECCERRLOG_CE) {
- edac_mc_handle_ce(mci, 0, 0,
- eccerrlog_syndrome(log),
- eccerrlog_row(channel, log), 0, "x38 CE");
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ 0, 0, eccerrlog_syndrome(log),
+ eccerrlog_row(channel, log),
+ -1, -1,
+ "x38 CE", "");
}
}
}
@@ -236,13 +243,12 @@ static void x38_check(struct mem_ctl_info *mci)
{
struct x38_error_info info;
- debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+ edac_dbg(1, "MC%d\n", mci->mc_idx);
x38_get_and_clear_error_info(mci, &info);
x38_process_error_info(mci, &info);
}
-
-void __iomem *x38_map_mchbar(struct pci_dev *pdev)
+static void __iomem *x38_map_mchbar(struct pci_dev *pdev)
{
union {
u64 mchbar;
@@ -317,14 +323,14 @@ static unsigned long drb_to_nr_pages(
static int x38_probe1(struct pci_dev *pdev, int dev_idx)
{
int rc;
- int i;
+ int i, j;
struct mem_ctl_info *mci = NULL;
- unsigned long last_page;
+ struct edac_mc_layer layers[2];
u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL];
bool stacked;
void __iomem *window;
- debugf0("MC: %s()\n", __func__);
+ edac_dbg(0, "MC:\n");
window = x38_map_mchbar(pdev);
if (!window)
@@ -335,13 +341,19 @@ static int x38_probe1(struct pci_dev *pdev, int dev_idx)
how_many_channel(pdev);
/* FIXME: unconventional pvt_info usage */
- mci = edac_mc_alloc(0, X38_RANKS, x38_channel_num, 0);
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = X38_RANKS;
+ layers[0].is_virt_csrow = true;
+ layers[1].type = EDAC_MC_LAYER_CHANNEL;
+ layers[1].size = x38_channel_num;
+ layers[1].is_virt_csrow = false;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (!mci)
return -ENOMEM;
- debugf3("MC: %s(): init mci\n", __func__);
+ edac_dbg(3, "MC: init mci\n");
- mci->dev = &pdev->dev;
+ mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR2;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
@@ -363,41 +375,38 @@ static int x38_probe1(struct pci_dev *pdev, int dev_idx)
* cumulative; the last one will contain the total memory
* contained in all ranks.
*/
- last_page = -1UL;
for (i = 0; i < mci->nr_csrows; i++) {
unsigned long nr_pages;
- struct csrow_info *csrow = &mci->csrows[i];
+ struct csrow_info *csrow = mci->csrows[i];
nr_pages = drb_to_nr_pages(drbs, stacked,
i / X38_RANKS_PER_CHANNEL,
i % X38_RANKS_PER_CHANNEL);
- if (nr_pages == 0) {
- csrow->mtype = MEM_EMPTY;
+ if (nr_pages == 0)
continue;
- }
- csrow->first_page = last_page + 1;
- last_page += nr_pages;
- csrow->last_page = last_page;
- csrow->nr_pages = nr_pages;
+ for (j = 0; j < x38_channel_num; j++) {
+ struct dimm_info *dimm = csrow->channels[j]->dimm;
- csrow->grain = nr_pages << PAGE_SHIFT;
- csrow->mtype = MEM_DDR2;
- csrow->dtype = DEV_UNKNOWN;
- csrow->edac_mode = EDAC_UNKNOWN;
+ dimm->nr_pages = nr_pages / x38_channel_num;
+ dimm->grain = nr_pages << PAGE_SHIFT;
+ dimm->mtype = MEM_DDR2;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = EDAC_UNKNOWN;
+ }
}
x38_clear_error_info(mci);
rc = -ENODEV;
if (edac_mc_add_mc(mci)) {
- debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(3, "MC: failed edac_mc_add_mc()\n");
goto fail;
}
/* get this far and it's successful */
- debugf3("MC: %s(): success\n", __func__);
+ edac_dbg(3, "MC: success\n");
return 0;
fail:
@@ -408,12 +417,11 @@ fail:
return rc;
}
-static int __devinit x38_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static int x38_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int rc;
- debugf0("MC: %s()\n", __func__);
+ edac_dbg(0, "MC:\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
@@ -425,11 +433,11 @@ static int __devinit x38_init_one(struct pci_dev *pdev,
return rc;
}
-static void __devexit x38_remove_one(struct pci_dev *pdev)
+static void x38_remove_one(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
- debugf0("%s()\n", __func__);
+ edac_dbg(0, "\n");
mci = edac_mc_del_mc(&pdev->dev);
if (!mci)
@@ -440,7 +448,7 @@ static void __devexit x38_remove_one(struct pci_dev *pdev)
edac_mc_free(mci);
}
-static const struct pci_device_id x38_pci_tbl[] __devinitdata = {
+static const struct pci_device_id x38_pci_tbl[] = {
{
PCI_VEND_DEV(INTEL, X38_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
X38},
@@ -454,7 +462,7 @@ MODULE_DEVICE_TABLE(pci, x38_pci_tbl);
static struct pci_driver x38_driver = {
.name = EDAC_MOD_STR,
.probe = x38_init_one,
- .remove = __devexit_p(x38_remove_one),
+ .remove = x38_remove_one,
.id_table = x38_pci_tbl,
};
@@ -462,7 +470,7 @@ static int __init x38_init(void)
{
int pci_rc;
- debugf3("MC: %s()\n", __func__);
+ edac_dbg(3, "MC:\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -476,14 +484,14 @@ static int __init x38_init(void)
mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_X38_HB, NULL);
if (!mci_pdev) {
- debugf0("x38 pci_get_device fail\n");
+ edac_dbg(0, "x38 pci_get_device fail\n");
pci_rc = -ENODEV;
goto fail1;
}
pci_rc = x38_init_one(mci_pdev, x38_pci_tbl);
if (pci_rc < 0) {
- debugf0("x38 init fail\n");
+ edac_dbg(0, "x38 init fail\n");
pci_rc = -ENODEV;
goto fail1;
}
@@ -503,7 +511,7 @@ fail0:
static void __exit x38_exit(void)
{
- debugf3("MC: %s()\n", __func__);
+ edac_dbg(3, "MC:\n");
pci_unregister_driver(&x38_driver);
if (!x38_registered) {