diff options
Diffstat (limited to 'drivers/edac')
52 files changed, 8200 insertions, 4785 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 5948a2194f5..878f09005fa 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -4,10 +4,13 @@ # Licensed and distributed under the GPL # +config EDAC_SUPPORT + bool + menuconfig EDAC bool "EDAC (Error Detection And Correction) reporting" depends on HAS_IOMEM - depends on X86 || PPC || TILE + depends on X86 || PPC || TILE || ARM || EDAC_SUPPORT help EDAC is designed to report errors in the core system. These are low-level errors that are reported in the CPU or @@ -29,15 +32,21 @@ menuconfig EDAC if EDAC -comment "Reporting subsystems" +config EDAC_LEGACY_SYSFS + bool "EDAC legacy sysfs" + default y + help + Enable the compatibility sysfs nodes. + Use 'Y' if your edac utilities aren't ported to work with the newer + structures. config EDAC_DEBUG bool "Debugging" help - This turns on debugging information for the entire EDAC - sub-system. You can insert module with "debug_level=x", current - there're four debug levels (x=0,1,2,3 from low to high). - Usually you should select 'N'. + This turns on debugging information for the entire EDAC subsystem. + You do so by inserting edac_module with "edac_debug_level=x." Valid + levels are 0-4 (from low to high) and by default it is set to 2. + Usually you should select 'N' here. config EDAC_DECODE_MCE tristate "Decode MCEs in human-readable form (only on AMD for now)" @@ -71,6 +80,29 @@ config EDAC_MM_EDAC occurred so that a particular failing memory module can be replaced. If unsure, select 'Y'. +config EDAC_GHES + bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" + depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) + default y + help + Not all machines support hardware-driven error report. Some of those + provide a BIOS-driven error report mechanism via ACPI, using the + APEI/GHES driver. By enabling this option, the error reports provided + by GHES are sent to userspace via the EDAC API. + + When this option is enabled, it will disable the hardware-driven + mechanisms, if a GHES BIOS is detected, entering into the + "Firmware First" mode. + + It should be noticed that keeping both GHES and a hardware-driven + error mechanism won't work well, as BIOS will race with OS, while + reading the error registers. So, if you want to not use "Firmware + first" GHES error mechanism, you should disable GHES either at + compilation time or by passing "ghes.disable=1" Kernel parameter + at boot time. + + In doubt, say 'Y'. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h" depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE @@ -113,7 +145,7 @@ config EDAC_E7XXX config EDAC_E752X tristate "Intel e752x (e7520, e7525, e7320) and 3100" - depends on EDAC_MM_EDAC && PCI && X86 && HOTPLUG + depends on EDAC_MM_EDAC && PCI && X86 help Support for error detection and correction on the Intel E7520, E7525, E7320 server chipsets. @@ -149,7 +181,7 @@ config EDAC_I3000 config EDAC_I3200 tristate "Intel 3200" - depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL + depends on EDAC_MM_EDAC && PCI && X86 help Support for error detection and correction on the Intel 3200 and 3210 server chipsets. @@ -215,7 +247,7 @@ config EDAC_I7300 config EDAC_SBRIDGE tristate "Intel Sandy-Bridge Integrated MC" depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on EXPERIMENTAL + depends on PCI_MMCONFIG help Support for error detection and correction the Intel Sandy Bridge Integrated Memory Controller. @@ -294,4 +326,46 @@ config EDAC_TILE Support for error detection and correction on the Tilera memory controller. +config EDAC_HIGHBANK_MC + tristate "Highbank Memory Controller" + depends on EDAC_MM_EDAC && ARCH_HIGHBANK + help + Support for error detection and correction on the + Calxeda Highbank memory controller. + +config EDAC_HIGHBANK_L2 + tristate "Highbank L2 Cache" + depends on EDAC_MM_EDAC && ARCH_HIGHBANK + help + Support for error detection and correction on the + Calxeda Highbank memory controller. + +config EDAC_OCTEON_PC + tristate "Cavium Octeon Primary Caches" + depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + help + Support for error detection and correction on the primary caches of + the cnMIPS cores of Cavium Octeon family SOCs. + +config EDAC_OCTEON_L2C + tristate "Cavium Octeon Secondary Caches (L2C)" + depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + help + Support for error detection and correction on the + Cavium Octeon family of SOCs. + +config EDAC_OCTEON_LMC + tristate "Cavium Octeon DRAM Memory Controller (LMC)" + depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC + help + Support for error detection and correction on the + Cavium Octeon family of SOCs. + +config EDAC_OCTEON_PCI + tristate "Cavium Octeon PCI Controller" + depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC + help + Support for error detection and correction on the + Cavium Octeon family of SOCs. + endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 196a63dd37c..4154ed6a02c 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -16,6 +16,7 @@ ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o endif +obj-$(CONFIG_EDAC_GHES) += ghes_edac.o obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o edac_mce_amd-y := mce_amd.o @@ -55,3 +56,11 @@ obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o obj-$(CONFIG_EDAC_TILE) += tile_edac.o + +obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o +obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o + +obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o +obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o +obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o +obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c9eee6d33e9..f8bf00010d4 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1,7 +1,7 @@ #include "amd64_edac.h" #include <asm/amd_nb.h> -static struct edac_pci_ctl_info *amd64_ctl_pci; +static struct edac_pci_ctl_info *pci_ctl; static int report_gart_errors; module_param(report_gart_errors, int, 0644); @@ -31,7 +31,7 @@ static struct ecc_settings **ecc_stngs; * *FIXME: Produce a better mapping/linearisation. */ -struct scrubrate { +static const struct scrubrate { u32 scrubval; /* bit pattern for scrub rate */ u32 bandwidth; /* bandwidth consumed (bytes/sec) */ } scrubrates[] = { @@ -60,8 +60,8 @@ struct scrubrate { { 0x00, 0UL}, /* scrubbing off */ }; -static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, - u32 *val, const char *func) +int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, + u32 *val, const char *func) { int err = 0; @@ -98,6 +98,7 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, * * F15h: we select which DCT we access using F1x10C[DctCfgSel] * + * F16h: has only 1 DCT */ static int k8_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, const char *func) @@ -122,7 +123,7 @@ static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct) u32 reg = 0; amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, ®); - reg &= 0xfffffffe; + reg &= (pvt->model >= 0x30) ? ~3 : ~1; reg |= dct; amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg); } @@ -132,8 +133,9 @@ static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, { u8 dct = 0; + /* For F15 M30h, the second dct is DCT 3, refer to BKDG Section 2.10 */ if (addr >= 0x140 && addr <= 0x1a0) { - dct = 1; + dct = (pvt->model >= 0x30) ? 3 : 1; addr -= 0x100; } @@ -160,7 +162,7 @@ static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, * scan the scrub rate mapping table for a close or matching bandwidth value to * issue. If requested is too big, then use last maximum value found. */ -static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) +static int __set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) { u32 scrubval; int i; @@ -170,8 +172,11 @@ static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) * memory controller and apply to register. Search for the first * bandwidth entry that is greater or equal than the setting requested * and program that. If at last entry, turn off DRAM scrubbing. + * + * If no suitable bandwidth is found, turn off DRAM scrubbing entirely + * by falling back to the last element in scrubrates[]. */ - for (i = 0; i < ARRAY_SIZE(scrubrates); i++) { + for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) { /* * skip scrub rates which aren't recommended * (see F10 BKDG, F3x58) @@ -181,12 +186,6 @@ static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) if (scrubrates[i].bandwidth <= new_bw) break; - - /* - * if no suitable bandwidth found, turn off DRAM scrubbing - * entirely by falling back to the last element in the - * scrubrates array. - */ } scrubval = scrubrates[i].scrubval; @@ -199,29 +198,29 @@ static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) return 0; } -static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bw) +static int set_scrub_rate(struct mem_ctl_info *mci, u32 bw) { struct amd64_pvt *pvt = mci->pvt_info; u32 min_scrubrate = 0x5; - if (boot_cpu_data.x86 == 0xf) + if (pvt->fam == 0xf) min_scrubrate = 0x0; - /* F15h Erratum #505 */ - if (boot_cpu_data.x86 == 0x15) + /* Erratum #505 */ + if (pvt->fam == 0x15 && pvt->model < 0x10) f15h_select_dct(pvt, 0); - return __amd64_set_scrub_rate(pvt->F3, bw, min_scrubrate); + return __set_scrub_rate(pvt->F3, bw, min_scrubrate); } -static int amd64_get_scrub_rate(struct mem_ctl_info *mci) +static int get_scrub_rate(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; u32 scrubval = 0; int i, retval = -EINVAL; - /* F15h Erratum #505 */ - if (boot_cpu_data.x86 == 0x15) + /* Erratum #505 */ + if (pvt->fam == 0x15 && pvt->model < 0x10) f15h_select_dct(pvt, 0); amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); @@ -241,8 +240,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci) * returns true if the SysAddr given by sys_addr matches the * DRAM base/limit associated with node_id */ -static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, - unsigned nid) +static bool base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, u8 nid) { u64 addr; @@ -268,7 +266,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, u64 sys_addr) { struct amd64_pvt *pvt; - unsigned node_id; + u8 node_id; u32 intlv_en, bits; /* @@ -286,7 +284,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, if (intlv_en == 0) { for (node_id = 0; node_id < DRAM_RANGES; node_id++) { - if (amd64_base_limit_match(pvt, sys_addr, node_id)) + if (base_limit_match(pvt, sys_addr, node_id)) goto found; } goto err_no_match; @@ -310,7 +308,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, } /* sanity test for sys_addr */ - if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) { + if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) { amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address" "range for node %d with node interleaving enabled.\n", __func__, sys_addr, node_id); @@ -321,8 +319,8 @@ found: return edac_mc_find((int)node_id); err_no_match: - debugf2("sys_addr 0x%lx doesn't match any node\n", - (unsigned long)sys_addr); + edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n", + (unsigned long)sys_addr); return NULL; } @@ -337,21 +335,45 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, u64 csbase, csmask, base_bits, mask_bits; u8 addr_shift; - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) { + if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) { csbase = pvt->csels[dct].csbases[csrow]; csmask = pvt->csels[dct].csmasks[csrow]; - base_bits = GENMASK(21, 31) | GENMASK(9, 15); - mask_bits = GENMASK(21, 29) | GENMASK(9, 15); + base_bits = GENMASK_ULL(31, 21) | GENMASK_ULL(15, 9); + mask_bits = GENMASK_ULL(29, 21) | GENMASK_ULL(15, 9); addr_shift = 4; + + /* + * F16h and F15h, models 30h and later need two addr_shift values: + * 8 for high and 6 for low (cf. F16h BKDG). + */ + } else if (pvt->fam == 0x16 || + (pvt->fam == 0x15 && pvt->model >= 0x30)) { + csbase = pvt->csels[dct].csbases[csrow]; + csmask = pvt->csels[dct].csmasks[csrow >> 1]; + + *base = (csbase & GENMASK_ULL(15, 5)) << 6; + *base |= (csbase & GENMASK_ULL(30, 19)) << 8; + + *mask = ~0ULL; + /* poke holes for the csmask */ + *mask &= ~((GENMASK_ULL(15, 5) << 6) | + (GENMASK_ULL(30, 19) << 8)); + + *mask |= (csmask & GENMASK_ULL(15, 5)) << 6; + *mask |= (csmask & GENMASK_ULL(30, 19)) << 8; + + return; } else { csbase = pvt->csels[dct].csbases[csrow]; csmask = pvt->csels[dct].csmasks[csrow >> 1]; addr_shift = 8; - if (boot_cpu_data.x86 == 0x15) - base_bits = mask_bits = GENMASK(19,30) | GENMASK(5,13); + if (pvt->fam == 0x15) + base_bits = mask_bits = + GENMASK_ULL(30,19) | GENMASK_ULL(13,5); else - base_bits = mask_bits = GENMASK(19,28) | GENMASK(5,13); + base_bits = mask_bits = + GENMASK_ULL(28,19) | GENMASK_ULL(13,5); } *base = (csbase & base_bits) << addr_shift; @@ -393,15 +415,15 @@ static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr) mask = ~mask; if ((input_addr & mask) == (base & mask)) { - debugf2("InputAddr 0x%lx matches csrow %d (node %d)\n", - (unsigned long)input_addr, csrow, - pvt->mc_node_id); + edac_dbg(2, "InputAddr 0x%lx matches csrow %d (node %d)\n", + (unsigned long)input_addr, csrow, + pvt->mc_node_id); return csrow; } } - debugf2("no matching csrow for InputAddr 0x%lx (MC node %d)\n", - (unsigned long)input_addr, pvt->mc_node_id); + edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n", + (unsigned long)input_addr, pvt->mc_node_id); return -1; } @@ -426,24 +448,23 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, u64 *hole_offset, u64 *hole_size) { struct amd64_pvt *pvt = mci->pvt_info; - u64 base; /* only revE and later have the DRAM Hole Address Register */ - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) { - debugf1(" revision %d for node %d does not support DHAR\n", - pvt->ext_model, pvt->mc_node_id); + if (pvt->fam == 0xf && pvt->ext_model < K8_REV_E) { + edac_dbg(1, " revision %d for node %d does not support DHAR\n", + pvt->ext_model, pvt->mc_node_id); return 1; } /* valid for Fam10h and above */ - if (boot_cpu_data.x86 >= 0x10 && !dhar_mem_hoist_valid(pvt)) { - debugf1(" Dram Memory Hoisting is DISABLED on this system\n"); + if (pvt->fam >= 0x10 && !dhar_mem_hoist_valid(pvt)) { + edac_dbg(1, " Dram Memory Hoisting is DISABLED on this system\n"); return 1; } if (!dhar_valid(pvt)) { - debugf1(" Dram Memory Hoisting is DISABLED on this node %d\n", - pvt->mc_node_id); + edac_dbg(1, " Dram Memory Hoisting is DISABLED on this node %d\n", + pvt->mc_node_id); return 1; } @@ -465,19 +486,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, * addresses in the hole so that they start at 0x100000000. */ - base = dhar_base(pvt); - - *hole_base = base; - *hole_size = (0x1ull << 32) - base; + *hole_base = dhar_base(pvt); + *hole_size = (1ULL << 32) - *hole_base; - if (boot_cpu_data.x86 > 0xf) - *hole_offset = f10_dhar_offset(pvt); - else - *hole_offset = k8_dhar_offset(pvt); + *hole_offset = (pvt->fam > 0xf) ? f10_dhar_offset(pvt) + : k8_dhar_offset(pvt); - debugf1(" DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n", - pvt->mc_node_id, (unsigned long)*hole_base, - (unsigned long)*hole_offset, (unsigned long)*hole_size); + edac_dbg(1, " DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n", + pvt->mc_node_id, (unsigned long)*hole_base, + (unsigned long)*hole_offset, (unsigned long)*hole_size); return 0; } @@ -516,22 +533,21 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr) { struct amd64_pvt *pvt = mci->pvt_info; u64 dram_base, hole_base, hole_offset, hole_size, dram_addr; - int ret = 0; + int ret; dram_base = get_dram_base(pvt, pvt->mc_node_id); ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size); if (!ret) { - if ((sys_addr >= (1ull << 32)) && - (sys_addr < ((1ull << 32) + hole_size))) { + if ((sys_addr >= (1ULL << 32)) && + (sys_addr < ((1ULL << 32) + hole_size))) { /* use DHAR to translate SysAddr to DramAddr */ dram_addr = sys_addr - hole_offset; - debugf2("using DHAR to translate SysAddr 0x%lx to " - "DramAddr 0x%lx\n", - (unsigned long)sys_addr, - (unsigned long)dram_addr); + edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n", + (unsigned long)sys_addr, + (unsigned long)dram_addr); return dram_addr; } @@ -546,11 +562,10 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr) * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture * Programmer's Manual Volume 1 Application Programming. */ - dram_addr = (sys_addr & GENMASK(0, 39)) - dram_base; + dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base; - debugf2("using DRAM Base register to translate SysAddr 0x%lx to " - "DramAddr 0x%lx\n", (unsigned long)sys_addr, - (unsigned long)dram_addr); + edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n", + (unsigned long)sys_addr, (unsigned long)dram_addr); return dram_addr; } @@ -583,12 +598,12 @@ static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr) * concerning translating a DramAddr to an InputAddr. */ intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0)); - input_addr = ((dram_addr >> intlv_shift) & GENMASK(12, 35)) + + input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) + (dram_addr & 0xfff); - debugf2(" Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n", - intlv_shift, (unsigned long)dram_addr, - (unsigned long)input_addr); + edac_dbg(2, " Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n", + intlv_shift, (unsigned long)dram_addr, + (unsigned long)input_addr); return input_addr; } @@ -604,142 +619,18 @@ static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr) input_addr = dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr)); - debugf2("SysAdddr 0x%lx translates to InputAddr 0x%lx\n", - (unsigned long)sys_addr, (unsigned long)input_addr); + edac_dbg(2, "SysAdddr 0x%lx translates to InputAddr 0x%lx\n", + (unsigned long)sys_addr, (unsigned long)input_addr); return input_addr; } - -/* - * @input_addr is an InputAddr associated with the node represented by mci. - * Translate @input_addr to a DramAddr and return the result. - */ -static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr) -{ - struct amd64_pvt *pvt; - unsigned node_id, intlv_shift; - u64 bits, dram_addr; - u32 intlv_sel; - - /* - * Near the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E) - * shows how to translate a DramAddr to an InputAddr. Here we reverse - * this procedure. When translating from a DramAddr to an InputAddr, the - * bits used for node interleaving are discarded. Here we recover these - * bits from the IntlvSel field of the DRAM Limit register (section - * 3.4.4.2) for the node that input_addr is associated with. - */ - pvt = mci->pvt_info; - node_id = pvt->mc_node_id; - - BUG_ON(node_id > 7); - - intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0)); - if (intlv_shift == 0) { - debugf1(" InputAddr 0x%lx translates to DramAddr of " - "same value\n", (unsigned long)input_addr); - - return input_addr; - } - - bits = ((input_addr & GENMASK(12, 35)) << intlv_shift) + - (input_addr & 0xfff); - - intlv_sel = dram_intlv_sel(pvt, node_id) & ((1 << intlv_shift) - 1); - dram_addr = bits + (intlv_sel << 12); - - debugf1("InputAddr 0x%lx translates to DramAddr 0x%lx " - "(%d node interleave bits)\n", (unsigned long)input_addr, - (unsigned long)dram_addr, intlv_shift); - - return dram_addr; -} - -/* - * @dram_addr is a DramAddr that maps to the node represented by mci. Convert - * @dram_addr to a SysAddr. - */ -static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr) -{ - struct amd64_pvt *pvt = mci->pvt_info; - u64 hole_base, hole_offset, hole_size, base, sys_addr; - int ret = 0; - - ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, - &hole_size); - if (!ret) { - if ((dram_addr >= hole_base) && - (dram_addr < (hole_base + hole_size))) { - sys_addr = dram_addr + hole_offset; - - debugf1("using DHAR to translate DramAddr 0x%lx to " - "SysAddr 0x%lx\n", (unsigned long)dram_addr, - (unsigned long)sys_addr); - - return sys_addr; - } - } - - base = get_dram_base(pvt, pvt->mc_node_id); - sys_addr = dram_addr + base; - - /* - * The sys_addr we have computed up to this point is a 40-bit value - * because the k8 deals with 40-bit values. However, the value we are - * supposed to return is a full 64-bit physical address. The AMD - * x86-64 architecture specifies that the most significant implemented - * address bit through bit 63 of a physical address must be either all - * 0s or all 1s. Therefore we sign-extend the 40-bit sys_addr to a - * 64-bit value below. See section 3.4.2 of AMD publication 24592: - * AMD x86-64 Architecture Programmer's Manual Volume 1 Application - * Programming. - */ - sys_addr |= ~((sys_addr & (1ull << 39)) - 1); - - debugf1(" Node %d, DramAddr 0x%lx to SysAddr 0x%lx\n", - pvt->mc_node_id, (unsigned long)dram_addr, - (unsigned long)sys_addr); - - return sys_addr; -} - -/* - * @input_addr is an InputAddr associated with the node given by mci. Translate - * @input_addr to a SysAddr. - */ -static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci, - u64 input_addr) -{ - return dram_addr_to_sys_addr(mci, - input_addr_to_dram_addr(mci, input_addr)); -} - -/* - * Find the minimum and maximum InputAddr values that map to the given @csrow. - * Pass back these values in *input_addr_min and *input_addr_max. - */ -static void find_csrow_limits(struct mem_ctl_info *mci, int csrow, - u64 *input_addr_min, u64 *input_addr_max) -{ - struct amd64_pvt *pvt; - u64 base, mask; - - pvt = mci->pvt_info; - BUG_ON((csrow < 0) || (csrow >= pvt->csels[0].b_cnt)); - - get_cs_base_and_mask(pvt, csrow, 0, &base, &mask); - - *input_addr_min = base & ~mask; - *input_addr_max = base | mask; -} - /* Map the Error address to a PAGE and PAGE OFFSET. */ static inline void error_address_to_page_and_offset(u64 error_address, - u32 *page, u32 *offset) + struct err_info *err) { - *page = (u32) (error_address >> PAGE_SHIFT); - *offset = ((u32) error_address) & ~PAGE_MASK; + err->page = (u32) (error_address >> PAGE_SHIFT); + err->offset = ((u32) error_address) & ~PAGE_MASK; } /* @@ -768,12 +659,12 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs * are ECC capable. */ -static unsigned long amd64_determine_edac_cap(struct amd64_pvt *pvt) +static unsigned long determine_edac_cap(struct amd64_pvt *pvt) { u8 bit; unsigned long edac_cap = EDAC_FLAG_NONE; - bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F) + bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F) ? 19 : 17; @@ -783,77 +674,79 @@ static unsigned long amd64_determine_edac_cap(struct amd64_pvt *pvt) return edac_cap; } -static void amd64_debug_display_dimm_sizes(struct amd64_pvt *, u8); +static void debug_display_dimm_sizes(struct amd64_pvt *, u8); -static void amd64_dump_dramcfg_low(u32 dclr, int chan) +static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) { - debugf1("F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr); + edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr); - debugf1(" DIMM type: %sbuffered; all DIMMs support ECC: %s\n", - (dclr & BIT(16)) ? "un" : "", - (dclr & BIT(19)) ? "yes" : "no"); + edac_dbg(1, " DIMM type: %sbuffered; all DIMMs support ECC: %s\n", + (dclr & BIT(16)) ? "un" : "", + (dclr & BIT(19)) ? "yes" : "no"); - debugf1(" PAR/ERR parity: %s\n", - (dclr & BIT(8)) ? "enabled" : "disabled"); + edac_dbg(1, " PAR/ERR parity: %s\n", + (dclr & BIT(8)) ? "enabled" : "disabled"); - if (boot_cpu_data.x86 == 0x10) - debugf1(" DCT 128bit mode width: %s\n", - (dclr & BIT(11)) ? "128b" : "64b"); + if (pvt->fam == 0x10) + edac_dbg(1, " DCT 128bit mode width: %s\n", + (dclr & BIT(11)) ? "128b" : "64b"); - debugf1(" x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n", - (dclr & BIT(12)) ? "yes" : "no", - (dclr & BIT(13)) ? "yes" : "no", - (dclr & BIT(14)) ? "yes" : "no", - (dclr & BIT(15)) ? "yes" : "no"); + edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n", + (dclr & BIT(12)) ? "yes" : "no", + (dclr & BIT(13)) ? "yes" : "no", + (dclr & BIT(14)) ? "yes" : "no", + (dclr & BIT(15)) ? "yes" : "no"); } /* Display and decode various NB registers for debug purposes. */ static void dump_misc_regs(struct amd64_pvt *pvt) { - debugf1("F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap); + edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap); - debugf1(" NB two channel DRAM capable: %s\n", - (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no"); + edac_dbg(1, " NB two channel DRAM capable: %s\n", + (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no"); - debugf1(" ECC capable: %s, ChipKill ECC capable: %s\n", - (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no", - (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no"); + edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n", + (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no", + (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no"); - amd64_dump_dramcfg_low(pvt->dclr0, 0); + debug_dump_dramcfg_low(pvt, pvt->dclr0, 0); - debugf1("F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare); + edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare); - debugf1("F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, " - "offset: 0x%08x\n", - pvt->dhar, dhar_base(pvt), - (boot_cpu_data.x86 == 0xf) ? k8_dhar_offset(pvt) - : f10_dhar_offset(pvt)); + edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n", + pvt->dhar, dhar_base(pvt), + (pvt->fam == 0xf) ? k8_dhar_offset(pvt) + : f10_dhar_offset(pvt)); - debugf1(" DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); + edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); - amd64_debug_display_dimm_sizes(pvt, 0); + debug_display_dimm_sizes(pvt, 0); /* everything below this point is Fam10h and above */ - if (boot_cpu_data.x86 == 0xf) + if (pvt->fam == 0xf) return; - amd64_debug_display_dimm_sizes(pvt, 1); + debug_display_dimm_sizes(pvt, 1); amd64_info("using %s syndromes.\n", ((pvt->ecc_sym_sz == 8) ? "x8" : "x4")); /* Only if NOT ganged does dclr1 have valid info */ if (!dct_ganging_enabled(pvt)) - amd64_dump_dramcfg_low(pvt->dclr1, 1); + debug_dump_dramcfg_low(pvt, pvt->dclr1, 1); } /* - * see BKDG, F2x[1,0][5C:40], F2[1,0][6C:60] + * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60] */ static void prep_chip_selects(struct amd64_pvt *pvt) { - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) { + if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8; + } else if (pvt->fam == 0x15 && pvt->model >= 0x30) { + pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; + pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; } else { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; @@ -876,15 +769,15 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) u32 *base1 = &pvt->csels[1].csbases[cs]; if (!amd64_read_dct_pci_cfg(pvt, reg0, base0)) - debugf0(" DCSB0[%d]=0x%08x reg: F2x%x\n", - cs, *base0, reg0); + edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", + cs, *base0, reg0); - if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt)) + if (pvt->fam == 0xf || dct_ganging_enabled(pvt)) continue; if (!amd64_read_dct_pci_cfg(pvt, reg1, base1)) - debugf0(" DCSB1[%d]=0x%08x reg: F2x%x\n", - cs, *base1, reg1); + edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", + cs, *base1, reg1); } for_each_chip_select_mask(cs, 0, pvt) { @@ -894,26 +787,26 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) u32 *mask1 = &pvt->csels[1].csmasks[cs]; if (!amd64_read_dct_pci_cfg(pvt, reg0, mask0)) - debugf0(" DCSM0[%d]=0x%08x reg: F2x%x\n", - cs, *mask0, reg0); + edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", + cs, *mask0, reg0); - if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt)) + if (pvt->fam == 0xf || dct_ganging_enabled(pvt)) continue; if (!amd64_read_dct_pci_cfg(pvt, reg1, mask1)) - debugf0(" DCSM1[%d]=0x%08x reg: F2x%x\n", - cs, *mask1, reg1); + edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", + cs, *mask1, reg1); } } -static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt, int cs) +static enum mem_type determine_memory_type(struct amd64_pvt *pvt, int cs) { enum mem_type type; /* F15h supports only DDR3 */ - if (boot_cpu_data.x86 >= 0x15) + if (pvt->fam >= 0x15) type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; - else if (boot_cpu_data.x86 == 0x10 || pvt->ext_model >= K8_REV_F) { + else if (pvt->fam == 0x10 || pvt->ext_model >= K8_REV_F) { if (pvt->dchr0 & DDR3_MODE) type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; else @@ -946,30 +839,30 @@ static int k8_early_channel_count(struct amd64_pvt *pvt) } /* On F10h and later ErrAddr is MC4_ADDR[47:1] */ -static u64 get_error_address(struct mce *m) +static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m) { - struct cpuinfo_x86 *c = &boot_cpu_data; u64 addr; u8 start_bit = 1; u8 end_bit = 47; - if (c->x86 == 0xf) { + if (pvt->fam == 0xf) { start_bit = 3; end_bit = 39; } - addr = m->addr & GENMASK(start_bit, end_bit); + addr = m->addr & GENMASK_ULL(end_bit, start_bit); /* * Erratum 637 workaround */ - if (c->x86 == 0x15) { + if (pvt->fam == 0x15) { struct amd64_pvt *pvt; u64 cc6_base, tmp_addr; u32 tmp; - u8 mce_nid, intlv_en; + u16 mce_nid; + u8 intlv_en; - if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) + if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7) return addr; mce_nid = amd_get_nb_id(m->extcpu); @@ -979,7 +872,7 @@ static u64 get_error_address(struct mce *m) intlv_en = tmp >> 21 & 0x7; /* add [47:27] + 3 trailing bits */ - cc6_base = (tmp & GENMASK(0, 20)) << 3; + cc6_base = (tmp & GENMASK_ULL(20, 0)) << 3; /* reverse and add DramIntlvEn */ cc6_base |= intlv_en ^ 0x7; @@ -988,18 +881,18 @@ static u64 get_error_address(struct mce *m) cc6_base <<= 24; if (!intlv_en) - return cc6_base | (addr & GENMASK(0, 23)); + return cc6_base | (addr & GENMASK_ULL(23, 0)); amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp); /* faster log2 */ - tmp_addr = (addr & GENMASK(12, 23)) << __fls(intlv_en + 1); + tmp_addr = (addr & GENMASK_ULL(23, 12)) << __fls(intlv_en + 1); /* OR DramIntlvSel into bits [14:12] */ - tmp_addr |= (tmp & GENMASK(21, 23)) >> 9; + tmp_addr |= (tmp & GENMASK_ULL(23, 21)) >> 9; /* add remaining [11:0] bits from original MC4_ADDR */ - tmp_addr |= addr & GENMASK(0, 11); + tmp_addr |= addr & GENMASK_ULL(11, 0); return cc6_base | tmp_addr; } @@ -1007,15 +900,34 @@ static u64 get_error_address(struct mce *m) return addr; } +static struct pci_dev *pci_get_related_function(unsigned int vendor, + unsigned int device, + struct pci_dev *related) +{ + struct pci_dev *dev = NULL; + + while ((dev = pci_get_device(vendor, device, dev))) { + if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) && + (dev->bus->number == related->bus->number) && + (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) + break; + } + + return dev; +} + static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) { - struct cpuinfo_x86 *c = &boot_cpu_data; + struct amd_northbridge *nb; + struct pci_dev *f1 = NULL; + unsigned int pci_func; int off = range << 3; + u32 llim; amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off, &pvt->ranges[range].base.lo); amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo); - if (c->x86 == 0xf) + if (pvt->fam == 0xf) return; if (!dram_rw(pvt, range)) @@ -1024,52 +936,75 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off, &pvt->ranges[range].base.hi); amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi); - /* Factor in CC6 save area by reading dst node's limit reg */ - if (c->x86 == 0x15) { - struct pci_dev *f1 = NULL; - u8 nid = dram_dst_node(pvt, range); - u32 llim; + /* F15h: factor in CC6 save area by reading dst node's limit reg */ + if (pvt->fam != 0x15) + return; - f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1)); - if (WARN_ON(!f1)) - return; + nb = node_to_amd_nb(dram_dst_node(pvt, range)); + if (WARN_ON(!nb)) + return; - amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim); + pci_func = (pvt->model == 0x30) ? PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 + : PCI_DEVICE_ID_AMD_15H_NB_F1; - pvt->ranges[range].lim.lo &= GENMASK(0, 15); + f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc); + if (WARN_ON(!f1)) + return; - /* {[39:27],111b} */ - pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16; + amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim); - pvt->ranges[range].lim.hi &= GENMASK(0, 7); + pvt->ranges[range].lim.lo &= GENMASK_ULL(15, 0); - /* [47:40] */ - pvt->ranges[range].lim.hi |= llim >> 13; + /* {[39:27],111b} */ + pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16; - pci_dev_put(f1); - } + pvt->ranges[range].lim.hi &= GENMASK_ULL(7, 0); + + /* [47:40] */ + pvt->ranges[range].lim.hi |= llim >> 13; + + pci_dev_put(f1); } static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, - u16 syndrome) + struct err_info *err) { - struct mem_ctl_info *src_mci; struct amd64_pvt *pvt = mci->pvt_info; - int channel, csrow; - u32 page, offset; + + error_address_to_page_and_offset(sys_addr, err); + + /* + * Find out which node the error address belongs to. This may be + * different from the node that detected the error. + */ + err->src_mci = find_mc_by_sys_addr(mci, sys_addr); + if (!err->src_mci) { + amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n", + (unsigned long)sys_addr); + err->err_code = ERR_NODE; + return; + } + + /* Now map the sys_addr to a CSROW */ + err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr); + if (err->csrow < 0) { + err->err_code = ERR_CSROW; + return; + } /* CHIPKILL enabled */ if (pvt->nbcfg & NBCFG_CHIPKILL) { - channel = get_channel_from_ecc_syndrome(mci, syndrome); - if (channel < 0) { + err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome); + if (err->channel < 0) { /* * Syndrome didn't map, so we don't know which of the * 2 DIMMs is in error. So we need to ID 'both' of them * as suspect. */ - amd64_mc_warn(mci, "unknown syndrome 0x%04x - possible " - "error reporting race\n", syndrome); - edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); + amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - " + "possible error reporting race\n", + err->syndrome); + err->err_code = ERR_CHANNEL; return; } } else { @@ -1081,30 +1016,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, * was obtained from email communication with someone at AMD. * (Wish the email was placed in this comment - norsk) */ - channel = ((sys_addr & BIT(3)) != 0); - } - - /* - * Find out which node the error address belongs to. This may be - * different from the node that detected the error. - */ - src_mci = find_mc_by_sys_addr(mci, sys_addr); - if (!src_mci) { - amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n", - (unsigned long)sys_addr); - edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); - return; - } - - /* Now map the sys_addr to a CSROW */ - csrow = sys_addr_to_csrow(src_mci, sys_addr); - if (csrow < 0) { - edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR); - } else { - error_address_to_page_and_offset(sys_addr, &page, &offset); - - edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow, - channel, EDAC_MOD_STR); + err->channel = ((sys_addr & BIT(3)) != 0); } } @@ -1132,12 +1044,36 @@ static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, return ddr2_cs_size(cs_mode, dclr & WIDTH_128); } else if (pvt->ext_model >= K8_REV_D) { + unsigned diff; WARN_ON(cs_mode > 10); - if (cs_mode == 3 || cs_mode == 8) - return 32 << (cs_mode - 1); - else - return 32 << cs_mode; + /* + * the below calculation, besides trying to win an obfuscated C + * contest, maps cs_mode values to DIMM chip select sizes. The + * mappings are: + * + * cs_mode CS size (mb) + * ======= ============ + * 0 32 + * 1 64 + * 2 128 + * 3 128 + * 4 256 + * 5 512 + * 6 256 + * 7 512 + * 8 1024 + * 9 1024 + * 10 2048 + * + * Basically, it calculates a value with which to shift the + * smallest CS size of 32MB. + * + * ddr[23]_cs_size have a similar purpose. + */ + diff = cs_mode/3 + (unsigned)(cs_mode > 5); + + return 32 << (cs_mode - diff); } else { WARN_ON(cs_mode > 6); @@ -1158,7 +1094,7 @@ static int f1x_early_channel_count(struct amd64_pvt *pvt) int i, j, channels = 0; /* On F10h, if we are in 128 bit mode, then we are using 2 channels */ - if (boot_cpu_data.x86 == 0x10 && (pvt->dclr0 & WIDTH_128)) + if (pvt->fam == 0x10 && (pvt->dclr0 & WIDTH_128)) return 2; /* @@ -1169,7 +1105,7 @@ static int f1x_early_channel_count(struct amd64_pvt *pvt) * Need to check DCT0[0] and DCT1[0] to see if only one of them has * their CSEnable bit on. If so, then SINGLE DIMM case. */ - debugf0("Data width is not 128 bits - need more decoding\n"); + edac_dbg(0, "Data width is not 128 bits - need more decoding\n"); /* * Check DRAM Bank Address Mapping values for each DIMM to see if there @@ -1241,38 +1177,83 @@ static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, return ddr3_cs_size(cs_mode, false); } +/* + * F16h and F15h model 30h have only limited cs_modes. + */ +static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, + unsigned cs_mode) +{ + WARN_ON(cs_mode > 12); + + if (cs_mode == 6 || cs_mode == 8 || + cs_mode == 9 || cs_mode == 12) + return -1; + else + return ddr3_cs_size(cs_mode, false); +} + static void read_dram_ctl_register(struct amd64_pvt *pvt) { - if (boot_cpu_data.x86 == 0xf) + if (pvt->fam == 0xf) return; if (!amd64_read_dct_pci_cfg(pvt, DCT_SEL_LO, &pvt->dct_sel_lo)) { - debugf0("F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n", - pvt->dct_sel_lo, dct_sel_baseaddr(pvt)); + edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n", + pvt->dct_sel_lo, dct_sel_baseaddr(pvt)); - debugf0(" DCTs operate in %s mode.\n", - (dct_ganging_enabled(pvt) ? "ganged" : "unganged")); + edac_dbg(0, " DCTs operate in %s mode\n", + (dct_ganging_enabled(pvt) ? "ganged" : "unganged")); if (!dct_ganging_enabled(pvt)) - debugf0(" Address range split per DCT: %s\n", - (dct_high_range_enabled(pvt) ? "yes" : "no")); + edac_dbg(0, " Address range split per DCT: %s\n", + (dct_high_range_enabled(pvt) ? "yes" : "no")); - debugf0(" data interleave for ECC: %s, " - "DRAM cleared since last warm reset: %s\n", - (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"), - (dct_memory_cleared(pvt) ? "yes" : "no")); + edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n", + (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"), + (dct_memory_cleared(pvt) ? "yes" : "no")); - debugf0(" channel interleave: %s, " - "interleave bits selector: 0x%x\n", - (dct_interleave_enabled(pvt) ? "enabled" : "disabled"), - dct_sel_interleave_addr(pvt)); + edac_dbg(0, " channel interleave: %s, " + "interleave bits selector: 0x%x\n", + (dct_interleave_enabled(pvt) ? "enabled" : "disabled"), + dct_sel_interleave_addr(pvt)); } amd64_read_dct_pci_cfg(pvt, DCT_SEL_HI, &pvt->dct_sel_hi); } /* + * Determine channel (DCT) based on the interleaving mode (see F15h M30h BKDG, + * 2.10.12 Memory Interleaving Modes). + */ +static u8 f15_m30h_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, + u8 intlv_en, int num_dcts_intlv, + u32 dct_sel) +{ + u8 channel = 0; + u8 select; + + if (!(intlv_en)) + return (u8)(dct_sel); + + if (num_dcts_intlv == 2) { + select = (sys_addr >> 8) & 0x3; + channel = select ? 0x3 : 0; + } else if (num_dcts_intlv == 4) { + u8 intlv_addr = dct_sel_interleave_addr(pvt); + switch (intlv_addr) { + case 0x4: + channel = (sys_addr >> 8) & 0x3; + break; + case 0x5: + channel = (sys_addr >> 9) & 0x3; + break; + } + } + return channel; +} + +/* * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9 Memory * Interleaving Modes. */ @@ -1314,7 +1295,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, } /* Convert the sys_addr to the normalized DCT address */ -static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, +static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, u64 sys_addr, bool hi_rng, u32 dct_sel_base_addr) { @@ -1358,7 +1339,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, chan_off = dram_base; } - return (sys_addr & GENMASK(6,47)) - (chan_off & GENMASK(23,47)); + return (sys_addr & GENMASK_ULL(47,6)) - (chan_off & GENMASK_ULL(47,23)); } /* @@ -1390,7 +1371,7 @@ static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) * -EINVAL: NOT FOUND * 0..csrow = Chip-Select Row */ -static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) +static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; @@ -1404,7 +1385,7 @@ static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) pvt = mci->pvt_info; - debugf1("input addr: 0x%llx, DCT: %d\n", in_addr, dct); + edac_dbg(1, "input addr: 0x%llx, DCT: %d\n", in_addr, dct); for_each_chip_select(csrow, dct, pvt) { if (!csrow_enabled(csrow, dct, pvt)) @@ -1412,19 +1393,22 @@ static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) get_cs_base_and_mask(pvt, csrow, dct, &cs_base, &cs_mask); - debugf1(" CSROW=%d CSBase=0x%llx CSMask=0x%llx\n", - csrow, cs_base, cs_mask); + edac_dbg(1, " CSROW=%d CSBase=0x%llx CSMask=0x%llx\n", + csrow, cs_base, cs_mask); cs_mask = ~cs_mask; - debugf1(" (InputAddr & ~CSMask)=0x%llx " - "(CSBase & ~CSMask)=0x%llx\n", - (in_addr & cs_mask), (cs_base & cs_mask)); + edac_dbg(1, " (InputAddr & ~CSMask)=0x%llx (CSBase & ~CSMask)=0x%llx\n", + (in_addr & cs_mask), (cs_base & cs_mask)); if ((in_addr & cs_mask) == (cs_base & cs_mask)) { + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + cs_found = csrow; + break; + } cs_found = f10_process_possible_spare(pvt, dct, csrow); - debugf1(" MATCH csrow=%d\n", cs_found); + edac_dbg(1, " MATCH csrow=%d\n", cs_found); break; } } @@ -1440,11 +1424,9 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr) { u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr; - if (boot_cpu_data.x86 == 0x10) { + if (pvt->fam == 0x10) { /* only revC3 and revE have that feature */ - if (boot_cpu_data.x86_model < 4 || - (boot_cpu_data.x86_model < 0xa && - boot_cpu_data.x86_mask < 3)) + if (pvt->model < 4 || (pvt->model < 0xa && pvt->stepping < 3)) return sys_addr; } @@ -1469,7 +1451,7 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr) /* For a given @dram_range, check if @sys_addr falls within it. */ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, - u64 sys_addr, int *nid, int *chan_sel) + u64 sys_addr, int *chan_sel) { int cs_found = -EINVAL; u64 chan_addr; @@ -1481,8 +1463,8 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, u8 intlv_en = dram_intlv_en(pvt, range); u32 intlv_sel = dram_intlv_sel(pvt, range); - debugf1("(range %d) SystemAddr= 0x%llx Limit=0x%llx\n", - range, sys_addr, get_dram_limit(pvt, range)); + edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n", + range, sys_addr, get_dram_limit(pvt, range)); if (dhar_valid(pvt) && dhar_base(pvt) <= sys_addr && @@ -1538,34 +1520,155 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, (chan_addr & 0xfff); } - debugf1(" Normalized DCT addr: 0x%llx\n", chan_addr); + edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr); cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel); - if (cs_found >= 0) { - *nid = node_id; + if (cs_found >= 0) *chan_sel = channel; + + return cs_found; +} + +static int f15_m30h_match_to_this_node(struct amd64_pvt *pvt, unsigned range, + u64 sys_addr, int *chan_sel) +{ + int cs_found = -EINVAL; + int num_dcts_intlv = 0; + u64 chan_addr, chan_offset; + u64 dct_base, dct_limit; + u32 dct_cont_base_reg, dct_cont_limit_reg, tmp; + u8 channel, alias_channel, leg_mmio_hole, dct_sel, dct_offset_en; + + u64 dhar_offset = f10_dhar_offset(pvt); + u8 intlv_addr = dct_sel_interleave_addr(pvt); + u8 node_id = dram_dst_node(pvt, range); + u8 intlv_en = dram_intlv_en(pvt, range); + + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &dct_cont_base_reg); + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &dct_cont_limit_reg); + + dct_offset_en = (u8) ((dct_cont_base_reg >> 3) & BIT(0)); + dct_sel = (u8) ((dct_cont_base_reg >> 4) & 0x7); + + edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n", + range, sys_addr, get_dram_limit(pvt, range)); + + if (!(get_dram_base(pvt, range) <= sys_addr) && + !(get_dram_limit(pvt, range) >= sys_addr)) + return -EINVAL; + + if (dhar_valid(pvt) && + dhar_base(pvt) <= sys_addr && + sys_addr < BIT_64(32)) { + amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n", + sys_addr); + return -EINVAL; + } + + /* Verify sys_addr is within DCT Range. */ + dct_base = (u64) dct_sel_baseaddr(pvt); + dct_limit = (dct_cont_limit_reg >> 11) & 0x1FFF; + + if (!(dct_cont_base_reg & BIT(0)) && + !(dct_base <= (sys_addr >> 27) && + dct_limit >= (sys_addr >> 27))) + return -EINVAL; + + /* Verify number of dct's that participate in channel interleaving. */ + num_dcts_intlv = (int) hweight8(intlv_en); + + if (!(num_dcts_intlv % 2 == 0) || (num_dcts_intlv > 4)) + return -EINVAL; + + channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en, + num_dcts_intlv, dct_sel); + + /* Verify we stay within the MAX number of channels allowed */ + if (channel > 3) + return -EINVAL; + + leg_mmio_hole = (u8) (dct_cont_base_reg >> 1 & BIT(0)); + + /* Get normalized DCT addr */ + if (leg_mmio_hole && (sys_addr >= BIT_64(32))) + chan_offset = dhar_offset; + else + chan_offset = dct_base << 27; + + chan_addr = sys_addr - chan_offset; + + /* remove channel interleave */ + if (num_dcts_intlv == 2) { + if (intlv_addr == 0x4) + chan_addr = ((chan_addr >> 9) << 8) | + (chan_addr & 0xff); + else if (intlv_addr == 0x5) + chan_addr = ((chan_addr >> 10) << 9) | + (chan_addr & 0x1ff); + else + return -EINVAL; + + } else if (num_dcts_intlv == 4) { + if (intlv_addr == 0x4) + chan_addr = ((chan_addr >> 10) << 8) | + (chan_addr & 0xff); + else if (intlv_addr == 0x5) + chan_addr = ((chan_addr >> 11) << 9) | + (chan_addr & 0x1ff); + else + return -EINVAL; } + + if (dct_offset_en) { + amd64_read_pci_cfg(pvt->F1, + DRAM_CONT_HIGH_OFF + (int) channel * 4, + &tmp); + chan_addr += (u64) ((tmp >> 11) & 0xfff) << 27; + } + + f15h_select_dct(pvt, channel); + + edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr); + + /* + * Find Chip select: + * if channel = 3, then alias it to 1. This is because, in F15 M30h, + * there is support for 4 DCT's, but only 2 are currently functional. + * They are DCT0 and DCT3. But we have read all registers of DCT3 into + * pvt->csels[1]. So we need to use '1' here to get correct info. + * Refer F15 M30h BKDG Section 2.10 and 2.10.3 for clarifications. + */ + alias_channel = (channel == 3) ? 1 : channel; + + cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, alias_channel); + + if (cs_found >= 0) + *chan_sel = alias_channel; + return cs_found; } -static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, - int *node, int *chan_sel) +static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, + u64 sys_addr, + int *chan_sel) { int cs_found = -EINVAL; unsigned range; for (range = 0; range < DRAM_RANGES; range++) { - if (!dram_rw(pvt, range)) continue; - if ((get_dram_base(pvt, range) <= sys_addr) && - (get_dram_limit(pvt, range) >= sys_addr)) { + if (pvt->fam == 0x15 && pvt->model >= 0x30) + cs_found = f15_m30h_match_to_this_node(pvt, range, + sys_addr, + chan_sel); + else if ((get_dram_base(pvt, range) <= sys_addr) && + (get_dram_limit(pvt, range) >= sys_addr)) { cs_found = f1x_match_to_this_node(pvt, range, - sys_addr, node, - chan_sel); + sys_addr, chan_sel); if (cs_found >= 0) break; } @@ -1581,55 +1684,38 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, * (MCX_ADDR). */ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, - u16 syndrome) + struct err_info *err) { struct amd64_pvt *pvt = mci->pvt_info; - u32 page, offset; - int nid, csrow, chan = 0; - csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); + error_address_to_page_and_offset(sys_addr, err); - if (csrow < 0) { - edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); + err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel); + if (err->csrow < 0) { + err->err_code = ERR_CSROW; return; } - error_address_to_page_and_offset(sys_addr, &page, &offset); - /* * We need the syndromes for channel detection only when we're * ganged. Otherwise @chan should already contain the channel at * this point. */ if (dct_ganging_enabled(pvt)) - chan = get_channel_from_ecc_syndrome(mci, syndrome); - - if (chan >= 0) - edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan, - EDAC_MOD_STR); - else - /* - * Channel unknown, report all channels on this CSROW as failed. - */ - for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++) - edac_mc_handle_ce(mci, page, offset, syndrome, - csrow, chan, EDAC_MOD_STR); + err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome); } /* * debug routine to display the memory sizes of all logical DIMMs and its * CSROWs */ -static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) +static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) { - int dimm, size0, size1, factor = 0; + int dimm, size0, size1; u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; - if (boot_cpu_data.x86 == 0xf) { - if (pvt->dclr0 & WIDTH_128) - factor = 1; - + if (pvt->fam == 0xf) { /* K8 families < revF not supported yet */ if (pvt->ext_model < K8_REV_F) return; @@ -1641,7 +1727,8 @@ static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->csels[1].csbases : pvt->csels[0].csbases; - debugf1("F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n", ctrl, dbam); + edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n", + ctrl, dbam); edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl); @@ -1659,12 +1746,12 @@ static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) DBAM_DIMM(dimm, dbam)); amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", - dimm * 2, size0 << factor, - dimm * 2 + 1, size1 << factor); + dimm * 2, size0, + dimm * 2 + 1, size1); } } -static struct amd64_family_type amd64_family_types[] = { +static struct amd64_family_type family_types[] = { [K8_CPUS] = { .ctl_name = "K8", .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, @@ -1698,25 +1785,41 @@ static struct amd64_family_type amd64_family_types[] = { .read_dct_pci_cfg = f15_read_dct_pci_cfg, } }, + [F15_M30H_CPUS] = { + .ctl_name = "F15h_M30h", + .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1, + .f3_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F3, + .ops = { + .early_channel_count = f1x_early_channel_count, + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, + .dbam_to_cs = f16_dbam_to_chip_select, + .read_dct_pci_cfg = f15_read_dct_pci_cfg, + } + }, + [F16_CPUS] = { + .ctl_name = "F16h", + .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, + .f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3, + .ops = { + .early_channel_count = f1x_early_channel_count, + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, + .dbam_to_cs = f16_dbam_to_chip_select, + .read_dct_pci_cfg = f10_read_dct_pci_cfg, + } + }, + [F16_M30H_CPUS] = { + .ctl_name = "F16h_M30h", + .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1, + .f3_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F3, + .ops = { + .early_channel_count = f1x_early_channel_count, + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, + .dbam_to_cs = f16_dbam_to_chip_select, + .read_dct_pci_cfg = f10_read_dct_pci_cfg, + } + }, }; -static struct pci_dev *pci_get_related_function(unsigned int vendor, - unsigned int device, - struct pci_dev *related) -{ - struct pci_dev *dev = NULL; - - dev = pci_get_device(vendor, device, dev); - while (dev) { - if ((dev->bus->number == related->bus->number) && - (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) - break; - dev = pci_get_device(vendor, device, dev); - } - - return dev; -} - /* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm @@ -1724,7 +1827,7 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor, * * Algorithm courtesy of Ross LaFetra from AMD. */ -static u16 x4_vectors[] = { +static const u16 x4_vectors[] = { 0x2f57, 0x1afe, 0x66cc, 0xdd88, 0x11eb, 0x3396, 0x7f4c, 0xeac8, 0x0001, 0x0002, 0x0004, 0x0008, @@ -1763,7 +1866,7 @@ static u16 x4_vectors[] = { 0x19a9, 0x2efe, 0xb5cc, 0x6f88, }; -static u16 x8_vectors[] = { +static const u16 x8_vectors[] = { 0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480, 0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80, 0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80, @@ -1785,7 +1888,7 @@ static u16 x8_vectors[] = { 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000, }; -static int decode_syndrome(u16 syndrome, u16 *vectors, unsigned num_vecs, +static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs, unsigned v_dim) { unsigned int i, err_sym; @@ -1817,7 +1920,7 @@ static int decode_syndrome(u16 syndrome, u16 *vectors, unsigned num_vecs, } } - debugf0("syndrome(%x) not found\n", syndrome); + edac_dbg(0, "syndrome(%x) not found\n", syndrome); return -1; } @@ -1881,82 +1984,56 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz); } -/* - * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR - * ADDRESS and process. - */ -static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m) -{ - struct amd64_pvt *pvt = mci->pvt_info; - u64 sys_addr; - u16 syndrome; - - /* Ensure that the Error Address is VALID */ - if (!(m->status & MCI_STATUS_ADDRV)) { - amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); - edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); - return; - } - - sys_addr = get_error_address(m); - syndrome = extract_syndrome(m->status); - - amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr); - - pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, syndrome); -} - -/* Handle any Un-correctable Errors (UEs) */ -static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) +static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err, + u8 ecc_type) { - struct mem_ctl_info *log_mci, *src_mci = NULL; - int csrow; - u64 sys_addr; - u32 page, offset; - - log_mci = mci; + enum hw_event_mc_err_type err_type; + const char *string; - if (!(m->status & MCI_STATUS_ADDRV)) { - amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); - edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); + if (ecc_type == 2) + err_type = HW_EVENT_ERR_CORRECTED; + else if (ecc_type == 1) + err_type = HW_EVENT_ERR_UNCORRECTED; + else { + WARN(1, "Something is rotten in the state of Denmark.\n"); return; } - sys_addr = get_error_address(m); - - /* - * Find out which node the error address belongs to. This may be - * different from the node that detected the error. - */ - src_mci = find_mc_by_sys_addr(mci, sys_addr); - if (!src_mci) { - amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n", - (unsigned long)sys_addr); - edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); - return; + switch (err->err_code) { + case DECODE_OK: + string = ""; + break; + case ERR_NODE: + string = "Failed to map error addr to a node"; + break; + case ERR_CSROW: + string = "Failed to map error addr to a csrow"; + break; + case ERR_CHANNEL: + string = "unknown syndrome - possible error reporting race"; + break; + default: + string = "WTF error"; + break; } - log_mci = src_mci; - - csrow = sys_addr_to_csrow(log_mci, sys_addr); - if (csrow < 0) { - amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n", - (unsigned long)sys_addr); - edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); - } else { - error_address_to_page_and_offset(sys_addr, &page, &offset); - edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR); - } + edac_mc_handle_error(err_type, mci, 1, + err->page, err->offset, err->syndrome, + err->csrow, err->channel, -1, + string, ""); } -static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, - struct mce *m) +static inline void decode_bus_error(int node_id, struct mce *m) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, 0x1f); + struct mem_ctl_info *mci = mcis[node_id]; + struct amd64_pvt *pvt = mci->pvt_info; u8 ecc_type = (m->status >> 45) & 0x3; + u8 xec = XEC(m->status, 0x1f); + u16 ec = EC(m->status); + u64 sys_addr; + struct err_info err; - /* Bail early out if this was an 'observed' error */ + /* Bail out early if this was an 'observed' error */ if (PP(ec) == NBSL_PP_OBS) return; @@ -1964,15 +2041,16 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, if (xec && xec != F10_NBSL_EXT_ERR_ECC) return; + memset(&err, 0, sizeof(err)); + + sys_addr = get_error_address(pvt, m); + if (ecc_type == 2) - amd64_handle_ce(mci, m); - else if (ecc_type == 1) - amd64_handle_ue(mci, m); -} + err.syndrome = extract_syndrome(m->status); -void amd64_decode_bus_error(int node_id, struct mce *m) -{ - __amd64_decode_bus_error(mcis[node_id], m); + pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err); + + __log_bus_error(mci, &err, ecc_type); } /* @@ -2002,9 +2080,9 @@ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id) return -ENODEV; } - debugf1("F1: %s\n", pci_name(pvt->F1)); - debugf1("F2: %s\n", pci_name(pvt->F2)); - debugf1("F3: %s\n", pci_name(pvt->F3)); + edac_dbg(1, "F1: %s\n", pci_name(pvt->F1)); + edac_dbg(1, "F2: %s\n", pci_name(pvt->F2)); + edac_dbg(1, "F3: %s\n", pci_name(pvt->F3)); return 0; } @@ -2021,25 +2099,24 @@ static void free_mc_sibling_devs(struct amd64_pvt *pvt) */ static void read_mc_regs(struct amd64_pvt *pvt) { - struct cpuinfo_x86 *c = &boot_cpu_data; + unsigned range; u64 msr_val; u32 tmp; - unsigned range; /* * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since * those are Read-As-Zero */ rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem); - debugf0(" TOP_MEM: 0x%016llx\n", pvt->top_mem); + edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem); /* check first whether TOP_MEM2 is enabled */ rdmsrl(MSR_K8_SYSCFG, msr_val); if (msr_val & (1U << 21)) { rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2); - debugf0(" TOP_MEM2: 0x%016llx\n", pvt->top_mem2); + edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2); } else - debugf0(" TOP_MEM2 disabled.\n"); + edac_dbg(0, " TOP_MEM2 disabled\n"); amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap); @@ -2055,17 +2132,17 @@ static void read_mc_regs(struct amd64_pvt *pvt) if (!rw) continue; - debugf1(" DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n", - range, - get_dram_base(pvt, range), - get_dram_limit(pvt, range)); + edac_dbg(1, " DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n", + range, + get_dram_base(pvt, range), + get_dram_limit(pvt, range)); - debugf1(" IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n", - dram_intlv_en(pvt, range) ? "Enabled" : "Disabled", - (rw & 0x1) ? "R" : "-", - (rw & 0x2) ? "W" : "-", - dram_intlv_sel(pvt, range), - dram_dst_node(pvt, range)); + edac_dbg(1, " IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n", + dram_intlv_en(pvt, range) ? "Enabled" : "Disabled", + (rw & 0x1) ? "R" : "-", + (rw & 0x2) ? "W" : "-", + dram_intlv_sel(pvt, range), + dram_dst_node(pvt, range)); } read_dct_base_mask(pvt); @@ -2085,12 +2162,14 @@ static void read_mc_regs(struct amd64_pvt *pvt) pvt->ecc_sym_sz = 4; - if (c->x86 >= 0x10) { + if (pvt->fam >= 0x10) { amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); - amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1); + if (pvt->fam != 0x16) + /* F16h has only DCT0 */ + amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1); /* F10h, revD and later can do x8 ECC too */ - if ((c->x86 > 0x10 || c->x86_model > 7) && tmp & BIT(25)) + if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25)) pvt->ecc_sym_sz = 8; } dump_misc_regs(pvt); @@ -2130,9 +2209,11 @@ static void read_mc_regs(struct amd64_pvt *pvt) * encompasses * */ -static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) +static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) { u32 cs_mode, nr_pages; + u32 dbam = dct ? pvt->dbam1 : pvt->dbam0; + /* * The math on this doesn't look right on the surface because x/2*4 can @@ -2141,19 +2222,13 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) * number of bits to shift the DBAM register to extract the proper CSROW * field. */ - cs_mode = (pvt->dbam0 >> ((csrow_nr / 2) * 4)) & 0xF; + cs_mode = DBAM_DIMM(csrow_nr / 2, dbam); nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT); - /* - * If dual channel then double the memory size of single channel. - * Channel count is 1 or 2 - */ - nr_pages <<= (pvt->channel_count - 1); - - debugf0(" (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode); - debugf0(" nr_pages= %u channel-count = %d\n", - nr_pages, pvt->channel_count); + edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n", + csrow_nr, dct, cs_mode); + edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); return nr_pages; } @@ -2164,73 +2239,80 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) */ static int init_csrows(struct mem_ctl_info *mci) { - struct csrow_info *csrow; struct amd64_pvt *pvt = mci->pvt_info; - u64 input_addr_min, input_addr_max, sys_addr, base, mask; + struct csrow_info *csrow; + struct dimm_info *dimm; + enum edac_type edac_mode; + enum mem_type mtype; + int i, j, empty = 1; + int nr_pages = 0; u32 val; - int i, empty = 1; amd64_read_pci_cfg(pvt->F3, NBCFG, &val); pvt->nbcfg = val; - debugf0("node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", - pvt->mc_node_id, val, - !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); + edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", + pvt->mc_node_id, val, + !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); + /* + * We iterate over DCT0 here but we look at DCT1 in parallel, if needed. + */ for_each_chip_select(i, 0, pvt) { - csrow = &mci->csrows[i]; + bool row_dct0 = !!csrow_enabled(i, 0, pvt); + bool row_dct1 = false; + + if (pvt->fam != 0xf) + row_dct1 = !!csrow_enabled(i, 1, pvt); - if (!csrow_enabled(i, 0, pvt)) { - debugf1("----CSROW %d EMPTY for node %d\n", i, - pvt->mc_node_id); + if (!row_dct0 && !row_dct1) continue; + + csrow = mci->csrows[i]; + empty = 0; + + edac_dbg(1, "MC node: %d, csrow: %d\n", + pvt->mc_node_id, i); + + if (row_dct0) { + nr_pages = get_csrow_nr_pages(pvt, 0, i); + csrow->channels[0]->dimm->nr_pages = nr_pages; } - debugf1("----CSROW %d VALID for MC node %d\n", - i, pvt->mc_node_id); + /* K8 has only one DCT */ + if (pvt->fam != 0xf && row_dct1) { + int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i); - empty = 0; - csrow->nr_pages = amd64_csrow_nr_pages(pvt, 0, i); - find_csrow_limits(mci, i, &input_addr_min, &input_addr_max); - sys_addr = input_addr_to_sys_addr(mci, input_addr_min); - csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT); - sys_addr = input_addr_to_sys_addr(mci, input_addr_max); - csrow->last_page = (u32) (sys_addr >> PAGE_SHIFT); - - get_cs_base_and_mask(pvt, i, 0, &base, &mask); - csrow->page_mask = ~mask; - /* 8 bytes of resolution */ - - csrow->mtype = amd64_determine_memory_type(pvt, i); - - debugf1(" for MC node %d csrow %d:\n", pvt->mc_node_id, i); - debugf1(" input_addr_min: 0x%lx input_addr_max: 0x%lx\n", - (unsigned long)input_addr_min, - (unsigned long)input_addr_max); - debugf1(" sys_addr: 0x%lx page_mask: 0x%lx\n", - (unsigned long)sys_addr, csrow->page_mask); - debugf1(" nr_pages: %u first_page: 0x%lx " - "last_page: 0x%lx\n", - (unsigned)csrow->nr_pages, - csrow->first_page, csrow->last_page); + csrow->channels[1]->dimm->nr_pages = row_dct1_pages; + nr_pages += row_dct1_pages; + } + + mtype = determine_memory_type(pvt, i); + + edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages); /* * determine whether CHIPKILL or JUST ECC or NO ECC is operating */ if (pvt->nbcfg & NBCFG_ECC_ENABLE) - csrow->edac_mode = - (pvt->nbcfg & NBCFG_CHIPKILL) ? - EDAC_S4ECD4ED : EDAC_SECDED; + edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ? + EDAC_S4ECD4ED : EDAC_SECDED; else - csrow->edac_mode = EDAC_NONE; + edac_mode = EDAC_NONE; + + for (j = 0; j < pvt->channel_count; j++) { + dimm = csrow->channels[j]->dimm; + dimm->mtype = mtype; + dimm->edac_mode = edac_mode; + } } return empty; } /* get all cores on this DCT */ -static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) +static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) { int cpu; @@ -2240,7 +2322,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) } /* check MCG_CTL on all the cpus on this node */ -static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid) +static bool nb_mce_bank_enabled_on_node(u16 nid) { cpumask_var_t mask; int cpu, nbe; @@ -2259,9 +2341,9 @@ static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid) struct msr *reg = per_cpu_ptr(msrs, cpu); nbe = reg->l & MSR_MCGCTL_NBE; - debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", - cpu, reg->q, - (nbe ? "enabled" : "disabled")); + edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", + cpu, reg->q, + (nbe ? "enabled" : "disabled")); if (!nbe) goto out; @@ -2273,7 +2355,7 @@ out: return ret; } -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) { cpumask_var_t cmask; int cpu; @@ -2311,7 +2393,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) return 0; } -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { bool ret = true; @@ -2332,8 +2414,8 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, amd64_read_pci_cfg(F3, NBCFG, &value); - debugf0("1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n", - nid, value, !!(value & NBCFG_ECC_ENABLE)); + edac_dbg(0, "1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n", + nid, value, !!(value & NBCFG_ECC_ENABLE)); if (!(value & NBCFG_ECC_ENABLE)) { amd64_warn("DRAM ECC disabled on this node, enabling...\n"); @@ -2357,13 +2439,13 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, s->flags.nb_ecc_prev = 1; } - debugf0("2: node %d, NBCFG=0x%08x[DramEccEn: %d]\n", - nid, value, !!(value & NBCFG_ECC_ENABLE)); + edac_dbg(0, "2: node %d, NBCFG=0x%08x[DramEccEn: %d]\n", + nid, value, !!(value & NBCFG_ECC_ENABLE)); return ret; } -static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { u32 value, mask = 0x3; /* UECC/CECC enable */ @@ -2402,7 +2484,7 @@ static const char *ecc_msg = "'ecc_enable_override'.\n" " (Note that use of the override may cause unknown side effects.)\n"; -static bool ecc_enabled(struct pci_dev *F3, u8 nid) +static bool ecc_enabled(struct pci_dev *F3, u16 nid) { u32 value; u8 ecc_en = 0; @@ -2413,7 +2495,7 @@ static bool ecc_enabled(struct pci_dev *F3, u8 nid) ecc_en = !!(value & NBCFG_ECC_ENABLE); amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled")); - nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid); + nb_mce_en = nb_mce_bank_enabled_on_node(nid); if (!nb_mce_en) amd64_notice("NB MCE bank disabled, set MSR " "0x%08x[4] on node %d to enable.\n", @@ -2426,26 +2508,32 @@ static bool ecc_enabled(struct pci_dev *F3, u8 nid) return true; } -struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) + - ARRAY_SIZE(amd64_inj_attrs) + - 1]; +static int set_mc_sysfs_attrs(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + int rc; -struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } }; + rc = amd64_create_sysfs_dbg_files(mci); + if (rc < 0) + return rc; -static void set_mc_sysfs_attrs(struct mem_ctl_info *mci) -{ - unsigned int i = 0, j = 0; + if (pvt->fam >= 0x10) { + rc = amd64_create_sysfs_inject_files(mci); + if (rc < 0) + return rc; + } - for (; i < ARRAY_SIZE(amd64_dbg_attrs); i++) - sysfs_attrs[i] = amd64_dbg_attrs[i]; + return 0; +} - if (boot_cpu_data.x86 >= 0x10) - for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++) - sysfs_attrs[i] = amd64_inj_attrs[j]; +static void del_mc_sysfs_attrs(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; - sysfs_attrs[i] = terminator; + amd64_remove_sysfs_dbg_files(mci); - mci->mc_driver_sysfs_attributes = sysfs_attrs; + if (pvt->fam >= 0x10) + amd64_remove_sysfs_inject_files(mci); } static void setup_mci_misc_attrs(struct mem_ctl_info *mci, @@ -2462,7 +2550,7 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, if (pvt->nbcap & NBCAP_CHIPKILL) mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; - mci->edac_cap = amd64_determine_edac_cap(pvt); + mci->edac_cap = determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; mci->mod_ver = EDAC_AMD64_VERSION; mci->ctl_name = fam->ctl_name; @@ -2470,32 +2558,52 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, mci->ctl_page_to_phys = NULL; /* memory scrubber interface */ - mci->set_sdram_scrub_rate = amd64_set_scrub_rate; - mci->get_sdram_scrub_rate = amd64_get_scrub_rate; + mci->set_sdram_scrub_rate = set_scrub_rate; + mci->get_sdram_scrub_rate = get_scrub_rate; } /* * returns a pointer to the family descriptor on success, NULL otherwise. */ -static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt) +static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) { - u8 fam = boot_cpu_data.x86; struct amd64_family_type *fam_type = NULL; - switch (fam) { + pvt->ext_model = boot_cpu_data.x86_model >> 4; + pvt->stepping = boot_cpu_data.x86_mask; + pvt->model = boot_cpu_data.x86_model; + pvt->fam = boot_cpu_data.x86; + + switch (pvt->fam) { case 0xf: - fam_type = &amd64_family_types[K8_CPUS]; - pvt->ops = &amd64_family_types[K8_CPUS].ops; + fam_type = &family_types[K8_CPUS]; + pvt->ops = &family_types[K8_CPUS].ops; break; case 0x10: - fam_type = &amd64_family_types[F10_CPUS]; - pvt->ops = &amd64_family_types[F10_CPUS].ops; + fam_type = &family_types[F10_CPUS]; + pvt->ops = &family_types[F10_CPUS].ops; break; case 0x15: - fam_type = &amd64_family_types[F15_CPUS]; - pvt->ops = &amd64_family_types[F15_CPUS].ops; + if (pvt->model == 0x30) { + fam_type = &family_types[F15_M30H_CPUS]; + pvt->ops = &family_types[F15_M30H_CPUS].ops; + break; + } + + fam_type = &family_types[F15_CPUS]; + pvt->ops = &family_types[F15_CPUS].ops; + break; + + case 0x16: + if (pvt->model == 0x30) { + fam_type = &family_types[F16_M30H_CPUS]; + pvt->ops = &family_types[F16_M30H_CPUS].ops; + break; + } + fam_type = &family_types[F16_CPUS]; + pvt->ops = &family_types[F16_CPUS].ops; break; default: @@ -2503,23 +2611,22 @@ static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt) return NULL; } - pvt->ext_model = boot_cpu_data.x86_model >> 4; - amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name, - (fam == 0xf ? + (pvt->fam == 0xf ? (pvt->ext_model >= K8_REV_F ? "revF or later " : "revE or earlier ") : ""), pvt->mc_node_id); return fam_type; } -static int amd64_init_one_instance(struct pci_dev *F2) +static int init_one_instance(struct pci_dev *F2) { struct amd64_pvt *pvt = NULL; struct amd64_family_type *fam_type = NULL; struct mem_ctl_info *mci = NULL; + struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u16 nid = amd_get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2530,7 +2637,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) pvt->F2 = F2; ret = -EINVAL; - fam_type = amd64_per_family_init(pvt); + fam_type = per_family_init(pvt); if (!fam_type) goto err_free; @@ -2552,31 +2659,46 @@ static int amd64_init_one_instance(struct pci_dev *F2) goto err_siblings; ret = -ENOMEM; - mci = edac_mc_alloc(0, pvt->csels[0].b_cnt, pvt->channel_count, nid); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = pvt->csels[0].b_cnt; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + + /* + * Always allocate two channels since we can have setups with DIMMs on + * only one channel. Also, this simplifies handling later for the price + * of a couple of KBs tops. + */ + layers[1].size = 2; + layers[1].is_virt_csrow = false; + + mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0); if (!mci) goto err_siblings; mci->pvt_info = pvt; - mci->dev = &pvt->F2->dev; + mci->pdev = &pvt->F2->dev; setup_mci_misc_attrs(mci, fam_type); if (init_csrows(mci)) mci->edac_cap = EDAC_FLAG_NONE; - set_mc_sysfs_attrs(mci); - ret = -ENODEV; if (edac_mc_add_mc(mci)) { - debugf1("failed edac_mc_add_mc()\n"); + edac_dbg(1, "failed edac_mc_add_mc()\n"); goto err_add_mc; } + if (set_mc_sysfs_attrs(mci)) { + edac_dbg(1, "failed edac_mc_add_mc()\n"); + goto err_add_sysfs; + } /* register stuff with EDAC MCE */ if (report_gart_errors) amd_report_gart_errors(true); - amd_register_ecc_decoder(amd64_decode_bus_error); + amd_register_ecc_decoder(decode_bus_error); mcis[nid] = mci; @@ -2584,6 +2706,8 @@ static int amd64_init_one_instance(struct pci_dev *F2) return 0; +err_add_sysfs: + edac_mc_del_mc(mci->pdev); err_add_mc: edac_mc_free(mci); @@ -2597,17 +2721,17 @@ err_ret: return ret; } -static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, - const struct pci_device_id *mc_type) +static int probe_one_instance(struct pci_dev *pdev, + const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; ret = pci_enable_device(pdev); if (ret < 0) { - debugf0("ret=%d\n", ret); + edac_dbg(0, "ret=%d\n", ret); return -EIO; } @@ -2630,7 +2754,7 @@ static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, goto err_enable; } - ret = amd64_init_one_instance(pdev); + ret = init_one_instance(pdev); if (ret < 0) { amd64_err("Error probing instance: %d\n", nid); restore_ecc_error_reporting(s, nid, F3); @@ -2646,14 +2770,18 @@ err_out: return ret; } -static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) +static void remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u8 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; + mci = find_mci_by_dev(&pdev->dev); + WARN_ON(!mci); + + del_mc_sysfs_attrs(mci); /* Remove from EDAC CORE tracking list */ mci = edac_mc_del_mc(&pdev->dev); if (!mci) @@ -2667,7 +2795,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) /* unregister from EDAC MCE */ amd_report_gart_errors(false); - amd_unregister_ecc_decoder(amd64_decode_bus_error); + amd_unregister_ecc_decoder(decode_bus_error); kfree(ecc_stngs[nid]); ecc_stngs[nid] = NULL; @@ -2685,7 +2813,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) * PCI core identifies what devices are on a system during boot, and then * inquiry this table to see if this driver is for a given device found. */ -static const struct pci_device_id amd64_pci_table[] __devinitdata = { +static const struct pci_device_id amd64_pci_table[] = { { .vendor = PCI_VENDOR_ID_AMD, .device = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, @@ -2710,6 +2838,30 @@ static const struct pci_device_id amd64_pci_table[] __devinitdata = { .class = 0, .class_mask = 0, }, + { + .vendor = PCI_VENDOR_ID_AMD, + .device = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class = 0, + .class_mask = 0, + }, + { + .vendor = PCI_VENDOR_ID_AMD, + .device = PCI_DEVICE_ID_AMD_16H_NB_F2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class = 0, + .class_mask = 0, + }, + { + .vendor = PCI_VENDOR_ID_AMD, + .device = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class = 0, + .class_mask = 0, + }, {0, } }; @@ -2717,8 +2869,8 @@ MODULE_DEVICE_TABLE(pci, amd64_pci_table); static struct pci_driver amd64_pci_driver = { .name = EDAC_MOD_STR, - .probe = amd64_probe_one_instance, - .remove = __devexit_p(amd64_remove_one_instance), + .probe = probe_one_instance, + .remove = remove_one_instance, .id_table = amd64_pci_table, }; @@ -2727,23 +2879,18 @@ static void setup_pci_device(void) struct mem_ctl_info *mci; struct amd64_pvt *pvt; - if (amd64_ctl_pci) + if (pci_ctl) return; mci = mcis[0]; - if (mci) { - - pvt = mci->pvt_info; - amd64_ctl_pci = - edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR); - - if (!amd64_ctl_pci) { - pr_warning("%s(): Unable to create PCI control\n", - __func__); + if (!mci) + return; - pr_warning("%s(): PCI error report via EDAC not set\n", - __func__); - } + pvt = mci->pvt_info; + pci_ctl = edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR); + if (!pci_ctl) { + pr_warn("%s(): Unable to create PCI control\n", __func__); + pr_warn("%s(): PCI error report via EDAC not set\n", __func__); } } @@ -2799,8 +2946,8 @@ err_ret: static void __exit amd64_edac_exit(void) { - if (amd64_ctl_pci) - edac_pci_release_generic_ctl(amd64_ctl_pci); + if (pci_ctl) + edac_pci_release_generic_ctl(pci_ctl); pci_unregister_driver(&amd64_pci_driver); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 9a666cb985b..d903e0c2114 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -33,7 +33,7 @@ * detection. The mods to Rev F required more family * information detection. * - * Changes/Fixes by Borislav Petkov <borislav.petkov@amd.com>: + * Changes/Fixes by Borislav Petkov <bp@alien8.de>: * - misc fixes and code cleanups * * This module is based on the following documents @@ -160,19 +160,16 @@ #define OFF false /* - * Create a contiguous bitmask starting at bit position @lo and ending at - * position @hi. For example - * - * GENMASK(21, 39) gives us the 64bit vector 0x000000ffffe00000. - */ -#define GENMASK(lo, hi) (((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) - -/* * PCI-defined configuration space registers */ +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601 #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602 - +#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531 +#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532 +#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581 +#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582 /* * Function 1 - Address Map @@ -180,13 +177,22 @@ #define DRAM_BASE_LO 0x40 #define DRAM_LIMIT_LO 0x44 -#define dram_intlv_en(pvt, i) ((u8)((pvt->ranges[i].base.lo >> 8) & 0x7)) +/* + * F15 M30h D18F1x2[1C:00] + */ +#define DRAM_CONT_BASE 0x200 +#define DRAM_CONT_LIMIT 0x204 + +/* + * F15 M30h D18F1x2[4C:40] + */ +#define DRAM_CONT_HIGH_OFF 0x240 + #define dram_rw(pvt, i) ((u8)(pvt->ranges[i].base.lo & 0x3)) #define dram_intlv_sel(pvt, i) ((u8)((pvt->ranges[i].lim.lo >> 8) & 0x7)) #define dram_dst_node(pvt, i) ((u8)(pvt->ranges[i].lim.lo & 0x7)) #define DHAR 0xf0 -#define dhar_valid(pvt) ((pvt)->dhar & BIT(0)) #define dhar_mem_hoist_valid(pvt) ((pvt)->dhar & BIT(1)) #define dhar_base(pvt) ((pvt)->dhar & 0xff000000) #define k8_dhar_offset(pvt) (((pvt)->dhar & 0x0000ff00) << 16) @@ -219,7 +225,7 @@ #define DBAM1 0x180 /* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */ -#define DBAM_DIMM(i, reg) ((((reg) >> (4*i))) & 0xF) +#define DBAM_DIMM(i, reg) ((((reg) >> (4*(i)))) & 0xF) #define DBAM_MAX_VALUE 11 @@ -233,8 +239,6 @@ #define DDR3_MODE BIT(8) #define DCT_SEL_LO 0x110 -#define dct_sel_baseaddr(pvt) ((pvt)->dct_sel_lo & 0xFFFFF800) -#define dct_sel_interleave_addr(pvt) (((pvt)->dct_sel_lo >> 6) & 0x3) #define dct_high_range_enabled(pvt) ((pvt)->dct_sel_lo & BIT(0)) #define dct_interleave_enabled(pvt) ((pvt)->dct_sel_lo & BIT(2)) @@ -267,18 +271,20 @@ #define online_spare_bad_dramcs(pvt, c) (((pvt)->online_spare >> (4 + 4 * (c))) & 0x7) #define F10_NB_ARRAY_ADDR 0xB8 -#define F10_NB_ARRAY_DRAM_ECC BIT(31) +#define F10_NB_ARRAY_DRAM BIT(31) /* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline */ -#define SET_NB_ARRAY_ADDRESS(section) (((section) & 0x3) << 1) +#define SET_NB_ARRAY_ADDR(section) (((section) & 0x3) << 1) #define F10_NB_ARRAY_DATA 0xBC -#define SET_NB_DRAM_INJECTION_WRITE(word, bits) \ - (BIT(((word) & 0xF) + 20) | \ - BIT(17) | bits) -#define SET_NB_DRAM_INJECTION_READ(word, bits) \ - (BIT(((word) & 0xF) + 20) | \ - BIT(16) | bits) +#define F10_NB_ARR_ECC_WR_REQ BIT(17) +#define SET_NB_DRAM_INJECTION_WRITE(inj) \ + (BIT(((inj.word) & 0xF) + 20) | \ + F10_NB_ARR_ECC_WR_REQ | inj.bit_map) +#define SET_NB_DRAM_INJECTION_READ(inj) \ + (BIT(((inj.word) & 0xF) + 20) | \ + BIT(16) | inj.bit_map) + #define NBCAP 0xE8 #define NBCAP_CHIPKILL BIT(4) @@ -290,24 +296,21 @@ /* MSRs */ #define MSR_MCGCTL_NBE BIT(4) -/* AMD sets the first MC device at device ID 0x18. */ -static inline u8 get_node_id(struct pci_dev *pdev) -{ - return PCI_SLOT(pdev->devfn) - 0x18; -} - enum amd_families { K8_CPUS = 0, F10_CPUS, F15_CPUS, + F15_M30H_CPUS, + F16_CPUS, + F16_M30H_CPUS, NUM_FAMILIES, }; /* Error injection control structure */ struct error_injection { - u32 section; - u32 word; - u32 bit_map; + u32 section; + u32 word; + u32 bit_map; }; /* low and high part of PCI config space regs */ @@ -338,7 +341,11 @@ struct amd64_pvt { /* pci_device handles which we utilize */ struct pci_dev *F1, *F2, *F3; - unsigned mc_node_id; /* MC index of this MC node */ + u16 mc_node_id; /* MC index of this MC node */ + u8 fam; /* CPU family */ + u8 model; /* ... model */ + u8 stepping; /* ... stepping */ + int ext_model; /* extended model value of this node */ int channel_count; @@ -374,7 +381,24 @@ struct amd64_pvt { struct error_injection injection; }; -static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i) +enum err_codes { + DECODE_OK = 0, + ERR_NODE = -1, + ERR_CSROW = -2, + ERR_CHANNEL = -3, +}; + +struct err_info { + int err_code; + struct mem_ctl_info *src_mci; + int csrow; + int channel; + u16 syndrome; + u32 page; + u32 offset; +}; + +static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i) { u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8; @@ -384,7 +408,7 @@ static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i) return (((u64)pvt->ranges[i].base.hi & 0x000000ff) << 40) | addr; } -static inline u64 get_dram_limit(struct amd64_pvt *pvt, unsigned i) +static inline u64 get_dram_limit(struct amd64_pvt *pvt, u8 i) { u64 lim = (((u64)pvt->ranges[i].lim.lo & 0xffff0000) << 8) | 0x00ffffff; @@ -399,6 +423,14 @@ static inline u16 extract_syndrome(u64 status) return ((status >> 47) & 0xff) | ((status >> 16) & 0xff00); } +static inline u8 dct_sel_interleave_addr(struct amd64_pvt *pvt) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) + return (((pvt->dct_sel_hi >> 9) & 0x1) << 2) | + ((pvt->dct_sel_lo >> 6) & 0x3); + + return ((pvt)->dct_sel_lo >> 6) & 0x3; +} /* * per-node ECC settings descriptor */ @@ -413,20 +445,33 @@ struct ecc_settings { }; #ifdef CONFIG_EDAC_DEBUG -#define NUM_DBG_ATTRS 5 +int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci); +void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci); + #else -#define NUM_DBG_ATTRS 0 +static inline int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci) +{ + return 0; +} +static void inline amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci) +{ +} #endif #ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION -#define NUM_INJ_ATTRS 5 +int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci); +void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci); + #else -#define NUM_INJ_ATTRS 0 +static inline int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci) +{ + return 0; +} +static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci) +{ +} #endif -extern struct mcidev_sysfs_attribute amd64_dbg_attrs[NUM_DBG_ATTRS], - amd64_inj_attrs[NUM_INJ_ATTRS]; - /* * Each of the PCI Device IDs types have their own set of hardware accessor * functions and per device encoding/decoding logic. @@ -434,7 +479,7 @@ extern struct mcidev_sysfs_attribute amd64_dbg_attrs[NUM_DBG_ATTRS], struct low_ops { int (*early_channel_count) (struct amd64_pvt *pvt); void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr, - u16 syndrome); + struct err_info *); int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode); int (*read_dct_pci_cfg) (struct amd64_pvt *pvt, int offset, u32 *val, const char *func); @@ -446,6 +491,8 @@ struct amd64_family_type { struct low_ops ops; }; +int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, + u32 *val, const char *func); int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, u32 val, const char *func); @@ -460,3 +507,47 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, u64 *hole_offset, u64 *hole_size); + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +/* Injection helpers */ +static inline void disable_caches(void *dummy) +{ + write_cr0(read_cr0() | X86_CR0_CD); + wbinvd(); +} + +static inline void enable_caches(void *dummy) +{ + write_cr0(read_cr0() & ~X86_CR0_CD); +} + +static inline u8 dram_intlv_en(struct amd64_pvt *pvt, unsigned int i) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + u32 tmp; + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &tmp); + return (u8) tmp & 0xF; + } + return (u8) (pvt->ranges[i].base.lo >> 8) & 0x7; +} + +static inline u8 dhar_valid(struct amd64_pvt *pvt) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + u32 tmp; + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp); + return (tmp >> 1) & BIT(0); + } + return (pvt)->dhar & BIT(0); +} + +static inline u32 dct_sel_baseaddr(struct amd64_pvt *pvt) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + u32 tmp; + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp); + return (tmp >> 11) & 0x1FFF; + } + return (pvt)->dct_sel_lo & 0xFFFFF800; +} diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c index e3562288f4c..2c1bbf74060 100644 --- a/drivers/edac/amd64_edac_dbg.c +++ b/drivers/edac/amd64_edac_dbg.c @@ -1,8 +1,11 @@ #include "amd64_edac.h" #define EDAC_DCT_ATTR_SHOW(reg) \ -static ssize_t amd64_##reg##_show(struct mem_ctl_info *mci, char *data) \ +static ssize_t amd64_##reg##_show(struct device *dev, \ + struct device_attribute *mattr, \ + char *data) \ { \ + struct mem_ctl_info *mci = to_mci(dev); \ struct amd64_pvt *pvt = mci->pvt_info; \ return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \ } @@ -12,8 +15,12 @@ EDAC_DCT_ATTR_SHOW(dbam0); EDAC_DCT_ATTR_SHOW(top_mem); EDAC_DCT_ATTR_SHOW(top_mem2); -static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data) +static ssize_t amd64_hole_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct mem_ctl_info *mci = to_mci(dev); + u64 hole_base = 0; u64 hole_offset = 0; u64 hole_size = 0; @@ -27,46 +34,40 @@ static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data) /* * update NUM_DBG_ATTRS in case you add new members */ -struct mcidev_sysfs_attribute amd64_dbg_attrs[] = { +static DEVICE_ATTR(dhar, S_IRUGO, amd64_dhar_show, NULL); +static DEVICE_ATTR(dbam, S_IRUGO, amd64_dbam0_show, NULL); +static DEVICE_ATTR(topmem, S_IRUGO, amd64_top_mem_show, NULL); +static DEVICE_ATTR(topmem2, S_IRUGO, amd64_top_mem2_show, NULL); +static DEVICE_ATTR(dram_hole, S_IRUGO, amd64_hole_show, NULL); + +int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci) +{ + int rc; + + rc = device_create_file(&mci->dev, &dev_attr_dhar); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_dbam); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_topmem); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_topmem2); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_dram_hole); + if (rc < 0) + return rc; - { - .attr = { - .name = "dhar", - .mode = (S_IRUGO) - }, - .show = amd64_dhar_show, - .store = NULL, - }, - { - .attr = { - .name = "dbam", - .mode = (S_IRUGO) - }, - .show = amd64_dbam0_show, - .store = NULL, - }, - { - .attr = { - .name = "topmem", - .mode = (S_IRUGO) - }, - .show = amd64_top_mem_show, - .store = NULL, - }, - { - .attr = { - .name = "topmem2", - .mode = (S_IRUGO) - }, - .show = amd64_top_mem2_show, - .store = NULL, - }, - { - .attr = { - .name = "dram_hole", - .mode = (S_IRUGO) - }, - .show = amd64_hole_show, - .store = NULL, - }, -}; + return 0; +} + +void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci) +{ + device_remove_file(&mci->dev, &dev_attr_dhar); + device_remove_file(&mci->dev, &dev_attr_dbam); + device_remove_file(&mci->dev, &dev_attr_topmem); + device_remove_file(&mci->dev, &dev_attr_topmem2); + device_remove_file(&mci->dev, &dev_attr_dram_hole); +} diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c index 303f10e03dd..0d66ae68d46 100644 --- a/drivers/edac/amd64_edac_inj.c +++ b/drivers/edac/amd64_edac_inj.c @@ -1,7 +1,10 @@ #include "amd64_edac.h" -static ssize_t amd64_inject_section_show(struct mem_ctl_info *mci, char *buf) +static ssize_t amd64_inject_section_show(struct device *dev, + struct device_attribute *mattr, + char *buf) { + struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; return sprintf(buf, "0x%x\n", pvt->injection.section); } @@ -12,29 +15,33 @@ static ssize_t amd64_inject_section_show(struct mem_ctl_info *mci, char *buf) * * range: 0..3 */ -static ssize_t amd64_inject_section_store(struct mem_ctl_info *mci, +static ssize_t amd64_inject_section_store(struct device *dev, + struct device_attribute *mattr, const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; unsigned long value; - int ret = 0; + int ret; - ret = strict_strtoul(data, 10, &value); - if (ret != -EINVAL) { + ret = kstrtoul(data, 10, &value); + if (ret < 0) + return ret; - if (value > 3) { - amd64_warn("%s: invalid section 0x%lx\n", __func__, value); - return -EINVAL; - } - - pvt->injection.section = (u32) value; - return count; + if (value > 3) { + amd64_warn("%s: invalid section 0x%lx\n", __func__, value); + return -EINVAL; } - return ret; + + pvt->injection.section = (u32) value; + return count; } -static ssize_t amd64_inject_word_show(struct mem_ctl_info *mci, char *buf) +static ssize_t amd64_inject_word_show(struct device *dev, + struct device_attribute *mattr, + char *buf) { + struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; return sprintf(buf, "0x%x\n", pvt->injection.word); } @@ -45,29 +52,33 @@ static ssize_t amd64_inject_word_show(struct mem_ctl_info *mci, char *buf) * * range: 0..8 */ -static ssize_t amd64_inject_word_store(struct mem_ctl_info *mci, - const char *data, size_t count) +static ssize_t amd64_inject_word_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; unsigned long value; - int ret = 0; + int ret; - ret = strict_strtoul(data, 10, &value); - if (ret != -EINVAL) { + ret = kstrtoul(data, 10, &value); + if (ret < 0) + return ret; - if (value > 8) { - amd64_warn("%s: invalid word 0x%lx\n", __func__, value); - return -EINVAL; - } - - pvt->injection.word = (u32) value; - return count; + if (value > 8) { + amd64_warn("%s: invalid word 0x%lx\n", __func__, value); + return -EINVAL; } - return ret; + + pvt->injection.word = (u32) value; + return count; } -static ssize_t amd64_inject_ecc_vector_show(struct mem_ctl_info *mci, char *buf) +static ssize_t amd64_inject_ecc_vector_show(struct device *dev, + struct device_attribute *mattr, + char *buf) { + struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; return sprintf(buf, "0x%x\n", pvt->injection.bit_map); } @@ -77,137 +88,154 @@ static ssize_t amd64_inject_ecc_vector_show(struct mem_ctl_info *mci, char *buf) * corresponding bit within the error injection word above. When used during a * DRAM ECC read, it holds the contents of the of the DRAM ECC bits. */ -static ssize_t amd64_inject_ecc_vector_store(struct mem_ctl_info *mci, - const char *data, size_t count) +static ssize_t amd64_inject_ecc_vector_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; unsigned long value; - int ret = 0; - - ret = strict_strtoul(data, 16, &value); - if (ret != -EINVAL) { + int ret; - if (value & 0xFFFF0000) { - amd64_warn("%s: invalid EccVector: 0x%lx\n", - __func__, value); - return -EINVAL; - } + ret = kstrtoul(data, 16, &value); + if (ret < 0) + return ret; - pvt->injection.bit_map = (u32) value; - return count; + if (value & 0xFFFF0000) { + amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value); + return -EINVAL; } - return ret; + + pvt->injection.bit_map = (u32) value; + return count; } /* * Do a DRAM ECC read. Assemble staged values in the pvt area, format into * fields needed by the injection registers and read the NB Array Data Port. */ -static ssize_t amd64_inject_read_store(struct mem_ctl_info *mci, - const char *data, size_t count) +static ssize_t amd64_inject_read_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; unsigned long value; u32 section, word_bits; - int ret = 0; + int ret; - ret = strict_strtoul(data, 10, &value); - if (ret != -EINVAL) { + ret = kstrtoul(data, 10, &value); + if (ret < 0) + return ret; - /* Form value to choose 16-byte section of cacheline */ - section = F10_NB_ARRAY_DRAM_ECC | - SET_NB_ARRAY_ADDRESS(pvt->injection.section); - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); + /* Form value to choose 16-byte section of cacheline */ + section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); - word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection.word, - pvt->injection.bit_map); + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); - /* Issue 'word' and 'bit' along with the READ request */ - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); + word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection); - debugf0("section=0x%x word_bits=0x%x\n", section, word_bits); + /* Issue 'word' and 'bit' along with the READ request */ + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); - return count; - } - return ret; + edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); + + return count; } /* * Do a DRAM ECC write. Assemble staged values in the pvt area and format into * fields needed by the injection registers. */ -static ssize_t amd64_inject_write_store(struct mem_ctl_info *mci, +static ssize_t amd64_inject_write_store(struct device *dev, + struct device_attribute *mattr, const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; + u32 section, word_bits, tmp; unsigned long value; - u32 section, word_bits; - int ret = 0; + int ret; + + ret = kstrtoul(data, 10, &value); + if (ret < 0) + return ret; - ret = strict_strtoul(data, 10, &value); - if (ret != -EINVAL) { + /* Form value to choose 16-byte section of cacheline */ + section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); - /* Form value to choose 16-byte section of cacheline */ - section = F10_NB_ARRAY_DRAM_ECC | - SET_NB_ARRAY_ADDRESS(pvt->injection.section); - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); - word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection.word, - pvt->injection.bit_map); + word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection); - /* Issue 'word' and 'bit' along with the READ request */ - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); + pr_notice_once("Don't forget to decrease MCE polling interval in\n" + "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n" + "so that you can get the error report faster.\n"); - debugf0("section=0x%x word_bits=0x%x\n", section, word_bits); + on_each_cpu(disable_caches, NULL, 1); - return count; + /* Issue 'word' and 'bit' along with the READ request */ + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); + + retry: + /* wait until injection happens */ + amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp); + if (tmp & F10_NB_ARR_ECC_WR_REQ) { + cpu_relax(); + goto retry; } - return ret; + + on_each_cpu(enable_caches, NULL, 1); + + edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); + + return count; } /* * update NUM_INJ_ATTRS in case you add new members */ -struct mcidev_sysfs_attribute amd64_inj_attrs[] = { - - { - .attr = { - .name = "inject_section", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = amd64_inject_section_show, - .store = amd64_inject_section_store, - }, - { - .attr = { - .name = "inject_word", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = amd64_inject_word_show, - .store = amd64_inject_word_store, - }, - { - .attr = { - .name = "inject_ecc_vector", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = amd64_inject_ecc_vector_show, - .store = amd64_inject_ecc_vector_store, - }, - { - .attr = { - .name = "inject_write", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = NULL, - .store = amd64_inject_write_store, - }, - { - .attr = { - .name = "inject_read", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = NULL, - .store = amd64_inject_read_store, - }, -}; + +static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR, + amd64_inject_section_show, amd64_inject_section_store); +static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR, + amd64_inject_word_show, amd64_inject_word_store); +static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR, + amd64_inject_ecc_vector_show, amd64_inject_ecc_vector_store); +static DEVICE_ATTR(inject_write, S_IWUSR, + NULL, amd64_inject_write_store); +static DEVICE_ATTR(inject_read, S_IWUSR, + NULL, amd64_inject_read_store); + + +int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci) +{ + int rc; + + rc = device_create_file(&mci->dev, &dev_attr_inject_section); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_inject_word); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_inject_ecc_vector); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_inject_write); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_inject_read); + if (rc < 0) + return rc; + + return 0; +} + +void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci) +{ + device_remove_file(&mci->dev, &dev_attr_inject_section); + device_remove_file(&mci->dev, &dev_attr_inject_word); + device_remove_file(&mci->dev, &dev_attr_inject_ecc_vector); + device_remove_file(&mci->dev, &dev_attr_inject_write); + device_remove_file(&mci->dev, &dev_attr_inject_read); +} diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index e47e73bbbcc..3a501b530e1 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -29,7 +29,6 @@ edac_mc_chipset_printk(mci, level, "amd76x", fmt, ##arg) #define AMD76X_NR_CSROWS 8 -#define AMD76X_NR_CHANS 1 #define AMD76X_NR_DIMMS 4 /* AMD 76x register addresses - device 0 function 0 - PCI bridge */ @@ -106,7 +105,7 @@ static void amd76x_get_error_info(struct mem_ctl_info *mci, { struct pci_dev *pdev; - pdev = to_pci_dev(mci->dev); + pdev = to_pci_dev(mci->pdev); pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, &info->ecc_mode_status); @@ -146,8 +145,10 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci, if (handle_errors) { row = (info->ecc_mode_status >> 4) & 0xf; - edac_mc_handle_ue(mci, mci->csrows[row].first_page, 0, - row, mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + mci->csrows[row]->first_page, 0, 0, + row, 0, -1, + mci->ctl_name, ""); } } @@ -159,8 +160,10 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci, if (handle_errors) { row = info->ecc_mode_status & 0xf; - edac_mc_handle_ce(mci, mci->csrows[row].first_page, 0, - 0, row, 0, mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + mci->csrows[row]->first_page, 0, 0, + row, 0, -1, + mci->ctl_name, ""); } } @@ -177,7 +180,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci, static void amd76x_check(struct mem_ctl_info *mci) { struct amd76x_error_info info; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); amd76x_get_error_info(mci, &info); amd76x_process_error_info(mci, &info, 1); } @@ -186,11 +189,13 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, enum edac_type edac_mode) { struct csrow_info *csrow; + struct dimm_info *dimm; u32 mba, mba_base, mba_mask, dms; int index; for (index = 0; index < mci->nr_csrows; index++) { - csrow = &mci->csrows[index]; + csrow = mci->csrows[index]; + dimm = csrow->channels[0]->dimm; /* find the DRAM Chip Select Base address and mask */ pci_read_config_dword(pdev, @@ -203,13 +208,13 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, mba_mask = ((mba & 0xff80) << 16) | 0x7fffffUL; pci_read_config_dword(pdev, AMD76X_DRAM_MODE_STATUS, &dms); csrow->first_page = mba_base >> PAGE_SHIFT; - csrow->nr_pages = (mba_mask + 1) >> PAGE_SHIFT; - csrow->last_page = csrow->first_page + csrow->nr_pages - 1; + dimm->nr_pages = (mba_mask + 1) >> PAGE_SHIFT; + csrow->last_page = csrow->first_page + dimm->nr_pages - 1; csrow->page_mask = mba_mask >> PAGE_SHIFT; - csrow->grain = csrow->nr_pages << PAGE_SHIFT; - csrow->mtype = MEM_RDDR; - csrow->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN; - csrow->edac_mode = edac_mode; + dimm->grain = dimm->nr_pages << PAGE_SHIFT; + dimm->mtype = MEM_RDDR; + dimm->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN; + dimm->edac_mode = edac_mode; } } @@ -230,22 +235,29 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx) EDAC_SECDED, EDAC_SECDED }; - struct mem_ctl_info *mci = NULL; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; u32 ems; u32 ems_mode; struct amd76x_error_info discard; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, &ems); ems_mode = (ems >> 10) & 0x3; - mci = edac_mc_alloc(0, AMD76X_NR_CSROWS, AMD76X_NR_CHANS, 0); - if (mci == NULL) { + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = AMD76X_NR_CSROWS; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 1; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0); + + if (mci == NULL) return -ENOMEM; - } - debugf0("%s(): mci = %p\n", __func__, mci); - mci->dev = &pdev->dev; + edac_dbg(0, "mci = %p\n", mci); + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_RDDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; mci->edac_cap = ems_mode ? @@ -264,7 +276,7 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx) * type of memory controller. The ID is therefore hardcoded to 0. */ if (edac_mc_add_mc(mci)) { - debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "failed edac_mc_add_mc()\n"); goto fail; } @@ -280,7 +292,7 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx) } /* get this far and it's successful */ - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); return 0; fail: @@ -289,10 +301,10 @@ fail: } /* returns count (>= 0), or negative on error */ -static int __devinit amd76x_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int amd76x_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* don't need to call pci_enable_device() */ return amd76x_probe1(pdev, ent->driver_data); @@ -306,11 +318,11 @@ static int __devinit amd76x_init_one(struct pci_dev *pdev, * structure for the device then delete the mci and free the * resources. */ -static void __devexit amd76x_remove_one(struct pci_dev *pdev) +static void amd76x_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); if (amd76x_pci) edac_pci_release_generic_ctl(amd76x_pci); @@ -321,7 +333,7 @@ static void __devexit amd76x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id amd76x_pci_tbl[] __devinitdata = { +static const struct pci_device_id amd76x_pci_tbl[] = { { PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0, AMD762}, @@ -338,7 +350,7 @@ MODULE_DEVICE_TABLE(pci, amd76x_pci_tbl); static struct pci_driver amd76x_driver = { .name = EDAC_MOD_STR, .probe = amd76x_init_one, - .remove = __devexit_p(amd76x_remove_one), + .remove = amd76x_remove_one, .id_table = amd76x_pci_tbl, }; diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c index ddd890052ce..2b63f7c2d6d 100644 --- a/drivers/edac/amd8111_edac.c +++ b/drivers/edac/amd8111_edac.c @@ -350,6 +350,7 @@ static int amd8111_dev_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct amd8111_dev_info *dev_info = &amd8111_devices[id->driver_data]; + int ret = -ENODEV; dev_info->dev = pci_get_device(PCI_VENDOR_ID_AMD, dev_info->err_dev, NULL); @@ -359,16 +360,15 @@ static int amd8111_dev_probe(struct pci_dev *dev, "vendor %x, device %x, name %s\n", PCI_VENDOR_ID_AMD, dev_info->err_dev, dev_info->ctl_name); - return -ENODEV; + goto err; } if (pci_enable_device(dev_info->dev)) { - pci_dev_put(dev_info->dev); printk(KERN_ERR "failed to enable:" "vendor %x, device %x, name %s\n", PCI_VENDOR_ID_AMD, dev_info->err_dev, dev_info->ctl_name); - return -ENODEV; + goto err_dev_put; } /* @@ -381,8 +381,10 @@ static int amd8111_dev_probe(struct pci_dev *dev, edac_device_alloc_ctl_info(0, dev_info->ctl_name, 1, NULL, 0, 0, NULL, 0, dev_info->edac_idx); - if (!dev_info->edac_dev) - return -ENOMEM; + if (!dev_info->edac_dev) { + ret = -ENOMEM; + goto err_dev_put; + } dev_info->edac_dev->pvt_info = dev_info; dev_info->edac_dev->dev = &dev_info->dev->dev; @@ -399,8 +401,7 @@ static int amd8111_dev_probe(struct pci_dev *dev, if (edac_device_add_device(dev_info->edac_dev) > 0) { printk(KERN_ERR "failed to add edac_dev for %s\n", dev_info->ctl_name); - edac_device_free_ctl_info(dev_info->edac_dev); - return -ENODEV; + goto err_edac_free_ctl; } printk(KERN_INFO "added one edac_dev on AMD8111 " @@ -409,6 +410,13 @@ static int amd8111_dev_probe(struct pci_dev *dev, dev_info->ctl_name); return 0; + +err_edac_free_ctl: + edac_device_free_ctl_info(dev_info->edac_dev); +err_dev_put: + pci_dev_put(dev_info->dev); +err: + return ret; } static void amd8111_dev_remove(struct pci_dev *dev) @@ -437,6 +445,7 @@ static int amd8111_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct amd8111_pci_info *pci_info = &amd8111_pcis[id->driver_data]; + int ret = -ENODEV; pci_info->dev = pci_get_device(PCI_VENDOR_ID_AMD, pci_info->err_dev, NULL); @@ -446,16 +455,15 @@ static int amd8111_pci_probe(struct pci_dev *dev, "vendor %x, device %x, name %s\n", PCI_VENDOR_ID_AMD, pci_info->err_dev, pci_info->ctl_name); - return -ENODEV; + goto err; } if (pci_enable_device(pci_info->dev)) { - pci_dev_put(pci_info->dev); printk(KERN_ERR "failed to enable:" "vendor %x, device %x, name %s\n", PCI_VENDOR_ID_AMD, pci_info->err_dev, pci_info->ctl_name); - return -ENODEV; + goto err_dev_put; } /* @@ -465,8 +473,10 @@ static int amd8111_pci_probe(struct pci_dev *dev, */ pci_info->edac_idx = edac_pci_alloc_index(); pci_info->edac_dev = edac_pci_alloc_ctl_info(0, pci_info->ctl_name); - if (!pci_info->edac_dev) - return -ENOMEM; + if (!pci_info->edac_dev) { + ret = -ENOMEM; + goto err_dev_put; + } pci_info->edac_dev->pvt_info = pci_info; pci_info->edac_dev->dev = &pci_info->dev->dev; @@ -483,8 +493,7 @@ static int amd8111_pci_probe(struct pci_dev *dev, if (edac_pci_add_device(pci_info->edac_dev, pci_info->edac_idx) > 0) { printk(KERN_ERR "failed to add edac_pci for %s\n", pci_info->ctl_name); - edac_pci_free_ctl_info(pci_info->edac_dev); - return -ENODEV; + goto err_edac_free_ctl; } printk(KERN_INFO "added one edac_pci on AMD8111 " @@ -493,6 +502,13 @@ static int amd8111_pci_probe(struct pci_dev *dev, pci_info->ctl_name); return 0; + +err_edac_free_ctl: + edac_pci_free_ctl_info(pci_info->edac_dev); +err_dev_put: + pci_dev_put(pci_info->dev); +err: + return ret; } static void amd8111_pci_remove(struct pci_dev *dev) diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c index 9a6a274e692..374b57fc596 100644 --- a/drivers/edac/cell_edac.c +++ b/drivers/edac/cell_edac.c @@ -15,6 +15,7 @@ #include <linux/platform_device.h> #include <linux/stop_machine.h> #include <linux/io.h> +#include <linux/of_address.h> #include <asm/machdep.h> #include <asm/cell-regs.h> @@ -33,10 +34,10 @@ struct cell_edac_priv static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar) { struct cell_edac_priv *priv = mci->pvt_info; - struct csrow_info *csrow = &mci->csrows[0]; + struct csrow_info *csrow = mci->csrows[0]; unsigned long address, pfn, offset, syndrome; - dev_dbg(mci->dev, "ECC CE err on node %d, channel %d, ar = 0x%016llx\n", + dev_dbg(mci->pdev, "ECC CE err on node %d, channel %d, ar = 0x%016llx\n", priv->node, chan, ar); /* Address decoding is likely a bit bogus, to dbl check */ @@ -48,17 +49,18 @@ static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar) syndrome = (ar & 0x000000001fe00000ul) >> 21; /* TODO: Decoding of the error address */ - edac_mc_handle_ce(mci, csrow->first_page + pfn, offset, - syndrome, 0, chan, ""); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + csrow->first_page + pfn, offset, syndrome, + 0, chan, -1, "", ""); } static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar) { struct cell_edac_priv *priv = mci->pvt_info; - struct csrow_info *csrow = &mci->csrows[0]; + struct csrow_info *csrow = mci->csrows[0]; unsigned long address, pfn, offset; - dev_dbg(mci->dev, "ECC UE err on node %d, channel %d, ar = 0x%016llx\n", + dev_dbg(mci->pdev, "ECC UE err on node %d, channel %d, ar = 0x%016llx\n", priv->node, chan, ar); /* Address decoding is likely a bit bogus, to dbl check */ @@ -69,7 +71,9 @@ static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar) offset = address & ~PAGE_MASK; /* TODO: Decoding of the error address */ - edac_mc_handle_ue(mci, csrow->first_page + pfn, offset, 0, ""); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + csrow->first_page + pfn, offset, 0, + 0, chan, -1, "", ""); } static void cell_edac_check(struct mem_ctl_info *mci) @@ -80,7 +84,7 @@ static void cell_edac_check(struct mem_ctl_info *mci) fir = in_be64(&priv->regs->mic_fir); #ifdef DEBUG if (fir != priv->prev_fir) { - dev_dbg(mci->dev, "fir change : 0x%016lx\n", fir); + dev_dbg(mci->pdev, "fir change : 0x%016lx\n", fir); priv->prev_fir = fir; } #endif @@ -116,16 +120,19 @@ static void cell_edac_check(struct mem_ctl_info *mci) mb(); /* sync up */ #ifdef DEBUG fir = in_be64(&priv->regs->mic_fir); - dev_dbg(mci->dev, "fir clear : 0x%016lx\n", fir); + dev_dbg(mci->pdev, "fir clear : 0x%016lx\n", fir); #endif } } -static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci) +static void cell_edac_init_csrows(struct mem_ctl_info *mci) { - struct csrow_info *csrow = &mci->csrows[0]; + struct csrow_info *csrow = mci->csrows[0]; + struct dimm_info *dimm; struct cell_edac_priv *priv = mci->pvt_info; struct device_node *np; + int j; + u32 nr_pages; for (np = NULL; (np = of_find_node_by_name(np, "memory")) != NULL;) { @@ -140,26 +147,33 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci) if (of_node_to_nid(np) != priv->node) continue; csrow->first_page = r.start >> PAGE_SHIFT; - csrow->nr_pages = resource_size(&r) >> PAGE_SHIFT; - csrow->last_page = csrow->first_page + csrow->nr_pages - 1; - csrow->mtype = MEM_XDR; - csrow->edac_mode = EDAC_SECDED; - dev_dbg(mci->dev, + nr_pages = resource_size(&r) >> PAGE_SHIFT; + csrow->last_page = csrow->first_page + nr_pages - 1; + + for (j = 0; j < csrow->nr_channels; j++) { + dimm = csrow->channels[j]->dimm; + dimm->mtype = MEM_XDR; + dimm->edac_mode = EDAC_SECDED; + dimm->nr_pages = nr_pages / csrow->nr_channels; + } + dev_dbg(mci->pdev, "Initialized on node %d, chanmask=0x%x," " first_page=0x%lx, nr_pages=0x%x\n", priv->node, priv->chanmask, - csrow->first_page, csrow->nr_pages); + csrow->first_page, nr_pages); break; } + of_node_put(np); } -static int __devinit cell_edac_probe(struct platform_device *pdev) +static int cell_edac_probe(struct platform_device *pdev) { struct cbe_mic_tm_regs __iomem *regs; struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct cell_edac_priv *priv; u64 reg; - int rc, chanmask; + int rc, chanmask, num_chans; regs = cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(pdev->id)); if (regs == NULL) @@ -184,15 +198,23 @@ static int __devinit cell_edac_probe(struct platform_device *pdev) in_be64(®s->mic_fir)); /* Allocate & init EDAC MC data structure */ - mci = edac_mc_alloc(sizeof(struct cell_edac_priv), 1, - chanmask == 3 ? 2 : 1, pdev->id); + num_chans = chanmask == 3 ? 2 : 1; + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 1; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = num_chans; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers, + sizeof(struct cell_edac_priv)); if (mci == NULL) return -ENOMEM; priv = mci->pvt_info; priv->regs = regs; priv->node = pdev->id; priv->chanmask = chanmask; - mci->dev = &pdev->dev; + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_XDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_EC | EDAC_FLAG_SECDED; @@ -213,7 +235,7 @@ static int __devinit cell_edac_probe(struct platform_device *pdev) return 0; } -static int __devexit cell_edac_remove(struct platform_device *pdev) +static int cell_edac_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); if (mci) @@ -227,7 +249,7 @@ static struct platform_driver cell_edac_driver = { .owner = THIS_MODULE, }, .probe = cell_edac_probe, - .remove = __devexit_p(cell_edac_remove), + .remove = cell_edac_remove, }; static int __init cell_edac_init(void) diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index a774c0ddaf5..df6575f1430 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -316,22 +316,23 @@ static void get_total_mem(struct cpc925_mc_pdata *pdata) reg += aw; size = of_read_number(reg, sw); reg += sw; - debugf1("%s: start 0x%lx, size 0x%lx\n", __func__, - start, size); + edac_dbg(1, "start 0x%lx, size 0x%lx\n", start, size); pdata->total_mem += size; } while (reg < reg_end); of_node_put(np); - debugf0("%s: total_mem 0x%lx\n", __func__, pdata->total_mem); + edac_dbg(0, "total_mem 0x%lx\n", pdata->total_mem); } static void cpc925_init_csrows(struct mem_ctl_info *mci) { struct cpc925_mc_pdata *pdata = mci->pvt_info; struct csrow_info *csrow; - int index; - u32 mbmr, mbbar, bba; - unsigned long row_size, last_nr_pages = 0; + struct dimm_info *dimm; + enum dev_type dtype; + int index, j; + u32 mbmr, mbbar, bba, grain; + unsigned long row_size, nr_pages, last_nr_pages = 0; get_total_mem(pdata); @@ -346,40 +347,44 @@ static void cpc925_init_csrows(struct mem_ctl_info *mci) if (bba == 0) continue; /* not populated */ - csrow = &mci->csrows[index]; + csrow = mci->csrows[index]; row_size = bba * (1UL << 28); /* 256M */ csrow->first_page = last_nr_pages; - csrow->nr_pages = row_size >> PAGE_SHIFT; - csrow->last_page = csrow->first_page + csrow->nr_pages - 1; + nr_pages = row_size >> PAGE_SHIFT; + csrow->last_page = csrow->first_page + nr_pages - 1; last_nr_pages = csrow->last_page + 1; - csrow->mtype = MEM_RDDR; - csrow->edac_mode = EDAC_SECDED; - switch (csrow->nr_channels) { case 1: /* Single channel */ - csrow->grain = 32; /* four-beat burst of 32 bytes */ + grain = 32; /* four-beat burst of 32 bytes */ break; case 2: /* Dual channel */ default: - csrow->grain = 64; /* four-beat burst of 64 bytes */ + grain = 64; /* four-beat burst of 64 bytes */ break; } - switch ((mbmr & MBMR_MODE_MASK) >> MBMR_MODE_SHIFT) { case 6: /* 0110, no way to differentiate X8 VS X16 */ case 5: /* 0101 */ case 8: /* 1000 */ - csrow->dtype = DEV_X16; + dtype = DEV_X16; break; case 7: /* 0111 */ case 9: /* 1001 */ - csrow->dtype = DEV_X8; + dtype = DEV_X8; break; default: - csrow->dtype = DEV_UNKNOWN; - break; + dtype = DEV_UNKNOWN; + break; + } + for (j = 0; j < csrow->nr_channels; j++) { + dimm = csrow->channels[j]->dimm; + dimm->nr_pages = nr_pages / csrow->nr_channels; + dimm->mtype = MEM_RDDR; + dimm->edac_mode = EDAC_SECDED; + dimm->grain = grain; + dimm->dtype = dtype; } } } @@ -457,7 +462,7 @@ static void cpc925_mc_get_pfn(struct mem_ctl_info *mci, u32 mear, *csrow = rank; #ifdef CONFIG_EDAC_DEBUG - if (mci->csrows[rank].first_page == 0) { + if (mci->csrows[rank]->first_page == 0) { cpc925_mc_printk(mci, KERN_ERR, "ECC occurs in a " "non-populated csrow, broken hardware?\n"); return; @@ -465,7 +470,7 @@ static void cpc925_mc_get_pfn(struct mem_ctl_info *mci, u32 mear, #endif /* Revert csrow number */ - pa = mci->csrows[rank].first_page << PAGE_SHIFT; + pa = mci->csrows[rank]->first_page << PAGE_SHIFT; /* Revert column address */ col += bcnt; @@ -506,7 +511,7 @@ static void cpc925_mc_get_pfn(struct mem_ctl_info *mci, u32 mear, *offset = pa & (PAGE_SIZE - 1); *pfn = pa >> PAGE_SHIFT; - debugf0("%s: ECC physical address 0x%lx\n", __func__, pa); + edac_dbg(0, "ECC physical address 0x%lx\n", pa); } static int cpc925_mc_find_channel(struct mem_ctl_info *mci, u16 syndrome) @@ -549,13 +554,18 @@ static void cpc925_mc_check(struct mem_ctl_info *mci) if (apiexcp & CECC_EXCP_DETECTED) { cpc925_mc_printk(mci, KERN_INFO, "DRAM CECC Fault\n"); channel = cpc925_mc_find_channel(mci, syndrome); - edac_mc_handle_ce(mci, pfn, offset, syndrome, - csrow, channel, mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + pfn, offset, syndrome, + csrow, channel, -1, + mci->ctl_name, ""); } if (apiexcp & UECC_EXCP_DETECTED) { cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n"); - edac_mc_handle_ue(mci, pfn, offset, csrow, mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + pfn, offset, 0, + csrow, -1, -1, + mci->ctl_name, ""); } cpc925_mc_printk(mci, KERN_INFO, "Dump registers:\n"); @@ -779,7 +789,7 @@ static struct cpc925_dev_info cpc925_devs[] = { .exit = cpc925_htlink_exit, .check = cpc925_htlink_check, }, - {0}, /* Terminated by NULL */ + { } }; /* @@ -841,8 +851,8 @@ static void cpc925_add_edac_devices(void __iomem *vbase) goto err2; } - debugf0("%s: Successfully added edac device for %s\n", - __func__, dev_info->ctl_name); + edac_dbg(0, "Successfully added edac device for %s\n", + dev_info->ctl_name); continue; @@ -873,8 +883,8 @@ static void cpc925_del_edac_devices(void) if (dev_info->exit) dev_info->exit(dev_info); - debugf0("%s: Successfully deleted edac device for %s\n", - __func__, dev_info->ctl_name); + edac_dbg(0, "Successfully deleted edac device for %s\n", + dev_info->ctl_name); } } @@ -889,7 +899,7 @@ static int cpc925_get_sdram_scrub_rate(struct mem_ctl_info *mci) mscr = __raw_readl(pdata->vbase + REG_MSCR_OFFSET); si = (mscr & MSCR_SI_MASK) >> MSCR_SI_SHIFT; - debugf0("%s, Mem Scrub Ctrl Register 0x%x\n", __func__, mscr); + edac_dbg(0, "Mem Scrub Ctrl Register 0x%x\n", mscr); if (((mscr & MSCR_SCRUB_MOD_MASK) != MSCR_BACKGR_SCRUB) || (si == 0)) { @@ -917,22 +927,22 @@ static int cpc925_mc_get_channels(void __iomem *vbase) ((mbcr & MBCR_64BITBUS_MASK) == 0)) dual = 1; - debugf0("%s: %s channel\n", __func__, - (dual > 0) ? "Dual" : "Single"); + edac_dbg(0, "%s channel\n", (dual > 0) ? "Dual" : "Single"); return dual; } -static int __devinit cpc925_probe(struct platform_device *pdev) +static int cpc925_probe(struct platform_device *pdev) { static int edac_mc_idx; struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; void __iomem *vbase; struct cpc925_mc_pdata *pdata; struct resource *r; int res = 0, nr_channels; - debugf0("%s: %s platform device found!\n", __func__, pdev->name); + edac_dbg(0, "%s platform device found!\n", pdev->name); if (!devres_open_group(&pdev->dev, cpc925_probe, GFP_KERNEL)) { res = -ENOMEM; @@ -962,9 +972,16 @@ static int __devinit cpc925_probe(struct platform_device *pdev) goto err2; } - nr_channels = cpc925_mc_get_channels(vbase); - mci = edac_mc_alloc(sizeof(struct cpc925_mc_pdata), - CPC925_NR_CSROWS, nr_channels + 1, edac_mc_idx); + nr_channels = cpc925_mc_get_channels(vbase) + 1; + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = CPC925_NR_CSROWS; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = nr_channels; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers, + sizeof(struct cpc925_mc_pdata)); if (!mci) { cpc925_printk(KERN_ERR, "No memory for mem_ctl_info\n"); res = -ENOMEM; @@ -976,7 +993,7 @@ static int __devinit cpc925_probe(struct platform_device *pdev) pdata->edac_idx = edac_mc_idx++; pdata->name = pdev->name; - mci->dev = &pdev->dev; + mci->pdev = &pdev->dev; platform_set_drvdata(pdev, mci); mci->dev_name = dev_name(&pdev->dev); mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR; @@ -1007,7 +1024,7 @@ static int __devinit cpc925_probe(struct platform_device *pdev) cpc925_add_edac_devices(vbase); /* get this far and it's successful */ - debugf0("%s: success\n", __func__); + edac_dbg(0, "success\n"); res = 0; goto out; diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index 1af531a11d2..b2d71388172 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -4,7 +4,11 @@ * This file may be distributed under the terms of the * GNU General Public License. * - * See "enum e752x_chips" below for supported chipsets + * Implement support for the e7520, E7525, e7320 and i3100 memory controllers. + * + * Datasheets: + * http://www.intel.in/content/www/in/en/chipsets/e7525-memory-controller-hub-datasheet.html + * ftp://download.intel.com/design/intarch/datashts/31345803.pdf * * Written by Tom Zimmerman * @@ -13,8 +17,6 @@ * Wang Zhenyu at intel.com * Dave Jiang at mvista.com * - * $Id: edac_e752x.c,v 1.5.2.11 2005/10/05 00:43:44 dsp_llnl Exp $ - * */ #include <linux/module.h> @@ -187,8 +189,26 @@ enum e752x_chips { I3100 = 3 }; +/* + * Those chips Support single-rank and dual-rank memories only. + * + * On e752x chips, the odd rows are present only on dual-rank memories. + * Dividing the rank by two will provide the dimm# + * + * i3100 MC has a different mapping: it supports only 4 ranks. + * + * The mapping is (from 1 to n): + * slot single-ranked double-ranked + * dimm #1 -> rank #4 NA + * dimm #2 -> rank #3 NA + * dimm #3 -> rank #2 Ranks 2 and 3 + * dimm #4 -> rank $1 Ranks 1 and 4 + * + * FIXME: The current mapping for i3100 considers that it supports up to 8 + * ranks/chanel, but datasheet says that the MC supports only 4 ranks. + */ + struct e752x_pvt { - struct pci_dev *bridge_ck; struct pci_dev *dev_d0f0; struct pci_dev *dev_d0f1; u32 tolm; @@ -288,7 +308,7 @@ static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, u32 remap; struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); if (page < pvt->tolm) return page; @@ -314,7 +334,7 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one, int i; struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); /* convert the addr to 4k page */ page = sec1_add >> (PAGE_SHIFT - 4); @@ -350,8 +370,10 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one, channel = !(error_one & 1); /* e752x mc reads 34:6 of the DRAM linear address */ - edac_mc_handle_ce(mci, page, offset_in_page(sec1_add << 4), - sec1_syndrome, row, channel, "e752x CE"); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + page, offset_in_page(sec1_add << 4), sec1_syndrome, + row, channel, -1, + "e752x CE", ""); } static inline void process_ce(struct mem_ctl_info *mci, u16 error_one, @@ -371,7 +393,7 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one, int row; struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); if (error_one & 0x0202) { error_2b = ded_add; @@ -385,9 +407,12 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one, edac_mc_find_csrow_by_page(mci, block_page); /* e752x mc reads 34:6 of the DRAM linear address */ - edac_mc_handle_ue(mci, block_page, - offset_in_page(error_2b << 4), - row, "e752x UE from Read"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + block_page, + offset_in_page(error_2b << 4), 0, + row, -1, -1, + "e752x UE from Read", ""); + } if (error_one & 0x0404) { error_2b = scrb_add; @@ -401,9 +426,11 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one, edac_mc_find_csrow_by_page(mci, block_page); /* e752x mc reads 34:6 of the DRAM linear address */ - edac_mc_handle_ue(mci, block_page, - offset_in_page(error_2b << 4), - row, "e752x UE from Scruber"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + block_page, + offset_in_page(error_2b << 4), 0, + row, -1, -1, + "e752x UE from Scruber", ""); } } @@ -425,8 +452,10 @@ static inline void process_ue_no_info_wr(struct mem_ctl_info *mci, if (!handle_error) return; - debugf3("%s()\n", __func__); - edac_mc_handle_ue_no_info(mci, "e752x UE log memory write"); + edac_dbg(3, "\n"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, + "e752x UE log memory write", ""); } static void do_process_ded_retry(struct mem_ctl_info *mci, u16 error, @@ -861,7 +890,7 @@ static void e752x_get_error_info(struct mem_ctl_info *mci, info->buf_ferr); if (info->dram_ferr) - pci_write_bits16(pvt->bridge_ck, E752X_DRAM_FERR, + pci_write_bits16(pvt->dev_d0f1, E752X_DRAM_FERR, info->dram_ferr, info->dram_ferr); pci_write_config_dword(dev, E752X_FERR_GLOBAL, @@ -906,7 +935,7 @@ static void e752x_get_error_info(struct mem_ctl_info *mci, info->buf_nerr); if (info->dram_nerr) - pci_write_bits16(pvt->bridge_ck, E752X_DRAM_NERR, + pci_write_bits16(pvt->dev_d0f1, E752X_DRAM_NERR, info->dram_nerr, info->dram_nerr); pci_write_config_dword(dev, E752X_NERR_GLOBAL, @@ -952,7 +981,7 @@ static void e752x_check(struct mem_ctl_info *mci) { struct e752x_error_info info; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); e752x_get_error_info(mci, &info); e752x_process_error_info(mci, &info, 1); } @@ -1039,12 +1068,13 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, u16 ddrcsr) { struct csrow_info *csrow; + enum edac_type edac_mode; unsigned long last_cumul_size; int index, mem_dev, drc_chan; int drc_drbg; /* DRB granularity 0=64mb, 1=128mb */ int drc_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */ u8 value; - u32 dra, drc, cumul_size; + u32 dra, drc, cumul_size, i, nr_pages; dra = 0; for (index = 0; index < 4; index++) { @@ -1053,7 +1083,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, dra |= dra_reg << (index * 8); } pci_read_config_dword(pdev, E752X_DRC, &drc); - drc_chan = dual_channel_active(ddrcsr); + drc_chan = dual_channel_active(ddrcsr) ? 1 : 0; drc_drbg = drc_chan + 1; /* 128 in dual mode, 64 in single */ drc_ddim = (drc >> 20) & 0x3; @@ -1065,39 +1095,45 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, for (last_cumul_size = index = 0; index < mci->nr_csrows; index++) { /* mem_dev 0=x8, 1=x4 */ mem_dev = (dra >> (index * 4 + 2)) & 0x3; - csrow = &mci->csrows[remap_csrow_index(mci, index)]; + csrow = mci->csrows[remap_csrow_index(mci, index)]; mem_dev = (mem_dev == 2); pci_read_config_byte(pdev, E752X_DRB + index, &value); /* convert a 128 or 64 MiB DRB to a page size. */ cumul_size = value << (25 + drc_drbg - PAGE_SHIFT); - debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index, - cumul_size); + edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size); if (cumul_size == last_cumul_size) continue; /* not populated */ csrow->first_page = last_cumul_size; csrow->last_page = cumul_size - 1; - csrow->nr_pages = cumul_size - last_cumul_size; + nr_pages = cumul_size - last_cumul_size; last_cumul_size = cumul_size; - csrow->grain = 1 << 12; /* 4KiB - resolution of CELOG */ - csrow->mtype = MEM_RDDR; /* only one type supported */ - csrow->dtype = mem_dev ? DEV_X4 : DEV_X8; /* - * if single channel or x8 devices then SECDED - * if dual channel and x4 then S4ECD4ED - */ + * if single channel or x8 devices then SECDED + * if dual channel and x4 then S4ECD4ED + */ if (drc_ddim) { if (drc_chan && mem_dev) { - csrow->edac_mode = EDAC_S4ECD4ED; + edac_mode = EDAC_S4ECD4ED; mci->edac_cap |= EDAC_FLAG_S4ECD4ED; } else { - csrow->edac_mode = EDAC_SECDED; + edac_mode = EDAC_SECDED; mci->edac_cap |= EDAC_FLAG_SECDED; } } else - csrow->edac_mode = EDAC_NONE; + edac_mode = EDAC_NONE; + for (i = 0; i < csrow->nr_channels; i++) { + struct dimm_info *dimm = csrow->channels[i]->dimm; + + edac_dbg(3, "Initializing rank at (%i,%i)\n", index, i); + dimm->nr_pages = nr_pages / csrow->nr_channels; + dimm->grain = 1 << 12; /* 4KiB - resolution of CELOG */ + dimm->mtype = MEM_RDDR; /* only one type supported */ + dimm->dtype = mem_dev ? DEV_X4 : DEV_X8; + dimm->edac_mode = edac_mode; + } } } @@ -1140,36 +1176,33 @@ static void e752x_init_mem_map_table(struct pci_dev *pdev, static int e752x_get_devs(struct pci_dev *pdev, int dev_idx, struct e752x_pvt *pvt) { - struct pci_dev *dev; + pvt->dev_d0f1 = pci_get_device(PCI_VENDOR_ID_INTEL, + pvt->dev_info->err_dev, NULL); - pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL, - pvt->dev_info->err_dev, pvt->bridge_ck); - - if (pvt->bridge_ck == NULL) - pvt->bridge_ck = pci_scan_single_device(pdev->bus, + if (pvt->dev_d0f1 == NULL) { + pvt->dev_d0f1 = pci_scan_single_device(pdev->bus, PCI_DEVFN(0, 1)); + pci_dev_get(pvt->dev_d0f1); + } - if (pvt->bridge_ck == NULL) { + if (pvt->dev_d0f1 == NULL) { e752x_printk(KERN_ERR, "error reporting device not found:" "vendor %x device 0x%x (broken BIOS?)\n", PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].err_dev); return 1; } - dev = pci_get_device(PCI_VENDOR_ID_INTEL, + pvt->dev_d0f0 = pci_get_device(PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].ctl_dev, NULL); - if (dev == NULL) + if (pvt->dev_d0f0 == NULL) goto fail; - pvt->dev_d0f0 = dev; - pvt->dev_d0f1 = pci_dev_get(pvt->bridge_ck); - return 0; fail: - pci_dev_put(pvt->bridge_ck); + pci_dev_put(pvt->dev_d0f1); return 1; } @@ -1226,13 +1259,14 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) u16 pci_data; u8 stat8; struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct e752x_pvt *pvt; u16 ddrcsr; int drc_chan; /* Number of channels 0=1chan,1=2chan */ struct e752x_error_info discard; - debugf0("%s(): mci\n", __func__); - debugf0("Starting Probe1\n"); + edac_dbg(0, "mci\n"); + edac_dbg(0, "Starting Probe1\n"); /* check to see if device 0 function 1 is enabled; if it isn't, we * assume the BIOS has reserved it for a reason and is expecting @@ -1252,13 +1286,17 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) /* Dual channel = 1, Single channel = 0 */ drc_chan = dual_channel_active(ddrcsr); - mci = edac_mc_alloc(sizeof(*pvt), E752X_NR_CSROWS, drc_chan + 1, 0); - - if (mci == NULL) { + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = E752X_NR_CSROWS; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = drc_chan + 1; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); + if (mci == NULL) return -ENOMEM; - } - debugf3("%s(): init mci\n", __func__); + edac_dbg(3, "init mci\n"); mci->mtype_cap = MEM_FLAG_RDDR; /* 3100 IMCH supports SECDEC only */ mci->edac_ctl_cap = (dev_idx == I3100) ? EDAC_FLAG_SECDED : @@ -1266,9 +1304,9 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) /* FIXME - what if different memory types are in different csrows? */ mci->mod_name = EDAC_MOD_STR; mci->mod_ver = E752X_REVISION; - mci->dev = &pdev->dev; + mci->pdev = &pdev->dev; - debugf3("%s(): init pvt\n", __func__); + edac_dbg(3, "init pvt\n"); pvt = (struct e752x_pvt *)mci->pvt_info; pvt->dev_info = &e752x_devs[dev_idx]; pvt->mc_symmetric = ((ddrcsr & 0x10) != 0); @@ -1278,7 +1316,7 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) return -ENODEV; } - debugf3("%s(): more mci init\n", __func__); + edac_dbg(3, "more mci init\n"); mci->ctl_name = pvt->dev_info->ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = e752x_check; @@ -1300,7 +1338,7 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) mci->edac_cap = EDAC_FLAG_SECDED; /* the only mode supported */ else mci->edac_cap |= EDAC_FLAG_NONE; - debugf3("%s(): tolm, remapbase, remaplimit\n", __func__); + edac_dbg(3, "tolm, remapbase, remaplimit\n"); /* load the top of low memory, remap base, and remap limit vars */ pci_read_config_word(pdev, E752X_TOLM, &pci_data); @@ -1317,7 +1355,7 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) * type of memory controller. The ID is therefore hardcoded to 0. */ if (edac_mc_add_mc(mci)) { - debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "failed edac_mc_add_mc()\n"); goto fail; } @@ -1335,23 +1373,21 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) } /* get this far and it's successful */ - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); return 0; fail: pci_dev_put(pvt->dev_d0f0); pci_dev_put(pvt->dev_d0f1); - pci_dev_put(pvt->bridge_ck); edac_mc_free(mci); return -ENODEV; } /* returns count (>= 0), or negative on error */ -static int __devinit e752x_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int e752x_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* wake up and enable device */ if (pci_enable_device(pdev) < 0) @@ -1360,12 +1396,12 @@ static int __devinit e752x_init_one(struct pci_dev *pdev, return e752x_probe1(pdev, ent->driver_data); } -static void __devexit e752x_remove_one(struct pci_dev *pdev) +static void e752x_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct e752x_pvt *pvt; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); if (e752x_pci) edac_pci_release_generic_ctl(e752x_pci); @@ -1376,11 +1412,10 @@ static void __devexit e752x_remove_one(struct pci_dev *pdev) pvt = (struct e752x_pvt *)mci->pvt_info; pci_dev_put(pvt->dev_d0f0); pci_dev_put(pvt->dev_d0f1); - pci_dev_put(pvt->bridge_ck); edac_mc_free(mci); } -static const struct pci_device_id e752x_pci_tbl[] __devinitdata = { +static const struct pci_device_id e752x_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 7520_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7520}, @@ -1403,7 +1438,7 @@ MODULE_DEVICE_TABLE(pci, e752x_pci_tbl); static struct pci_driver e752x_driver = { .name = EDAC_MOD_STR, .probe = e752x_init_one, - .remove = __devexit_p(e752x_remove_one), + .remove = e752x_remove_one, .id_table = e752x_pci_tbl, }; @@ -1411,7 +1446,7 @@ static int __init e752x_init(void) { int pci_rc; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -1422,7 +1457,7 @@ static int __init e752x_init(void) static void __exit e752x_exit(void) { - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); pci_unregister_driver(&e752x_driver); } diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 6ffb6d23281..3cda79bc8b0 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -10,6 +10,9 @@ * Based on work by Dan Hollis <goemon at anime dot net> and others. * http://www.anime.net/~goemon/linux-ecc/ * + * Datasheet: + * http://www.intel.com/content/www/us/en/chipsets/e7501-chipset-memory-controller-hub-datasheet.html + * * Contributors: * Eric Biederman (Linux Networx) * Tom Zimmerman (Linux Networx) @@ -71,7 +74,7 @@ #endif /* PCI_DEVICE_ID_INTEL_7505_1_ERR */ #define E7XXX_NR_CSROWS 8 /* number of csrows */ -#define E7XXX_NR_DIMMS 8 /* FIXME - is this correct? */ +#define E7XXX_NR_DIMMS 8 /* 2 channels, 4 dimms/channel */ /* E7XXX register addresses - device 0 function 0 */ #define E7XXX_DRB 0x60 /* DRAM row boundary register (8b) */ @@ -163,7 +166,7 @@ static const struct e7xxx_dev_info e7xxx_devs[] = { /* FIXME - is this valid for both SECDED and S4ECD4ED? */ static inline int e7xxx_find_channel(u16 syndrome) { - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); if ((syndrome & 0xff00) == 0) return 0; @@ -183,7 +186,7 @@ static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, u32 remap; struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); if ((page < pvt->tolm) || ((page >= 0x100000) && (page < pvt->remapbase))) @@ -205,7 +208,7 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info) int row; int channel; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); /* read the error address */ error_1b = info->dram_celog_add; /* FIXME - should use PAGE_SHIFT */ @@ -216,13 +219,15 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info) row = edac_mc_find_csrow_by_page(mci, page); /* convert syndrome to channel */ channel = e7xxx_find_channel(syndrome); - edac_mc_handle_ce(mci, page, 0, syndrome, row, channel, "e7xxx CE"); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, page, 0, syndrome, + row, channel, -1, "e7xxx CE", ""); } static void process_ce_no_info(struct mem_ctl_info *mci) { - debugf3("%s()\n", __func__); - edac_mc_handle_ce_no_info(mci, "e7xxx CE log register overflow"); + edac_dbg(3, "\n"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1, + "e7xxx CE log register overflow", ""); } static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info) @@ -230,19 +235,23 @@ static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info) u32 error_2b, block_page; int row; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); /* read the error address */ error_2b = info->dram_uelog_add; /* FIXME - should use PAGE_SHIFT */ block_page = error_2b >> 6; /* convert to 4k address */ row = edac_mc_find_csrow_by_page(mci, block_page); - edac_mc_handle_ue(mci, block_page, 0, row, "e7xxx UE"); + + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, block_page, 0, 0, + row, -1, -1, "e7xxx UE", ""); } static void process_ue_no_info(struct mem_ctl_info *mci) { - debugf3("%s()\n", __func__); - edac_mc_handle_ue_no_info(mci, "e7xxx UE log register overflow"); + edac_dbg(3, "\n"); + + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1, + "e7xxx UE log register overflow", ""); } static void e7xxx_get_error_info(struct mem_ctl_info *mci, @@ -325,7 +334,7 @@ static void e7xxx_check(struct mem_ctl_info *mci) { struct e7xxx_error_info info; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); e7xxx_get_error_info(mci, &info); e7xxx_process_error_info(mci, &info, 1); } @@ -347,11 +356,13 @@ static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, int dev_idx, u32 drc) { unsigned long last_cumul_size; - int index; + int index, j; u8 value; - u32 dra, cumul_size; + u32 dra, cumul_size, nr_pages; int drc_chan, drc_drbg, drc_ddim, mem_dev; struct csrow_info *csrow; + struct dimm_info *dimm; + enum edac_type edac_mode; pci_read_config_dword(pdev, E7XXX_DRA, &dra); drc_chan = dual_channel_active(drc, dev_idx); @@ -367,38 +378,44 @@ static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, for (index = 0; index < mci->nr_csrows; index++) { /* mem_dev 0=x8, 1=x4 */ mem_dev = (dra >> (index * 4 + 3)) & 0x1; - csrow = &mci->csrows[index]; + csrow = mci->csrows[index]; pci_read_config_byte(pdev, E7XXX_DRB + index, &value); /* convert a 64 or 32 MiB DRB to a page size. */ cumul_size = value << (25 + drc_drbg - PAGE_SHIFT); - debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index, - cumul_size); + edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size); if (cumul_size == last_cumul_size) continue; /* not populated */ csrow->first_page = last_cumul_size; csrow->last_page = cumul_size - 1; - csrow->nr_pages = cumul_size - last_cumul_size; + nr_pages = cumul_size - last_cumul_size; last_cumul_size = cumul_size; - csrow->grain = 1 << 12; /* 4KiB - resolution of CELOG */ - csrow->mtype = MEM_RDDR; /* only one type supported */ - csrow->dtype = mem_dev ? DEV_X4 : DEV_X8; /* - * if single channel or x8 devices then SECDED - * if dual channel and x4 then S4ECD4ED - */ + * if single channel or x8 devices then SECDED + * if dual channel and x4 then S4ECD4ED + */ if (drc_ddim) { if (drc_chan && mem_dev) { - csrow->edac_mode = EDAC_S4ECD4ED; + edac_mode = EDAC_S4ECD4ED; mci->edac_cap |= EDAC_FLAG_S4ECD4ED; } else { - csrow->edac_mode = EDAC_SECDED; + edac_mode = EDAC_SECDED; mci->edac_cap |= EDAC_FLAG_SECDED; } } else - csrow->edac_mode = EDAC_NONE; + edac_mode = EDAC_NONE; + + for (j = 0; j < drc_chan + 1; j++) { + dimm = csrow->channels[j]->dimm; + + dimm->nr_pages = nr_pages / (drc_chan + 1); + dimm->grain = 1 << 12; /* 4KiB - resolution of CELOG */ + dimm->mtype = MEM_RDDR; /* only one type supported */ + dimm->dtype = mem_dev ? DEV_X4 : DEV_X8; + dimm->edac_mode = edac_mode; + } } } @@ -406,30 +423,44 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) { u16 pci_data; struct mem_ctl_info *mci = NULL; + struct edac_mc_layer layers[2]; struct e7xxx_pvt *pvt = NULL; u32 drc; int drc_chan; struct e7xxx_error_info discard; - debugf0("%s(): mci\n", __func__); + edac_dbg(0, "mci\n"); pci_read_config_dword(pdev, E7XXX_DRC, &drc); drc_chan = dual_channel_active(drc, dev_idx); - mci = edac_mc_alloc(sizeof(*pvt), E7XXX_NR_CSROWS, drc_chan + 1, 0); - + /* + * According with the datasheet, this device has a maximum of + * 4 DIMMS per channel, either single-rank or dual-rank. So, the + * total amount of dimms is 8 (E7XXX_NR_DIMMS). + * That means that the DIMM is mapped as CSROWs, and the channel + * will map the rank. So, an error to either channel should be + * attributed to the same dimm. + */ + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = E7XXX_NR_CSROWS; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = drc_chan + 1; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); if (mci == NULL) return -ENOMEM; - debugf3("%s(): init mci\n", __func__); + edac_dbg(3, "init mci\n"); mci->mtype_cap = MEM_FLAG_RDDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED | EDAC_FLAG_S4ECD4ED; /* FIXME - what if different memory types are in different csrows? */ mci->mod_name = EDAC_MOD_STR; mci->mod_ver = E7XXX_REVISION; - mci->dev = &pdev->dev; - debugf3("%s(): init pvt\n", __func__); + mci->pdev = &pdev->dev; + edac_dbg(3, "init pvt\n"); pvt = (struct e7xxx_pvt *)mci->pvt_info; pvt->dev_info = &e7xxx_devs[dev_idx]; pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL, @@ -442,14 +473,14 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) goto fail0; } - debugf3("%s(): more mci init\n", __func__); + edac_dbg(3, "more mci init\n"); mci->ctl_name = pvt->dev_info->ctl_name; mci->dev_name = pci_name(pdev); mci->edac_check = e7xxx_check; mci->ctl_page_to_phys = ctl_page_to_phys; e7xxx_init_csrows(mci, pdev, dev_idx, drc); mci->edac_cap |= EDAC_FLAG_NONE; - debugf3("%s(): tolm, remapbase, remaplimit\n", __func__); + edac_dbg(3, "tolm, remapbase, remaplimit\n"); /* load the top of low memory, remap base, and remap limit vars */ pci_read_config_word(pdev, E7XXX_TOLM, &pci_data); pvt->tolm = ((u32) pci_data) << 4; @@ -468,7 +499,7 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) * type of memory controller. The ID is therefore hardcoded to 0. */ if (edac_mc_add_mc(mci)) { - debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "failed edac_mc_add_mc()\n"); goto fail1; } @@ -484,7 +515,7 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) } /* get this far and it's successful */ - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); return 0; fail1: @@ -497,22 +528,21 @@ fail0: } /* returns count (>= 0), or negative on error */ -static int __devinit e7xxx_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int e7xxx_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* wake up and enable device */ return pci_enable_device(pdev) ? -EIO : e7xxx_probe1(pdev, ent->driver_data); } -static void __devexit e7xxx_remove_one(struct pci_dev *pdev) +static void e7xxx_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct e7xxx_pvt *pvt; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); if (e7xxx_pci) edac_pci_release_generic_ctl(e7xxx_pci); @@ -525,7 +555,7 @@ static void __devexit e7xxx_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id e7xxx_pci_tbl[] __devinitdata = { +static const struct pci_device_id e7xxx_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7205}, @@ -548,7 +578,7 @@ MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl); static struct pci_driver e7xxx_driver = { .name = EDAC_MOD_STR, .probe = e7xxx_init_one, - .remove = __devexit_p(e7xxx_remove_one), + .remove = e7xxx_remove_one, .id_table = e7xxx_pci_tbl, }; diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index e48ab3108ad..3c2625e7980 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -71,26 +71,21 @@ extern const char *edac_mem_types[]; #ifdef CONFIG_EDAC_DEBUG extern int edac_debug_level; -#define edac_debug_printk(level, fmt, arg...) \ - do { \ - if (level <= edac_debug_level) \ - edac_printk(KERN_DEBUG, EDAC_DEBUG, \ - "%s: " fmt, __func__, ##arg); \ - } while (0) - -#define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ ) -#define debugf1( ... ) edac_debug_printk(1, __VA_ARGS__ ) -#define debugf2( ... ) edac_debug_printk(2, __VA_ARGS__ ) -#define debugf3( ... ) edac_debug_printk(3, __VA_ARGS__ ) -#define debugf4( ... ) edac_debug_printk(4, __VA_ARGS__ ) +#define edac_dbg(level, fmt, ...) \ +do { \ + if (level <= edac_debug_level) \ + edac_printk(KERN_DEBUG, EDAC_DEBUG, \ + "%s: " fmt, __func__, ##__VA_ARGS__); \ +} while (0) #else /* !CONFIG_EDAC_DEBUG */ -#define debugf0( ... ) -#define debugf1( ... ) -#define debugf2( ... ) -#define debugf3( ... ) -#define debugf4( ... ) +#define edac_dbg(level, fmt, ...) \ +do { \ + if (0) \ + edac_printk(KERN_DEBUG, EDAC_DEBUG, \ + "%s: " fmt, __func__, ##__VA_ARGS__); \ +} while (0) #endif /* !CONFIG_EDAC_DEBUG */ @@ -107,13 +102,13 @@ extern int edac_debug_level; * * CPU caches (L1 and L2) * DMA engines - * Core CPU swithces + * Core CPU switches * Fabric switch units * PCIe interface controllers * other EDAC/ECC type devices that can be monitored for * errors, etc. * - * It allows for a 2 level set of hiearchry. For example: + * It allows for a 2 level set of hierarchy. For example: * * cache could be composed of L1, L2 and L3 levels of cache. * Each CPU core would have its own L1 cache, while sharing @@ -447,8 +442,10 @@ static inline void pci_write_bits32(struct pci_dev *pdev, int offset, #endif /* CONFIG_PCI */ -extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, - unsigned nr_chans, int edac_index); +struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, + unsigned n_layers, + struct edac_mc_layer *layers, + unsigned sz_pvt); extern int edac_mc_add_mc(struct mem_ctl_info *mci); extern void edac_mc_free(struct mem_ctl_info *mci); extern struct mem_ctl_info *edac_mc_find(int idx); @@ -457,34 +454,21 @@ extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page); -/* - * The no info errors are used when error overflows are reported. - * There are a limited number of error logging registers that can - * be exausted. When all registers are exhausted and an additional - * error occurs then an error overflow register records that an - * error occurred and the type of error, but doesn't have any - * further information. The ce/ue versions make for cleaner - * reporting logic and function interface - reduces conditional - * statement clutter and extra function arguments. - */ -extern void edac_mc_handle_ce(struct mem_ctl_info *mci, - unsigned long page_frame_number, - unsigned long offset_in_page, - unsigned long syndrome, int row, int channel, - const char *msg); -extern void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, - const char *msg); -extern void edac_mc_handle_ue(struct mem_ctl_info *mci, - unsigned long page_frame_number, - unsigned long offset_in_page, int row, - const char *msg); -extern void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, - const char *msg); -extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, unsigned int csrow, - unsigned int channel0, unsigned int channel1, - char *msg); -extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, unsigned int csrow, - unsigned int channel, char *msg); +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e); + +void edac_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + const u16 error_count, + const unsigned long page_frame_number, + const unsigned long offset_in_page, + const unsigned long syndrome, + const int top_layer, + const int mid_layer, + const int low_layer, + const char *msg, + const char *other_detail); /* * edac_device APIs @@ -496,6 +480,7 @@ extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr, int block_nr, const char *msg); extern int edac_device_alloc_index(void); +extern const char *edac_layer_name[]; /* * edac_pci APIs diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 4b154593343..592af5f0cf3 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -40,12 +40,13 @@ static LIST_HEAD(edac_device_list); #ifdef CONFIG_EDAC_DEBUG static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) { - debugf3("\tedac_dev = %p dev_idx=%d \n", edac_dev, edac_dev->dev_idx); - debugf4("\tedac_dev->edac_check = %p\n", edac_dev->edac_check); - debugf3("\tdev = %p\n", edac_dev->dev); - debugf3("\tmod_name:ctl_name = %s:%s\n", - edac_dev->mod_name, edac_dev->ctl_name); - debugf3("\tpvt_info = %p\n\n", edac_dev->pvt_info); + edac_dbg(3, "\tedac_dev = %p dev_idx=%d\n", + edac_dev, edac_dev->dev_idx); + edac_dbg(4, "\tedac_dev->edac_check = %p\n", edac_dev->edac_check); + edac_dbg(3, "\tdev = %p\n", edac_dev->dev); + edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n", + edac_dev->mod_name, edac_dev->ctl_name); + edac_dbg(3, "\tpvt_info = %p\n\n", edac_dev->pvt_info); } #endif /* CONFIG_EDAC_DEBUG */ @@ -56,7 +57,7 @@ static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) * * The control structure is allocated in complete chunk * from the OS. It is in turn sub allocated to the - * various objects that compose the struture + * various objects that compose the structure * * The structure has a 'nr_instance' array within itself. * Each instance represents a major component @@ -79,11 +80,10 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( unsigned total_size; unsigned count; unsigned instance, block, attr; - void *pvt; + void *pvt, *p; int err; - debugf4("%s() instances=%d blocks=%d\n", - __func__, nr_instances, nr_blocks); + edac_dbg(4, "instances=%d blocks=%d\n", nr_instances, nr_blocks); /* Calculate the size of memory we need to allocate AND * determine the offsets of the various item arrays @@ -92,35 +92,30 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( * to be at least as stringent as what the compiler would * provide if we could simply hardcode everything into a single struct. */ - dev_ctl = (struct edac_device_ctl_info *)NULL; + p = NULL; + dev_ctl = edac_align_ptr(&p, sizeof(*dev_ctl), 1); /* Calc the 'end' offset past end of ONE ctl_info structure * which will become the start of the 'instance' array */ - dev_inst = edac_align_ptr(&dev_ctl[1], sizeof(*dev_inst)); + dev_inst = edac_align_ptr(&p, sizeof(*dev_inst), nr_instances); /* Calc the 'end' offset past the instance array within the ctl_info * which will become the start of the block array */ - dev_blk = edac_align_ptr(&dev_inst[nr_instances], sizeof(*dev_blk)); + count = nr_instances * nr_blocks; + dev_blk = edac_align_ptr(&p, sizeof(*dev_blk), count); /* Calc the 'end' offset past the dev_blk array * which will become the start of the attrib array, if any. */ - count = nr_instances * nr_blocks; - dev_attrib = edac_align_ptr(&dev_blk[count], sizeof(*dev_attrib)); - - /* Check for case of when an attribute array is specified */ - if (nr_attrib > 0) { - /* calc how many nr_attrib we need */ + /* calc how many nr_attrib we need */ + if (nr_attrib > 0) count *= nr_attrib; + dev_attrib = edac_align_ptr(&p, sizeof(*dev_attrib), count); - /* Calc the 'end' offset past the attributes array */ - pvt = edac_align_ptr(&dev_attrib[count], sz_private); - } else { - /* no attribute array specificed */ - pvt = edac_align_ptr(dev_attrib, sz_private); - } + /* Calc the 'end' offset past the attributes array */ + pvt = edac_align_ptr(&p, sz_private, 1); /* 'pvt' now points to where the private data area is. * At this point 'pvt' (like dev_inst,dev_blk and dev_attrib) @@ -161,8 +156,8 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( /* Name of this edac device */ snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s",edac_device_name); - debugf4("%s() edac_dev=%p next after end=%p\n", - __func__, dev_ctl, pvt + sz_private ); + edac_dbg(4, "edac_dev=%p next after end=%p\n", + dev_ctl, pvt + sz_private); /* Initialize every Instance */ for (instance = 0; instance < nr_instances; instance++) { @@ -183,10 +178,8 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( snprintf(blk->name, sizeof(blk->name), "%s%d", edac_block_name, block+offset_value); - debugf4("%s() instance=%d inst_p=%p block=#%d " - "block_p=%p name='%s'\n", - __func__, instance, inst, block, - blk, blk->name); + edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n", + instance, inst, block, blk, blk->name); /* if there are NO attributes OR no attribute pointer * then continue on to next block iteration @@ -199,8 +192,8 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( attrib_p = &dev_attrib[block*nr_instances*nr_attrib]; blk->block_attributes = attrib_p; - debugf4("%s() THIS BLOCK_ATTRIB=%p\n", - __func__, blk->block_attributes); + edac_dbg(4, "THIS BLOCK_ATTRIB=%p\n", + blk->block_attributes); /* Initialize every user specified attribute in this * block with the data the caller passed in @@ -219,11 +212,10 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( attrib->block = blk; /* up link */ - debugf4("%s() alloc-attrib=%p attrib_name='%s' " - "attrib-spec=%p spec-name=%s\n", - __func__, attrib, attrib->attr.name, - &attrib_spec[attr], - attrib_spec[attr].attr.name + edac_dbg(4, "alloc-attrib=%p attrib_name='%s' attrib-spec=%p spec-name=%s\n", + attrib, attrib->attr.name, + &attrib_spec[attr], + attrib_spec[attr].attr.name ); } } @@ -278,7 +270,7 @@ static struct edac_device_ctl_info *find_edac_device_by_dev(struct device *dev) struct edac_device_ctl_info *edac_dev; struct list_head *item; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); list_for_each(item, &edac_device_list) { edac_dev = list_entry(item, struct edac_device_ctl_info, link); @@ -394,7 +386,7 @@ static void edac_device_workq_function(struct work_struct *work_req) /* Reschedule the workq for the next time period to start again * if the number of msec is for 1 sec, then adjust to the next - * whole one second to save timers fireing all over the period + * whole one second to save timers firing all over the period * between integral seconds */ if (edac_dev->poll_msec == 1000) @@ -413,7 +405,7 @@ static void edac_device_workq_function(struct work_struct *work_req) void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, unsigned msec) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* take the arg 'msec' and set it into the control structure * to used in the time period calculation @@ -445,6 +437,9 @@ void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) { int status; + if (!edac_dev->edac_check) + return; + status = cancel_delayed_work(&edac_dev->work); if (status == 0) { /* workq instance might be running, wait for it */ @@ -501,7 +496,7 @@ EXPORT_SYMBOL_GPL(edac_device_alloc_index); */ int edac_device_add_device(struct edac_device_ctl_info *edac_dev) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) @@ -538,12 +533,9 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev) /* Report action taken */ edac_device_printk(edac_dev, KERN_INFO, - "Giving out device to module '%s' controller " - "'%s': DEV '%s' (%s)\n", - edac_dev->mod_name, - edac_dev->ctl_name, - edac_dev_name(edac_dev), - edac_op_state_to_string(edac_dev->op_state)); + "Giving out device to module %s controller %s: DEV %s (%s)\n", + edac_dev->mod_name, edac_dev->ctl_name, edac_dev->dev_name, + edac_op_state_to_string(edac_dev->op_state)); mutex_unlock(&device_ctls_mutex); return 0; @@ -563,7 +555,7 @@ EXPORT_SYMBOL_GPL(edac_device_add_device); * Remove sysfs entries for specified edac_device structure and * then remove edac_device structure from global list * - * @pdev: + * @dev: * Pointer to 'struct device' representing edac_device * structure to remove. * @@ -575,7 +567,7 @@ struct edac_device_ctl_info *edac_device_del_device(struct device *dev) { struct edac_device_ctl_info *edac_dev; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); mutex_lock(&device_ctls_mutex); diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index b4ea185cceb..fb68a06ad68 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c @@ -202,7 +202,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj) { struct edac_device_ctl_info *edac_dev = to_edacdev(kobj); - debugf4("%s() control index=%d\n", __func__, edac_dev->dev_idx); + edac_dbg(4, "control index=%d\n", edac_dev->dev_idx); /* decrement the EDAC CORE module ref count */ module_put(edac_dev->owner); @@ -233,12 +233,12 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) struct bus_type *edac_subsys; int err; - debugf1("%s()\n", __func__); + edac_dbg(1, "\n"); /* get the /sys/devices/system/edac reference */ edac_subsys = edac_get_sysfs_subsys(); if (edac_subsys == NULL) { - debugf1("%s() no edac_subsys error\n", __func__); + edac_dbg(1, "no edac_subsys error\n"); err = -ENODEV; goto err_out; } @@ -264,8 +264,8 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) &edac_subsys->dev_root->kobj, "%s", edac_dev->name); if (err) { - debugf1("%s()Failed to register '.../edac/%s'\n", - __func__, edac_dev->name); + edac_dbg(1, "Failed to register '.../edac/%s'\n", + edac_dev->name); goto err_kobj_reg; } kobject_uevent(&edac_dev->kobj, KOBJ_ADD); @@ -274,8 +274,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) * edac_device_unregister_sysfs_main_kobj() must be used */ - debugf4("%s() Registered '.../edac/%s' kobject\n", - __func__, edac_dev->name); + edac_dbg(4, "Registered '.../edac/%s' kobject\n", edac_dev->name); return 0; @@ -296,9 +295,8 @@ err_out: */ void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev) { - debugf0("%s()\n", __func__); - debugf4("%s() name of kobject is: %s\n", - __func__, kobject_name(&dev->kobj)); + edac_dbg(0, "\n"); + edac_dbg(4, "name of kobject is: %s\n", kobject_name(&dev->kobj)); /* * Unregister the edac device's kobject and @@ -336,7 +334,7 @@ static void edac_device_ctrl_instance_release(struct kobject *kobj) { struct edac_device_instance *instance; - debugf1("%s()\n", __func__); + edac_dbg(1, "\n"); /* map from this kobj to the main control struct * and then dec the main kobj count @@ -442,7 +440,7 @@ static void edac_device_ctrl_block_release(struct kobject *kobj) { struct edac_device_block *block; - debugf1("%s()\n", __func__); + edac_dbg(1, "\n"); /* get the container of the kobj */ block = to_block(kobj); @@ -524,10 +522,10 @@ static int edac_device_create_block(struct edac_device_ctl_info *edac_dev, struct edac_dev_sysfs_block_attribute *sysfs_attrib; struct kobject *main_kobj; - debugf4("%s() Instance '%s' inst_p=%p block '%s' block_p=%p\n", - __func__, instance->name, instance, block->name, block); - debugf4("%s() block kobj=%p block kobj->parent=%p\n", - __func__, &block->kobj, &block->kobj.parent); + edac_dbg(4, "Instance '%s' inst_p=%p block '%s' block_p=%p\n", + instance->name, instance, block->name, block); + edac_dbg(4, "block kobj=%p block kobj->parent=%p\n", + &block->kobj, &block->kobj.parent); /* init this block's kobject */ memset(&block->kobj, 0, sizeof(struct kobject)); @@ -546,8 +544,7 @@ static int edac_device_create_block(struct edac_device_ctl_info *edac_dev, &instance->kobj, "%s", block->name); if (err) { - debugf1("%s() Failed to register instance '%s'\n", - __func__, block->name); + edac_dbg(1, "Failed to register instance '%s'\n", block->name); kobject_put(main_kobj); err = -ENODEV; goto err_out; @@ -560,11 +557,9 @@ static int edac_device_create_block(struct edac_device_ctl_info *edac_dev, if (sysfs_attrib && block->nr_attribs) { for (i = 0; i < block->nr_attribs; i++, sysfs_attrib++) { - debugf4("%s() creating block attrib='%s' " - "attrib->%p to kobj=%p\n", - __func__, - sysfs_attrib->attr.name, - sysfs_attrib, &block->kobj); + edac_dbg(4, "creating block attrib='%s' attrib->%p to kobj=%p\n", + sysfs_attrib->attr.name, + sysfs_attrib, &block->kobj); /* Create each block_attribute file */ err = sysfs_create_file(&block->kobj, @@ -647,14 +642,14 @@ static int edac_device_create_instance(struct edac_device_ctl_info *edac_dev, err = kobject_init_and_add(&instance->kobj, &ktype_instance_ctrl, &edac_dev->kobj, "%s", instance->name); if (err != 0) { - debugf2("%s() Failed to register instance '%s'\n", - __func__, instance->name); + edac_dbg(2, "Failed to register instance '%s'\n", + instance->name); kobject_put(main_kobj); goto err_out; } - debugf4("%s() now register '%d' blocks for instance %d\n", - __func__, instance->nr_blocks, idx); + edac_dbg(4, "now register '%d' blocks for instance %d\n", + instance->nr_blocks, idx); /* register all blocks of this instance */ for (i = 0; i < instance->nr_blocks; i++) { @@ -670,8 +665,8 @@ static int edac_device_create_instance(struct edac_device_ctl_info *edac_dev, } kobject_uevent(&instance->kobj, KOBJ_ADD); - debugf4("%s() Registered instance %d '%s' kobject\n", - __func__, idx, instance->name); + edac_dbg(4, "Registered instance %d '%s' kobject\n", + idx, instance->name); return 0; @@ -715,7 +710,7 @@ static int edac_device_create_instances(struct edac_device_ctl_info *edac_dev) int i, j; int err; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* iterate over creation of the instances */ for (i = 0; i < edac_dev->nr_instances; i++) { @@ -817,12 +812,12 @@ int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev) int err; struct kobject *edac_kobj = &edac_dev->kobj; - debugf0("%s() idx=%d\n", __func__, edac_dev->dev_idx); + edac_dbg(0, "idx=%d\n", edac_dev->dev_idx); /* go create any main attributes callers wants */ err = edac_device_add_main_sysfs_attributes(edac_dev); if (err) { - debugf0("%s() failed to add sysfs attribs\n", __func__); + edac_dbg(0, "failed to add sysfs attribs\n"); goto err_out; } @@ -832,8 +827,7 @@ int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev) err = sysfs_create_link(edac_kobj, &edac_dev->dev->kobj, EDAC_DEVICE_SYMLINK); if (err) { - debugf0("%s() sysfs_create_link() returned err= %d\n", - __func__, err); + edac_dbg(0, "sysfs_create_link() returned err= %d\n", err); goto err_remove_main_attribs; } @@ -843,14 +837,13 @@ int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev) */ err = edac_device_create_instances(edac_dev); if (err) { - debugf0("%s() edac_device_create_instances() " - "returned err= %d\n", __func__, err); + edac_dbg(0, "edac_device_create_instances() returned err= %d\n", + err); goto err_remove_link; } - debugf4("%s() create-instances done, idx=%d\n", - __func__, edac_dev->dev_idx); + edac_dbg(4, "create-instances done, idx=%d\n", edac_dev->dev_idx); return 0; @@ -873,7 +866,7 @@ err_out: */ void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* remove any main attributes for this device */ edac_device_remove_main_sysfs_attributes(edac_dev); diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index ca6c04d350e..2c694b5297c 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -27,52 +27,103 @@ #include <linux/list.h> #include <linux/ctype.h> #include <linux/edac.h> +#include <linux/bitops.h> #include <asm/uaccess.h> #include <asm/page.h> #include <asm/edac.h> #include "edac_core.h" #include "edac_module.h" +#define CREATE_TRACE_POINTS +#define TRACE_INCLUDE_PATH ../../include/ras +#include <ras/ras_event.h> + /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); +/* + * Used to lock EDAC MC to just one module, avoiding two drivers e. g. + * apei/ghes and i7core_edac to be used at the same time. + */ +static void const *edac_mc_owner; + +static struct bus_type mc_bus[EDAC_MAX_MCS]; + +unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, + unsigned len) +{ + struct mem_ctl_info *mci = dimm->mci; + int i, n, count = 0; + char *p = buf; + + for (i = 0; i < mci->n_layers; i++) { + n = snprintf(p, len, "%s %d ", + edac_layer_name[mci->layers[i].type], + dimm->location[i]); + p += n; + len -= n; + count += n; + if (!len) + break; + } + + return count; +} + #ifdef CONFIG_EDAC_DEBUG -static void edac_mc_dump_channel(struct channel_info *chan) +static void edac_mc_dump_channel(struct rank_info *chan) { - debugf4("\tchannel = %p\n", chan); - debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); - debugf4("\tchannel->ce_count = %d\n", chan->ce_count); - debugf4("\tchannel->label = '%s'\n", chan->label); - debugf4("\tchannel->csrow = %p\n\n", chan->csrow); + edac_dbg(4, " channel->chan_idx = %d\n", chan->chan_idx); + edac_dbg(4, " channel = %p\n", chan); + edac_dbg(4, " channel->csrow = %p\n", chan->csrow); + edac_dbg(4, " channel->dimm = %p\n", chan->dimm); +} + +static void edac_mc_dump_dimm(struct dimm_info *dimm, int number) +{ + char location[80]; + + edac_dimm_info_location(dimm, location, sizeof(location)); + + edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n", + dimm->mci->csbased ? "rank" : "dimm", + number, location, dimm->csrow, dimm->cschannel); + edac_dbg(4, " dimm = %p\n", dimm); + edac_dbg(4, " dimm->label = '%s'\n", dimm->label); + edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); + edac_dbg(4, " dimm->grain = %d\n", dimm->grain); + edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); } static void edac_mc_dump_csrow(struct csrow_info *csrow) { - debugf4("\tcsrow = %p\n", csrow); - debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx); - debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page); - debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page); - debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask); - debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages); - debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels); - debugf4("\tcsrow->channels = %p\n", csrow->channels); - debugf4("\tcsrow->mci = %p\n\n", csrow->mci); + edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx); + edac_dbg(4, " csrow = %p\n", csrow); + edac_dbg(4, " csrow->first_page = 0x%lx\n", csrow->first_page); + edac_dbg(4, " csrow->last_page = 0x%lx\n", csrow->last_page); + edac_dbg(4, " csrow->page_mask = 0x%lx\n", csrow->page_mask); + edac_dbg(4, " csrow->nr_channels = %d\n", csrow->nr_channels); + edac_dbg(4, " csrow->channels = %p\n", csrow->channels); + edac_dbg(4, " csrow->mci = %p\n", csrow->mci); } static void edac_mc_dump_mci(struct mem_ctl_info *mci) { - debugf3("\tmci = %p\n", mci); - debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap); - debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap); - debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap); - debugf4("\tmci->edac_check = %p\n", mci->edac_check); - debugf3("\tmci->nr_csrows = %d, csrows = %p\n", - mci->nr_csrows, mci->csrows); - debugf3("\tdev = %p\n", mci->dev); - debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name); - debugf3("\tpvt_info = %p\n\n", mci->pvt_info); + edac_dbg(3, "\tmci = %p\n", mci); + edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap); + edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap); + edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap); + edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check); + edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n", + mci->nr_csrows, mci->csrows); + edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n", + mci->tot_dimms, mci->dimms); + edac_dbg(3, "\tdev = %p\n", mci->pdev); + edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n", + mci->mod_name, mci->ctl_name); + edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info); } #endif /* CONFIG_EDAC_DEBUG */ @@ -101,18 +152,37 @@ const char *edac_mem_types[] = { }; EXPORT_SYMBOL_GPL(edac_mem_types); -/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'. - * Adjust 'ptr' so that its alignment is at least as stringent as what the - * compiler would provide for X and return the aligned result. +/** + * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation + * @p: pointer to a pointer with the memory offset to be used. At + * return, this will be incremented to point to the next offset + * @size: Size of the data structure to be reserved + * @n_elems: Number of elements that should be reserved * * If 'size' is a constant, the compiler will optimize this whole function - * down to either a no-op or the addition of a constant to the value of 'ptr'. + * down to either a no-op or the addition of a constant to the value of '*p'. + * + * The 'p' pointer is absolutely needed to keep the proper advancing + * further in memory to the proper offsets when allocating the struct along + * with its embedded structs, as edac_device_alloc_ctl_info() does it + * above, for example. + * + * At return, the pointer 'p' will be incremented to be used on a next call + * to this function. */ -void *edac_align_ptr(void *ptr, unsigned size) +void *edac_align_ptr(void **p, unsigned size, int n_elems) { unsigned align, r; + void *ptr = *p; + + *p += size * n_elems; - /* Here we assume that the alignment of a "long long" is the most + /* + * 'p' can possibly be an unaligned item X such that sizeof(X) is + * 'size'. Adjust 'p' so that its alignment is at least as + * stringent as what the compiler would provide for X and return + * the aligned result. + * Here we assume that the alignment of a "long long" is the most * stringent alignment that the compiler will ever provide by default. * As far as I know, this is a reasonable assumption. */ @@ -127,19 +197,53 @@ void *edac_align_ptr(void *ptr, unsigned size) else return (char *)ptr; - r = size % align; + r = (unsigned long)p % align; if (r == 0) return (char *)ptr; + *p += align - r; + return (void *)(((unsigned long)ptr) + align - r); } +static void _edac_mc_free(struct mem_ctl_info *mci) +{ + int i, chn, row; + struct csrow_info *csr; + const unsigned int tot_dimms = mci->tot_dimms; + const unsigned int tot_channels = mci->num_cschannel; + const unsigned int tot_csrows = mci->nr_csrows; + + if (mci->dimms) { + for (i = 0; i < tot_dimms; i++) + kfree(mci->dimms[i]); + kfree(mci->dimms); + } + if (mci->csrows) { + for (row = 0; row < tot_csrows; row++) { + csr = mci->csrows[row]; + if (csr) { + if (csr->channels) { + for (chn = 0; chn < tot_channels; chn++) + kfree(csr->channels[chn]); + kfree(csr->channels); + } + kfree(csr); + } + } + kfree(mci->csrows); + } + kfree(mci); +} + /** - * edac_mc_alloc: Allocate a struct mem_ctl_info structure - * @size_pvt: size of private storage needed - * @nr_csrows: Number of CWROWS needed for this MC - * @nr_chans: Number of channels for the MC + * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure + * @mc_num: Memory controller number + * @n_layers: Number of MC hierarchy layers + * layers: Describes each layer as seen by the Memory Controller + * @size_pvt: size of private storage needed + * * * Everything is kmalloc'ed as one big chunk - more efficient. * Only can be used if all structures have the same lifetime - otherwise @@ -147,32 +251,75 @@ void *edac_align_ptr(void *ptr, unsigned size) * * Use edac_mc_free() to free mc structures allocated by this function. * + * NOTE: drivers handle multi-rank memories in different ways: in some + * drivers, one multi-rank memory stick is mapped as one entry, while, in + * others, a single multi-rank memory stick would be mapped into several + * entries. Currently, this function will allocate multiple struct dimm_info + * on such scenarios, as grouping the multiple ranks require drivers change. + * * Returns: - * NULL allocation failed - * struct mem_ctl_info pointer + * On failure: NULL + * On success: struct mem_ctl_info pointer */ -struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, - unsigned nr_chans, int edac_index) +struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, + unsigned n_layers, + struct edac_mc_layer *layers, + unsigned sz_pvt) { struct mem_ctl_info *mci; - struct csrow_info *csi, *csrow; - struct channel_info *chi, *chp, *chan; - void *pvt; - unsigned size; - int row, chn; - int err; + struct edac_mc_layer *layer; + struct csrow_info *csr; + struct rank_info *chan; + struct dimm_info *dimm; + u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; + unsigned pos[EDAC_MAX_LAYERS]; + unsigned size, tot_dimms = 1, count = 1; + unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0; + void *pvt, *p, *ptr = NULL; + int i, j, row, chn, n, len, off; + bool per_rank = false; + + BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0); + /* + * Calculate the total amount of dimms and csrows/cschannels while + * in the old API emulation mode + */ + for (i = 0; i < n_layers; i++) { + tot_dimms *= layers[i].size; + if (layers[i].is_virt_csrow) + tot_csrows *= layers[i].size; + else + tot_channels *= layers[i].size; + + if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT) + per_rank = true; + } /* Figure out the offsets of the various items from the start of an mc * structure. We want the alignment of each item to be at least as * stringent as what the compiler would provide if we could simply * hardcode everything into a single struct. */ - mci = (struct mem_ctl_info *)0; - csi = edac_align_ptr(&mci[1], sizeof(*csi)); - chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi)); - pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt); + mci = edac_align_ptr(&ptr, sizeof(*mci), 1); + layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers); + for (i = 0; i < n_layers; i++) { + count *= layers[i].size; + edac_dbg(4, "errcount layer %d size %d\n", i, count); + ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); + ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); + tot_errcount += 2 * count; + } + + edac_dbg(4, "allocating %d error counters\n", tot_errcount); + pvt = edac_align_ptr(&ptr, sz_pvt, 1); size = ((unsigned long)pvt) + sz_pvt; + edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n", + size, + tot_dimms, + per_rank ? "ranks" : "dimms", + tot_csrows * tot_channels); + mci = kzalloc(size, GFP_KERNEL); if (mci == NULL) return NULL; @@ -180,50 +327,134 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, /* Adjust pointers so they point within the memory we just allocated * rather than an imaginary chunk of memory located at address 0. */ - csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi)); - chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi)); + layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer)); + for (i = 0; i < n_layers; i++) { + mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i])); + mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i])); + } pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; /* setup index and various internal pointers */ - mci->mc_idx = edac_index; - mci->csrows = csi; + mci->mc_idx = mc_num; + mci->tot_dimms = tot_dimms; mci->pvt_info = pvt; - mci->nr_csrows = nr_csrows; - - for (row = 0; row < nr_csrows; row++) { - csrow = &csi[row]; - csrow->csrow_idx = row; - csrow->mci = mci; - csrow->nr_channels = nr_chans; - chp = &chi[row * nr_chans]; - csrow->channels = chp; - - for (chn = 0; chn < nr_chans; chn++) { - chan = &chp[chn]; + mci->n_layers = n_layers; + mci->layers = layer; + memcpy(mci->layers, layers, sizeof(*layer) * n_layers); + mci->nr_csrows = tot_csrows; + mci->num_cschannel = tot_channels; + mci->csbased = per_rank; + + /* + * Alocate and fill the csrow/channels structs + */ + mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL); + if (!mci->csrows) + goto error; + for (row = 0; row < tot_csrows; row++) { + csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL); + if (!csr) + goto error; + mci->csrows[row] = csr; + csr->csrow_idx = row; + csr->mci = mci; + csr->nr_channels = tot_channels; + csr->channels = kcalloc(tot_channels, sizeof(*csr->channels), + GFP_KERNEL); + if (!csr->channels) + goto error; + + for (chn = 0; chn < tot_channels; chn++) { + chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL); + if (!chan) + goto error; + csr->channels[chn] = chan; chan->chan_idx = chn; - chan->csrow = csrow; + chan->csrow = csr; } } - mci->op_state = OP_ALLOC; - INIT_LIST_HEAD(&mci->grp_kobj_list); - /* - * Initialize the 'root' kobj for the edac_mc controller + * Allocate and fill the dimm structs */ - err = edac_mc_register_sysfs_main_kobj(mci); - if (err) { - kfree(mci); - return NULL; + mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL); + if (!mci->dimms) + goto error; + + memset(&pos, 0, sizeof(pos)); + row = 0; + chn = 0; + for (i = 0; i < tot_dimms; i++) { + chan = mci->csrows[row]->channels[chn]; + off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]); + if (off < 0 || off >= tot_dimms) { + edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n"); + goto error; + } + + dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL); + if (!dimm) + goto error; + mci->dimms[off] = dimm; + dimm->mci = mci; + + /* + * Copy DIMM location and initialize it. + */ + len = sizeof(dimm->label); + p = dimm->label; + n = snprintf(p, len, "mc#%u", mc_num); + p += n; + len -= n; + for (j = 0; j < n_layers; j++) { + n = snprintf(p, len, "%s#%u", + edac_layer_name[layers[j].type], + pos[j]); + p += n; + len -= n; + dimm->location[j] = pos[j]; + + if (len <= 0) + break; + } + + /* Link it to the csrows old API data */ + chan->dimm = dimm; + dimm->csrow = row; + dimm->cschannel = chn; + + /* Increment csrow location */ + if (layers[0].is_virt_csrow) { + chn++; + if (chn == tot_channels) { + chn = 0; + row++; + } + } else { + row++; + if (row == tot_csrows) { + row = 0; + chn++; + } + } + + /* Increment dimm location */ + for (j = n_layers - 1; j >= 0; j--) { + pos[j]++; + if (pos[j] < layers[j].size) + break; + pos[j] = 0; + } } - /* at this point, the root kobj is valid, and in order to - * 'free' the object, then the function: - * edac_mc_unregister_sysfs_main_kobj() must be called - * which will perform kobj unregistration and the actual free - * will occur during the kobject callback operation - */ + mci->op_state = OP_ALLOC; + return mci; + +error: + _edac_mc_free(mci); + + return NULL; } EXPORT_SYMBOL_GPL(edac_mc_alloc); @@ -234,12 +465,18 @@ EXPORT_SYMBOL_GPL(edac_mc_alloc); */ void edac_mc_free(struct mem_ctl_info *mci) { - debugf1("%s()\n", __func__); + edac_dbg(1, "\n"); - edac_mc_unregister_sysfs_main_kobj(mci); + /* If we're not yet registered with sysfs free only what was allocated + * in edac_mc_alloc(). + */ + if (!device_is_registered(&mci->dev)) { + _edac_mc_free(mci); + return; + } - /* free the mci instance memory here */ - kfree(mci); + /* the mci instance is freed here, when the sysfs object is dropped */ + edac_unregister_sysfs(mci); } EXPORT_SYMBOL_GPL(edac_mc_free); @@ -256,12 +493,12 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev) struct mem_ctl_info *mci; struct list_head *item; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - if (mci->dev == dev) + if (mci->pdev == dev) return mci; } @@ -322,16 +559,19 @@ static void edac_mc_workq_function(struct work_struct *work_req) * * called with the mem_ctls_mutex held */ -static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) +static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec, + bool init) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* if this instance is not in the POLL state, then simply return */ if (mci->op_state != OP_RUNNING_POLL) return; - INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); - queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); + if (init) + INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); + + mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); } /* @@ -351,8 +591,7 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci) status = cancel_delayed_work(&mci->work); if (status == 0) { - debugf0("%s() not canceled, flush the queue\n", - __func__); + edac_dbg(0, "not canceled, flush the queue\n"); /* workq instance might be running, wait for it */ flush_workqueue(edac_workqueue); @@ -365,32 +604,17 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci) * user space has updated our poll period value, need to * reset our workq delays */ -void edac_mc_reset_delay_period(int value) +void edac_mc_reset_delay_period(unsigned long value) { struct mem_ctl_info *mci; struct list_head *item; mutex_lock(&mem_ctls_mutex); - /* scan the list and turn off all workq timers, doing so under lock - */ - list_for_each(item, &mc_devices) { - mci = list_entry(item, struct mem_ctl_info, link); - - if (mci->op_state == OP_RUNNING_POLL) - cancel_delayed_work(&mci->work); - } - - mutex_unlock(&mem_ctls_mutex); - - - /* re-walk the list, and reset the poll delay */ - mutex_lock(&mem_ctls_mutex); - list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - edac_mc_workq_setup(mci, (unsigned long) value); + edac_mc_workq_setup(mci, value, false); } mutex_unlock(&mem_ctls_mutex); @@ -413,7 +637,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) insert_before = &mc_devices; - p = find_mci_by_dev(mci->dev); + p = find_mci_by_dev(mci->pdev); if (unlikely(p != NULL)) goto fail0; @@ -435,7 +659,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) fail0: edac_printk(KERN_WARNING, EDAC_MC, - "%s (%s) %s %s already assigned %d\n", dev_name(p->dev), + "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev), edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); return 1; @@ -446,9 +670,9 @@ fail1: return 1; } -static void del_mc_from_global_list(struct mem_ctl_info *mci) +static int del_mc_from_global_list(struct mem_ctl_info *mci) { - atomic_dec(&edac_handlers); + int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -456,6 +680,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) */ synchronize_rcu(); INIT_LIST_HEAD(&mci->link); + + return handlers; } /** @@ -490,7 +716,6 @@ EXPORT_SYMBOL(edac_mc_find); * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and * create sysfs entries associated with mci structure * @mci: pointer to the mci structure to be added to the list - * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure. * * Return: * 0 Success @@ -500,7 +725,13 @@ EXPORT_SYMBOL(edac_mc_find); /* FIXME - should a warning be printed if no error detection? correction? */ int edac_mc_add_mc(struct mem_ctl_info *mci) { - debugf0("%s()\n", __func__); + int ret = -EINVAL; + edac_dbg(0, "\n"); + + if (mci->mc_idx >= EDAC_MAX_MCS) { + pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx); + return -ENODEV; + } #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) @@ -510,23 +741,39 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) int i; for (i = 0; i < mci->nr_csrows; i++) { + struct csrow_info *csrow = mci->csrows[i]; + u32 nr_pages = 0; int j; - edac_mc_dump_csrow(&mci->csrows[i]); - for (j = 0; j < mci->csrows[i].nr_channels; j++) - edac_mc_dump_channel(&mci->csrows[i]. - channels[j]); + for (j = 0; j < csrow->nr_channels; j++) + nr_pages += csrow->channels[j]->dimm->nr_pages; + if (!nr_pages) + continue; + edac_mc_dump_csrow(csrow); + for (j = 0; j < csrow->nr_channels; j++) + if (csrow->channels[j]->dimm->nr_pages) + edac_mc_dump_channel(csrow->channels[j]); } + for (i = 0; i < mci->tot_dimms; i++) + if (mci->dimms[i]->nr_pages) + edac_mc_dump_dimm(mci->dimms[i], i); } #endif mutex_lock(&mem_ctls_mutex); + if (edac_mc_owner && edac_mc_owner != mci->mod_name) { + ret = -EPERM; + goto fail0; + } + if (add_mc_to_global_list(mci)) goto fail0; /* set load time so that error rate can be tracked */ mci->start_time = jiffies; + mci->bus = &mc_bus[mci->mc_idx]; + if (edac_create_sysfs_mci_device(mci)) { edac_mc_printk(mci, KERN_WARNING, "failed to create sysfs device\n"); @@ -538,14 +785,18 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) /* This instance is NOW RUNNING */ mci->op_state = OP_RUNNING_POLL; - edac_mc_workq_setup(mci, edac_mc_get_poll_msec()); + edac_mc_workq_setup(mci, edac_mc_get_poll_msec(), true); } else { mci->op_state = OP_RUNNING_INTERRUPT; } /* Report action taken */ - edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" - " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); + edac_mc_printk(mci, KERN_INFO, + "Giving out device to module %s controller %s: DEV %s (%s)\n", + mci->mod_name, mci->ctl_name, mci->dev_name, + edac_op_state_to_string(mci->op_state)); + + edac_mc_owner = mci->mod_name; mutex_unlock(&mem_ctls_mutex); return 0; @@ -555,7 +806,7 @@ fail1: fail0: mutex_unlock(&mem_ctls_mutex); - return 1; + return ret; } EXPORT_SYMBOL_GPL(edac_mc_add_mc); @@ -570,7 +821,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) { struct mem_ctl_info *mci; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); mutex_lock(&mem_ctls_mutex); @@ -581,7 +832,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) return NULL; } - del_mc_from_global_list(mci); + if (!del_mc_from_global_list(mci)) + edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); /* flush workq processes */ @@ -608,7 +860,7 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset, void *virt_addr; unsigned long flags = 0; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); /* ECC error page was not in our memory. Ignore it. */ if (!pfn_valid(page)) @@ -620,13 +872,13 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset, if (PageHighMem(pg)) local_irq_save(flags); - virt_addr = kmap_atomic(pg, KM_BOUNCE_READ); + virt_addr = kmap_atomic(pg); /* Perform architecture specific atomic scrub operation */ atomic_scrub(virt_addr + offset, size); /* Unmap and complete */ - kunmap_atomic(virt_addr, KM_BOUNCE_READ); + kunmap_atomic(virt_addr); if (PageHighMem(pg)) local_irq_restore(flags); @@ -635,22 +887,26 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset, /* FIXME - should return -1 */ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) { - struct csrow_info *csrows = mci->csrows; - int row, i; + struct csrow_info **csrows = mci->csrows; + int row, i, j, n; - debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page); + edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page); row = -1; for (i = 0; i < mci->nr_csrows; i++) { - struct csrow_info *csrow = &csrows[i]; - - if (csrow->nr_pages == 0) + struct csrow_info *csrow = csrows[i]; + n = 0; + for (j = 0; j < csrow->nr_channels; j++) { + struct dimm_info *dimm = csrow->channels[j]->dimm; + n += dimm->nr_pages; + } + if (n == 0) continue; - debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) " - "mask(0x%lx)\n", mci->mc_idx, __func__, - csrow->first_page, page, csrow->last_page, - csrow->page_mask); + edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n", + mci->mc_idx, + csrow->first_page, page, csrow->last_page, + csrow->page_mask); if ((page >= csrow->first_page) && (page <= csrow->last_page) && @@ -670,249 +926,381 @@ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) } EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page); -/* FIXME - setable log (warning/emerg) levels */ -/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */ -void edac_mc_handle_ce(struct mem_ctl_info *mci, - unsigned long page_frame_number, - unsigned long offset_in_page, unsigned long syndrome, - int row, int channel, const char *msg) +const char *edac_layer_name[] = { + [EDAC_MC_LAYER_BRANCH] = "branch", + [EDAC_MC_LAYER_CHANNEL] = "channel", + [EDAC_MC_LAYER_SLOT] = "slot", + [EDAC_MC_LAYER_CHIP_SELECT] = "csrow", + [EDAC_MC_LAYER_ALL_MEM] = "memory", +}; +EXPORT_SYMBOL_GPL(edac_layer_name); + +static void edac_inc_ce_error(struct mem_ctl_info *mci, + bool enable_per_layer_report, + const int pos[EDAC_MAX_LAYERS], + const u16 count) { - unsigned long remapped_page; + int i, index = 0; - debugf3("MC%d: %s()\n", mci->mc_idx, __func__); + mci->ce_mc += count; - /* FIXME - maybe make panic on INTERNAL ERROR an option */ - if (row >= mci->nr_csrows || row < 0) { - /* something is wrong */ - edac_mc_printk(mci, KERN_ERR, - "INTERNAL ERROR: row out of range " - "(%d >= %d)\n", row, mci->nr_csrows); - edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); + if (!enable_per_layer_report) { + mci->ce_noinfo_count += count; return; } - if (channel >= mci->csrows[row].nr_channels || channel < 0) { - /* something is wrong */ - edac_mc_printk(mci, KERN_ERR, - "INTERNAL ERROR: channel out of range " - "(%d >= %d)\n", channel, - mci->csrows[row].nr_channels); - edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); + for (i = 0; i < mci->n_layers; i++) { + if (pos[i] < 0) + break; + index += pos[i]; + mci->ce_per_layer[i][index] += count; + + if (i < mci->n_layers - 1) + index *= mci->layers[i + 1].size; + } +} + +static void edac_inc_ue_error(struct mem_ctl_info *mci, + bool enable_per_layer_report, + const int pos[EDAC_MAX_LAYERS], + const u16 count) +{ + int i, index = 0; + + mci->ue_mc += count; + + if (!enable_per_layer_report) { + mci->ce_noinfo_count += count; return; } - if (edac_mc_get_log_ce()) - /* FIXME - put in DIMM location */ - edac_mc_printk(mci, KERN_WARNING, - "CE page 0x%lx, offset 0x%lx, grain %d, syndrome " - "0x%lx, row %d, channel %d, label \"%s\": %s\n", - page_frame_number, offset_in_page, - mci->csrows[row].grain, syndrome, row, channel, - mci->csrows[row].channels[channel].label, msg); + for (i = 0; i < mci->n_layers; i++) { + if (pos[i] < 0) + break; + index += pos[i]; + mci->ue_per_layer[i][index] += count; - mci->ce_count++; - mci->csrows[row].ce_count++; - mci->csrows[row].channels[channel].ce_count++; + if (i < mci->n_layers - 1) + index *= mci->layers[i + 1].size; + } +} - if (mci->scrub_mode & SCRUB_SW_SRC) { +static void edac_ce_error(struct mem_ctl_info *mci, + const u16 error_count, + const int pos[EDAC_MAX_LAYERS], + const char *msg, + const char *location, + const char *label, + const char *detail, + const char *other_detail, + const bool enable_per_layer_report, + const unsigned long page_frame_number, + const unsigned long offset_in_page, + long grain) +{ + unsigned long remapped_page; + char *msg_aux = ""; + + if (*msg) + msg_aux = " "; + + if (edac_mc_get_log_ce()) { + if (other_detail && *other_detail) + edac_mc_printk(mci, KERN_WARNING, + "%d CE %s%son %s (%s %s - %s)\n", + error_count, msg, msg_aux, label, + location, detail, other_detail); + else + edac_mc_printk(mci, KERN_WARNING, + "%d CE %s%son %s (%s %s)\n", + error_count, msg, msg_aux, label, + location, detail); + } + edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count); + + if (mci->scrub_mode == SCRUB_SW_SRC) { /* - * Some MC's can remap memory so that it is still available - * at a different address when PCI devices map into memory. - * MC's that can't do this lose the memory where PCI devices - * are mapped. This mapping is MC dependent and so we call - * back into the MC driver for it to map the MC page to - * a physical (CPU) page which can then be mapped to a virtual - * page - which can then be scrubbed. - */ + * Some memory controllers (called MCs below) can remap + * memory so that it is still available at a different + * address when PCI devices map into memory. + * MC's that can't do this, lose the memory where PCI + * devices are mapped. This mapping is MC-dependent + * and so we call back into the MC driver for it to + * map the MC page to a physical (CPU) page which can + * then be mapped to a virtual page - which can then + * be scrubbed. + */ remapped_page = mci->ctl_page_to_phys ? mci->ctl_page_to_phys(mci, page_frame_number) : page_frame_number; - edac_mc_scrub_block(remapped_page, offset_in_page, - mci->csrows[row].grain); + edac_mc_scrub_block(remapped_page, + offset_in_page, grain); } } -EXPORT_SYMBOL_GPL(edac_mc_handle_ce); - -void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg) -{ - if (edac_mc_get_log_ce()) - edac_mc_printk(mci, KERN_WARNING, - "CE - no information available: %s\n", msg); - - mci->ce_noinfo_count++; - mci->ce_count++; -} -EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info); -void edac_mc_handle_ue(struct mem_ctl_info *mci, - unsigned long page_frame_number, - unsigned long offset_in_page, int row, const char *msg) +static void edac_ue_error(struct mem_ctl_info *mci, + const u16 error_count, + const int pos[EDAC_MAX_LAYERS], + const char *msg, + const char *location, + const char *label, + const char *detail, + const char *other_detail, + const bool enable_per_layer_report) { - int len = EDAC_MC_LABEL_LEN * 4; - char labels[len + 1]; - char *pos = labels; - int chan; - int chars; - - debugf3("MC%d: %s()\n", mci->mc_idx, __func__); - - /* FIXME - maybe make panic on INTERNAL ERROR an option */ - if (row >= mci->nr_csrows || row < 0) { - /* something is wrong */ - edac_mc_printk(mci, KERN_ERR, - "INTERNAL ERROR: row out of range " - "(%d >= %d)\n", row, mci->nr_csrows); - edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); - return; + char *msg_aux = ""; + + if (*msg) + msg_aux = " "; + + if (edac_mc_get_log_ue()) { + if (other_detail && *other_detail) + edac_mc_printk(mci, KERN_WARNING, + "%d UE %s%son %s (%s %s - %s)\n", + error_count, msg, msg_aux, label, + location, detail, other_detail); + else + edac_mc_printk(mci, KERN_WARNING, + "%d UE %s%son %s (%s %s)\n", + error_count, msg, msg_aux, label, + location, detail); } - chars = snprintf(pos, len + 1, "%s", - mci->csrows[row].channels[0].label); - len -= chars; - pos += chars; - - for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0); - chan++) { - chars = snprintf(pos, len + 1, ":%s", - mci->csrows[row].channels[chan].label); - len -= chars; - pos += chars; + if (edac_mc_get_panic_on_ue()) { + if (other_detail && *other_detail) + panic("UE %s%son %s (%s%s - %s)\n", + msg, msg_aux, label, location, detail, other_detail); + else + panic("UE %s%son %s (%s%s)\n", + msg, msg_aux, label, location, detail); } - if (edac_mc_get_log_ue()) - edac_mc_printk(mci, KERN_EMERG, - "UE page 0x%lx, offset 0x%lx, grain %d, row %d, " - "labels \"%s\": %s\n", page_frame_number, - offset_in_page, mci->csrows[row].grain, row, - labels, msg); - - if (edac_mc_get_panic_on_ue()) - panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, " - "row %d, labels \"%s\": %s\n", mci->mc_idx, - page_frame_number, offset_in_page, - mci->csrows[row].grain, row, labels, msg); - - mci->ue_count++; - mci->csrows[row].ue_count++; + edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } -EXPORT_SYMBOL_GPL(edac_mc_handle_ue); -void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) +/** + * edac_raw_mc_handle_error - reports a memory event to userspace without doing + * anything to discover the error location + * + * @type: severity of the error (CE/UE/Fatal) + * @mci: a struct mem_ctl_info pointer + * @e: error description + * + * This raw function is used internally by edac_mc_handle_error(). It should + * only be called directly when the hardware error come directly from BIOS, + * like in the case of APEI GHES driver. + */ +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e) { - if (edac_mc_get_panic_on_ue()) - panic("EDAC MC%d: Uncorrected Error", mci->mc_idx); + char detail[80]; + int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; + + /* Memory type dependent details about the error */ + if (type == HW_EVENT_ERR_CORRECTED) { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", + e->page_frame_number, e->offset_in_page, + e->grain, e->syndrome); + edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report, + e->page_frame_number, e->offset_in_page, e->grain); + } else { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld", + e->page_frame_number, e->offset_in_page, e->grain); + + edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report); + } + - if (edac_mc_get_log_ue()) - edac_mc_printk(mci, KERN_WARNING, - "UE - no information available: %s\n", msg); - mci->ue_noinfo_count++; - mci->ue_count++; } -EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); +EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error); -/************************************************************* - * On Fully Buffered DIMM modules, this help function is - * called to process UE events +/** + * edac_mc_handle_error - reports a memory event to userspace + * + * @type: severity of the error (CE/UE/Fatal) + * @mci: a struct mem_ctl_info pointer + * @error_count: Number of errors of the same type + * @page_frame_number: mem page where the error occurred + * @offset_in_page: offset of the error inside the page + * @syndrome: ECC syndrome + * @top_layer: Memory layer[0] position + * @mid_layer: Memory layer[1] position + * @low_layer: Memory layer[2] position + * @msg: Message meaningful to the end users that + * explains the event + * @other_detail: Technical details about the event that + * may help hardware manufacturers and + * EDAC developers to analyse the event */ -void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, - unsigned int csrow, - unsigned int channela, - unsigned int channelb, char *msg) +void edac_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + const u16 error_count, + const unsigned long page_frame_number, + const unsigned long offset_in_page, + const unsigned long syndrome, + const int top_layer, + const int mid_layer, + const int low_layer, + const char *msg, + const char *other_detail) { - int len = EDAC_MC_LABEL_LEN * 4; - char labels[len + 1]; - char *pos = labels; - int chars; + char *p; + int row = -1, chan = -1; + int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; + int i, n_labels = 0; + u8 grain_bits; + struct edac_raw_error_desc *e = &mci->error_desc; + + edac_dbg(3, "MC%d\n", mci->mc_idx); + + /* Fills the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = error_count; + e->top_layer = top_layer; + e->mid_layer = mid_layer; + e->low_layer = low_layer; + e->page_frame_number = page_frame_number; + e->offset_in_page = offset_in_page; + e->syndrome = syndrome; + e->msg = msg; + e->other_detail = other_detail; - if (csrow >= mci->nr_csrows) { - /* something is wrong */ - edac_mc_printk(mci, KERN_ERR, - "INTERNAL ERROR: row out of range (%d >= %d)\n", - csrow, mci->nr_csrows); - edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); - return; + /* + * Check if the event report is consistent and if the memory + * location is known. If it is known, enable_per_layer_report will be + * true, the DIMM(s) label info will be filled and the per-layer + * error counters will be incremented. + */ + for (i = 0; i < mci->n_layers; i++) { + if (pos[i] >= (int)mci->layers[i].size) { + + edac_mc_printk(mci, KERN_ERR, + "INTERNAL ERROR: %s value is out of range (%d >= %d)\n", + edac_layer_name[mci->layers[i].type], + pos[i], mci->layers[i].size); + /* + * Instead of just returning it, let's use what's + * known about the error. The increment routines and + * the DIMM filter logic will do the right thing by + * pointing the likely damaged DIMMs. + */ + pos[i] = -1; + } + if (pos[i] >= 0) + e->enable_per_layer_report = true; } - if (channela >= mci->csrows[csrow].nr_channels) { - /* something is wrong */ - edac_mc_printk(mci, KERN_ERR, - "INTERNAL ERROR: channel-a out of range " - "(%d >= %d)\n", - channela, mci->csrows[csrow].nr_channels); - edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); - return; - } + /* + * Get the dimm label/grain that applies to the match criteria. + * As the error algorithm may not be able to point to just one memory + * stick, the logic here will get all possible labels that could + * pottentially be affected by the error. + * On FB-DIMM memory controllers, for uncorrected errors, it is common + * to have only the MC channel and the MC dimm (also called "branch") + * but the channel is not known, as the memory is arranged in pairs, + * where each memory belongs to a separate channel within the same + * branch. + */ + p = e->label; + *p = '\0'; - if (channelb >= mci->csrows[csrow].nr_channels) { - /* something is wrong */ - edac_mc_printk(mci, KERN_ERR, - "INTERNAL ERROR: channel-b out of range " - "(%d >= %d)\n", - channelb, mci->csrows[csrow].nr_channels); - edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); - return; - } + for (i = 0; i < mci->tot_dimms; i++) { + struct dimm_info *dimm = mci->dimms[i]; - mci->ue_count++; - mci->csrows[csrow].ue_count++; + if (top_layer >= 0 && top_layer != dimm->location[0]) + continue; + if (mid_layer >= 0 && mid_layer != dimm->location[1]) + continue; + if (low_layer >= 0 && low_layer != dimm->location[2]) + continue; - /* Generate the DIMM labels from the specified channels */ - chars = snprintf(pos, len + 1, "%s", - mci->csrows[csrow].channels[channela].label); - len -= chars; - pos += chars; - chars = snprintf(pos, len + 1, "-%s", - mci->csrows[csrow].channels[channelb].label); + /* get the max grain, over the error match range */ + if (dimm->grain > e->grain) + e->grain = dimm->grain; - if (edac_mc_get_log_ue()) - edac_mc_printk(mci, KERN_EMERG, - "UE row %d, channel-a= %d channel-b= %d " - "labels \"%s\": %s\n", csrow, channela, channelb, - labels, msg); + /* + * If the error is memory-controller wide, there's no need to + * seek for the affected DIMMs because the whole + * channel/memory controller/... may be affected. + * Also, don't show errors for empty DIMM slots. + */ + if (e->enable_per_layer_report && dimm->nr_pages) { + if (n_labels >= EDAC_MAX_LABELS) { + e->enable_per_layer_report = false; + break; + } + n_labels++; + if (p != e->label) { + strcpy(p, OTHER_LABEL); + p += strlen(OTHER_LABEL); + } + strcpy(p, dimm->label); + p += strlen(p); + *p = '\0'; + + /* + * get csrow/channel of the DIMM, in order to allow + * incrementing the compat API counters + */ + edac_dbg(4, "%s csrows map: (%d,%d)\n", + mci->csbased ? "rank" : "dimm", + dimm->csrow, dimm->cschannel); + if (row == -1) + row = dimm->csrow; + else if (row >= 0 && row != dimm->csrow) + row = -2; + + if (chan == -1) + chan = dimm->cschannel; + else if (chan >= 0 && chan != dimm->cschannel) + chan = -2; + } + } - if (edac_mc_get_panic_on_ue()) - panic("UE row %d, channel-a= %d channel-b= %d " - "labels \"%s\": %s\n", csrow, channela, - channelb, labels, msg); -} -EXPORT_SYMBOL(edac_mc_handle_fbd_ue); + if (!e->enable_per_layer_report) { + strcpy(e->label, "any memory"); + } else { + edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); + if (p == e->label) + strcpy(e->label, "unknown memory"); + if (type == HW_EVENT_ERR_CORRECTED) { + if (row >= 0) { + mci->csrows[row]->ce_count += error_count; + if (chan >= 0) + mci->csrows[row]->channels[chan]->ce_count += error_count; + } + } else + if (row >= 0) + mci->csrows[row]->ue_count += error_count; + } -/************************************************************* - * On Fully Buffered DIMM modules, this help function is - * called to process CE events - */ -void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, - unsigned int csrow, unsigned int channel, char *msg) -{ + /* Fill the RAM location data */ + p = e->location; - /* Ensure boundary values */ - if (csrow >= mci->nr_csrows) { - /* something is wrong */ - edac_mc_printk(mci, KERN_ERR, - "INTERNAL ERROR: row out of range (%d >= %d)\n", - csrow, mci->nr_csrows); - edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); - return; - } - if (channel >= mci->csrows[csrow].nr_channels) { - /* something is wrong */ - edac_mc_printk(mci, KERN_ERR, - "INTERNAL ERROR: channel out of range (%d >= %d)\n", - channel, mci->csrows[csrow].nr_channels); - edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); - return; + for (i = 0; i < mci->n_layers; i++) { + if (pos[i] < 0) + continue; + + p += sprintf(p, "%s:%d ", + edac_layer_name[mci->layers[i].type], + pos[i]); } + if (p > e->location) + *(p - 1) = '\0'; - if (edac_mc_get_log_ce()) - /* FIXME - put in DIMM location */ - edac_mc_printk(mci, KERN_WARNING, - "CE row %d, channel %d, label \"%s\": %s\n", - csrow, channel, - mci->csrows[csrow].channels[channel].label, msg); + /* Report the error via the trace interface */ + grain_bits = fls_long(e->grain) + 1; + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); - mci->ce_count++; - mci->csrows[csrow].ce_count++; - mci->csrows[csrow].channels[channel].ce_count++; + edac_raw_mc_handle_error(type, mci, e); } -EXPORT_SYMBOL(edac_mc_handle_fbd_ce); +EXPORT_SYMBOL_GPL(edac_mc_handle_error); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index d56e63477d5..01fae8289cf 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -7,17 +7,21 @@ * * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com * + * (c) 2012-2013 - Mauro Carvalho Chehab + * The entire API were re-written, and ported to use struct device + * */ #include <linux/ctype.h> #include <linux/slab.h> #include <linux/edac.h> #include <linux/bug.h> +#include <linux/pm_runtime.h> +#include <linux/uaccess.h> #include "edac_core.h" #include "edac_module.h" - /* MC EDAC Controls, setable by module parameter, and sysfs */ static int edac_mc_log_ue = 1; static int edac_mc_log_ce = 1; @@ -48,16 +52,20 @@ int edac_mc_get_poll_msec(void) static int edac_set_poll_msec(const char *val, struct kernel_param *kp) { - long l; + unsigned long l; int ret; if (!val) return -EINVAL; - ret = strict_strtol(val, 0, &l); - if (ret == -EINVAL || ((int)l != l)) + ret = kstrtoul(val, 0, &l); + if (ret) + return ret; + + if (l < 1000) return -EINVAL; - *((int *)kp->arg) = l; + + *((unsigned long *)kp->arg) = l; /* notify edac_mc engine to reset the poll period */ edac_mc_reset_delay_period(l); @@ -78,10 +86,12 @@ module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_int, &edac_mc_poll_msec, 0644); MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds"); +static struct device *mci_pdev; + /* * various constants for Memory Controllers */ -static const char *mem_types[] = { +static const char * const mem_types[] = { [MEM_EMPTY] = "Empty", [MEM_RESERVED] = "Reserved", [MEM_UNKNOWN] = "Unknown", @@ -101,7 +111,7 @@ static const char *mem_types[] = { [MEM_RDDR3] = "Registered-DDR3" }; -static const char *dev_types[] = { +static const char * const dev_types[] = { [DEV_UNKNOWN] = "Unknown", [DEV_X1] = "x1", [DEV_X2] = "x2", @@ -112,7 +122,7 @@ static const char *dev_types[] = { [DEV_X64] = "x64" }; -static const char *edac_caps[] = { +static const char * const edac_caps[] = { [EDAC_UNKNOWN] = "Unknown", [EDAC_NONE] = "None", [EDAC_RESERVED] = "Reserved", @@ -125,311 +135,530 @@ static const char *edac_caps[] = { [EDAC_S16ECD16ED] = "S16ECD16ED" }; -/* EDAC sysfs CSROW data structures and methods +#ifdef CONFIG_EDAC_LEGACY_SYSFS +/* + * EDAC sysfs CSROW data structures and methods + */ + +#define to_csrow(k) container_of(k, struct csrow_info, dev) + +/* + * We need it to avoid namespace conflicts between the legacy API + * and the per-dimm/per-rank one */ +#define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \ + static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) + +struct dev_ch_attribute { + struct device_attribute attr; + int channel; +}; + +#define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \ + struct dev_ch_attribute dev_attr_legacy_##_name = \ + { __ATTR(_name, _mode, _show, _store), (_var) } + +#define to_channel(k) (container_of(k, struct dev_ch_attribute, attr)->channel) /* Set of more default csrow<id> attribute show/store functions */ -static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data, - int private) +static ssize_t csrow_ue_count_show(struct device *dev, + struct device_attribute *mattr, char *data) { + struct csrow_info *csrow = to_csrow(dev); + return sprintf(data, "%u\n", csrow->ue_count); } -static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data, - int private) +static ssize_t csrow_ce_count_show(struct device *dev, + struct device_attribute *mattr, char *data) { + struct csrow_info *csrow = to_csrow(dev); + return sprintf(data, "%u\n", csrow->ce_count); } -static ssize_t csrow_size_show(struct csrow_info *csrow, char *data, - int private) +static ssize_t csrow_size_show(struct device *dev, + struct device_attribute *mattr, char *data) { - return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages)); + struct csrow_info *csrow = to_csrow(dev); + int i; + u32 nr_pages = 0; + + for (i = 0; i < csrow->nr_channels; i++) + nr_pages += csrow->channels[i]->dimm->nr_pages; + return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages)); } -static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data, - int private) +static ssize_t csrow_mem_type_show(struct device *dev, + struct device_attribute *mattr, char *data) { - return sprintf(data, "%s\n", mem_types[csrow->mtype]); + struct csrow_info *csrow = to_csrow(dev); + + return sprintf(data, "%s\n", mem_types[csrow->channels[0]->dimm->mtype]); } -static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data, - int private) +static ssize_t csrow_dev_type_show(struct device *dev, + struct device_attribute *mattr, char *data) { - return sprintf(data, "%s\n", dev_types[csrow->dtype]); + struct csrow_info *csrow = to_csrow(dev); + + return sprintf(data, "%s\n", dev_types[csrow->channels[0]->dimm->dtype]); } -static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data, - int private) +static ssize_t csrow_edac_mode_show(struct device *dev, + struct device_attribute *mattr, + char *data) { - return sprintf(data, "%s\n", edac_caps[csrow->edac_mode]); + struct csrow_info *csrow = to_csrow(dev); + + return sprintf(data, "%s\n", edac_caps[csrow->channels[0]->dimm->edac_mode]); } /* show/store functions for DIMM Label attributes */ -static ssize_t channel_dimm_label_show(struct csrow_info *csrow, - char *data, int channel) +static ssize_t channel_dimm_label_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct csrow_info *csrow = to_csrow(dev); + unsigned chan = to_channel(mattr); + struct rank_info *rank = csrow->channels[chan]; + /* if field has not been initialized, there is nothing to send */ - if (!csrow->channels[channel].label[0]) + if (!rank->dimm->label[0]) return 0; return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n", - csrow->channels[channel].label); + rank->dimm->label); } -static ssize_t channel_dimm_label_store(struct csrow_info *csrow, - const char *data, - size_t count, int channel) +static ssize_t channel_dimm_label_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) { + struct csrow_info *csrow = to_csrow(dev); + unsigned chan = to_channel(mattr); + struct rank_info *rank = csrow->channels[chan]; + ssize_t max_size = 0; max_size = min((ssize_t) count, (ssize_t) EDAC_MC_LABEL_LEN - 1); - strncpy(csrow->channels[channel].label, data, max_size); - csrow->channels[channel].label[max_size] = '\0'; + strncpy(rank->dimm->label, data, max_size); + rank->dimm->label[max_size] = '\0'; return max_size; } /* show function for dynamic chX_ce_count attribute */ -static ssize_t channel_ce_count_show(struct csrow_info *csrow, - char *data, int channel) +static ssize_t channel_ce_count_show(struct device *dev, + struct device_attribute *mattr, char *data) { - return sprintf(data, "%u\n", csrow->channels[channel].ce_count); + struct csrow_info *csrow = to_csrow(dev); + unsigned chan = to_channel(mattr); + struct rank_info *rank = csrow->channels[chan]; + + return sprintf(data, "%u\n", rank->ce_count); } -/* csrow specific attribute structure */ -struct csrowdev_attribute { - struct attribute attr; - ssize_t(*show) (struct csrow_info *, char *, int); - ssize_t(*store) (struct csrow_info *, const char *, size_t, int); - int private; -}; +/* cwrow<id>/attribute files */ +DEVICE_ATTR_LEGACY(size_mb, S_IRUGO, csrow_size_show, NULL); +DEVICE_ATTR_LEGACY(dev_type, S_IRUGO, csrow_dev_type_show, NULL); +DEVICE_ATTR_LEGACY(mem_type, S_IRUGO, csrow_mem_type_show, NULL); +DEVICE_ATTR_LEGACY(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL); +DEVICE_ATTR_LEGACY(ue_count, S_IRUGO, csrow_ue_count_show, NULL); +DEVICE_ATTR_LEGACY(ce_count, S_IRUGO, csrow_ce_count_show, NULL); -#define to_csrow(k) container_of(k, struct csrow_info, kobj) -#define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr) +/* default attributes of the CSROW<id> object */ +static struct attribute *csrow_attrs[] = { + &dev_attr_legacy_dev_type.attr, + &dev_attr_legacy_mem_type.attr, + &dev_attr_legacy_edac_mode.attr, + &dev_attr_legacy_size_mb.attr, + &dev_attr_legacy_ue_count.attr, + &dev_attr_legacy_ce_count.attr, + NULL, +}; -/* Set of show/store higher level functions for default csrow attributes */ -static ssize_t csrowdev_show(struct kobject *kobj, - struct attribute *attr, char *buffer) -{ - struct csrow_info *csrow = to_csrow(kobj); - struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr); +static struct attribute_group csrow_attr_grp = { + .attrs = csrow_attrs, +}; - if (csrowdev_attr->show) - return csrowdev_attr->show(csrow, - buffer, csrowdev_attr->private); - return -EIO; -} +static const struct attribute_group *csrow_attr_groups[] = { + &csrow_attr_grp, + NULL +}; -static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) +static void csrow_attr_release(struct device *dev) { - struct csrow_info *csrow = to_csrow(kobj); - struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr); - - if (csrowdev_attr->store) - return csrowdev_attr->store(csrow, - buffer, - count, csrowdev_attr->private); - return -EIO; -} + struct csrow_info *csrow = container_of(dev, struct csrow_info, dev); -static const struct sysfs_ops csrowfs_ops = { - .show = csrowdev_show, - .store = csrowdev_store -}; + edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev)); + kfree(csrow); +} -#define CSROWDEV_ATTR(_name,_mode,_show,_store,_private) \ -static struct csrowdev_attribute attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .private = _private, \ +static struct device_type csrow_attr_type = { + .groups = csrow_attr_groups, + .release = csrow_attr_release, }; -/* default cwrow<id>/attribute files */ -CSROWDEV_ATTR(size_mb, S_IRUGO, csrow_size_show, NULL, 0); -CSROWDEV_ATTR(dev_type, S_IRUGO, csrow_dev_type_show, NULL, 0); -CSROWDEV_ATTR(mem_type, S_IRUGO, csrow_mem_type_show, NULL, 0); -CSROWDEV_ATTR(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL, 0); -CSROWDEV_ATTR(ue_count, S_IRUGO, csrow_ue_count_show, NULL, 0); -CSROWDEV_ATTR(ce_count, S_IRUGO, csrow_ce_count_show, NULL, 0); +/* + * possible dynamic channel DIMM Label attribute files + * + */ -/* default attributes of the CSROW<id> object */ -static struct csrowdev_attribute *default_csrow_attr[] = { - &attr_dev_type, - &attr_mem_type, - &attr_edac_mode, - &attr_size_mb, - &attr_ue_count, - &attr_ce_count, - NULL, -}; +#define EDAC_NR_CHANNELS 6 -/* possible dynamic channel DIMM Label attribute files */ -CSROWDEV_ATTR(ch0_dimm_label, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch0_dimm_label, S_IRUGO | S_IWUSR, channel_dimm_label_show, channel_dimm_label_store, 0); -CSROWDEV_ATTR(ch1_dimm_label, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch1_dimm_label, S_IRUGO | S_IWUSR, channel_dimm_label_show, channel_dimm_label_store, 1); -CSROWDEV_ATTR(ch2_dimm_label, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch2_dimm_label, S_IRUGO | S_IWUSR, channel_dimm_label_show, channel_dimm_label_store, 2); -CSROWDEV_ATTR(ch3_dimm_label, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch3_dimm_label, S_IRUGO | S_IWUSR, channel_dimm_label_show, channel_dimm_label_store, 3); -CSROWDEV_ATTR(ch4_dimm_label, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch4_dimm_label, S_IRUGO | S_IWUSR, channel_dimm_label_show, channel_dimm_label_store, 4); -CSROWDEV_ATTR(ch5_dimm_label, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR, channel_dimm_label_show, channel_dimm_label_store, 5); /* Total possible dynamic DIMM Label attribute file table */ -static struct csrowdev_attribute *dynamic_csrow_dimm_attr[] = { - &attr_ch0_dimm_label, - &attr_ch1_dimm_label, - &attr_ch2_dimm_label, - &attr_ch3_dimm_label, - &attr_ch4_dimm_label, - &attr_ch5_dimm_label +static struct device_attribute *dynamic_csrow_dimm_attr[] = { + &dev_attr_legacy_ch0_dimm_label.attr, + &dev_attr_legacy_ch1_dimm_label.attr, + &dev_attr_legacy_ch2_dimm_label.attr, + &dev_attr_legacy_ch3_dimm_label.attr, + &dev_attr_legacy_ch4_dimm_label.attr, + &dev_attr_legacy_ch5_dimm_label.attr }; /* possible dynamic channel ce_count attribute files */ -CSROWDEV_ATTR(ch0_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 0); -CSROWDEV_ATTR(ch1_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 1); -CSROWDEV_ATTR(ch2_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 2); -CSROWDEV_ATTR(ch3_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 3); -CSROWDEV_ATTR(ch4_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 4); -CSROWDEV_ATTR(ch5_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 5); +DEVICE_CHANNEL(ch0_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 0); +DEVICE_CHANNEL(ch1_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 1); +DEVICE_CHANNEL(ch2_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 2); +DEVICE_CHANNEL(ch3_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 3); +DEVICE_CHANNEL(ch4_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 4); +DEVICE_CHANNEL(ch5_ce_count, S_IRUGO, + channel_ce_count_show, NULL, 5); /* Total possible dynamic ce_count attribute file table */ -static struct csrowdev_attribute *dynamic_csrow_ce_count_attr[] = { - &attr_ch0_ce_count, - &attr_ch1_ce_count, - &attr_ch2_ce_count, - &attr_ch3_ce_count, - &attr_ch4_ce_count, - &attr_ch5_ce_count +static struct device_attribute *dynamic_csrow_ce_count_attr[] = { + &dev_attr_legacy_ch0_ce_count.attr, + &dev_attr_legacy_ch1_ce_count.attr, + &dev_attr_legacy_ch2_ce_count.attr, + &dev_attr_legacy_ch3_ce_count.attr, + &dev_attr_legacy_ch4_ce_count.attr, + &dev_attr_legacy_ch5_ce_count.attr }; -#define EDAC_NR_CHANNELS 6 +static inline int nr_pages_per_csrow(struct csrow_info *csrow) +{ + int chan, nr_pages = 0; + + for (chan = 0; chan < csrow->nr_channels; chan++) + nr_pages += csrow->channels[chan]->dimm->nr_pages; -/* Create dynamic CHANNEL files, indexed by 'chan', under specifed CSROW */ -static int edac_create_channel_files(struct kobject *kobj, int chan) + return nr_pages; +} + +/* Create a CSROW object under specifed edac_mc_device */ +static int edac_create_csrow_object(struct mem_ctl_info *mci, + struct csrow_info *csrow, int index) { - int err = -ENODEV; + int err, chan; + + if (csrow->nr_channels >= EDAC_NR_CHANNELS) + return -ENODEV; - if (chan >= EDAC_NR_CHANNELS) + csrow->dev.type = &csrow_attr_type; + csrow->dev.bus = mci->bus; + device_initialize(&csrow->dev); + csrow->dev.parent = &mci->dev; + csrow->mci = mci; + dev_set_name(&csrow->dev, "csrow%d", index); + dev_set_drvdata(&csrow->dev, csrow); + + edac_dbg(0, "creating (virtual) csrow node %s\n", + dev_name(&csrow->dev)); + + err = device_add(&csrow->dev); + if (err < 0) return err; - /* create the DIMM label attribute file */ - err = sysfs_create_file(kobj, - (struct attribute *) - dynamic_csrow_dimm_attr[chan]); - - if (!err) { - /* create the CE Count attribute file */ - err = sysfs_create_file(kobj, - (struct attribute *) - dynamic_csrow_ce_count_attr[chan]); - } else { - debugf1("%s() dimm labels and ce_count files created", - __func__); + for (chan = 0; chan < csrow->nr_channels; chan++) { + /* Only expose populated DIMMs */ + if (!csrow->channels[chan]->dimm->nr_pages) + continue; + err = device_create_file(&csrow->dev, + dynamic_csrow_dimm_attr[chan]); + if (err < 0) + goto error; + err = device_create_file(&csrow->dev, + dynamic_csrow_ce_count_attr[chan]); + if (err < 0) { + device_remove_file(&csrow->dev, + dynamic_csrow_dimm_attr[chan]); + goto error; + } + } + + return 0; + +error: + for (--chan; chan >= 0; chan--) { + device_remove_file(&csrow->dev, + dynamic_csrow_dimm_attr[chan]); + device_remove_file(&csrow->dev, + dynamic_csrow_ce_count_attr[chan]); + } + put_device(&csrow->dev); + + return err; +} + +/* Create a CSROW object under specifed edac_mc_device */ +static int edac_create_csrow_objects(struct mem_ctl_info *mci) +{ + int err, i, chan; + struct csrow_info *csrow; + + for (i = 0; i < mci->nr_csrows; i++) { + csrow = mci->csrows[i]; + if (!nr_pages_per_csrow(csrow)) + continue; + err = edac_create_csrow_object(mci, mci->csrows[i], i); + if (err < 0) { + edac_dbg(1, + "failure: create csrow objects for csrow %d\n", + i); + goto error; + } + } + return 0; + +error: + for (--i; i >= 0; i--) { + csrow = mci->csrows[i]; + if (!nr_pages_per_csrow(csrow)) + continue; + for (chan = csrow->nr_channels - 1; chan >= 0; chan--) { + if (!csrow->channels[chan]->dimm->nr_pages) + continue; + device_remove_file(&csrow->dev, + dynamic_csrow_dimm_attr[chan]); + device_remove_file(&csrow->dev, + dynamic_csrow_ce_count_attr[chan]); + } + put_device(&mci->csrows[i]->dev); } return err; } -/* No memory to release for this kobj */ -static void edac_csrow_instance_release(struct kobject *kobj) +static void edac_delete_csrow_objects(struct mem_ctl_info *mci) +{ + int i, chan; + struct csrow_info *csrow; + + for (i = mci->nr_csrows - 1; i >= 0; i--) { + csrow = mci->csrows[i]; + if (!nr_pages_per_csrow(csrow)) + continue; + for (chan = csrow->nr_channels - 1; chan >= 0; chan--) { + if (!csrow->channels[chan]->dimm->nr_pages) + continue; + edac_dbg(1, "Removing csrow %d channel %d sysfs nodes\n", + i, chan); + device_remove_file(&csrow->dev, + dynamic_csrow_dimm_attr[chan]); + device_remove_file(&csrow->dev, + dynamic_csrow_ce_count_attr[chan]); + } + device_unregister(&mci->csrows[i]->dev); + } +} +#endif + +/* + * Per-dimm (or per-rank) devices + */ + +#define to_dimm(k) container_of(k, struct dimm_info, dev) + +/* show/store functions for DIMM Label attributes */ +static ssize_t dimmdev_location_show(struct device *dev, + struct device_attribute *mattr, char *data) +{ + struct dimm_info *dimm = to_dimm(dev); + + return edac_dimm_info_location(dimm, data, PAGE_SIZE); +} + +static ssize_t dimmdev_label_show(struct device *dev, + struct device_attribute *mattr, char *data) +{ + struct dimm_info *dimm = to_dimm(dev); + + /* if field has not been initialized, there is nothing to send */ + if (!dimm->label[0]) + return 0; + + return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n", dimm->label); +} + +static ssize_t dimmdev_label_store(struct device *dev, + struct device_attribute *mattr, + const char *data, + size_t count) +{ + struct dimm_info *dimm = to_dimm(dev); + + ssize_t max_size = 0; + + max_size = min((ssize_t) count, (ssize_t) EDAC_MC_LABEL_LEN - 1); + strncpy(dimm->label, data, max_size); + dimm->label[max_size] = '\0'; + + return max_size; +} + +static ssize_t dimmdev_size_show(struct device *dev, + struct device_attribute *mattr, char *data) { - struct mem_ctl_info *mci; - struct csrow_info *cs; + struct dimm_info *dimm = to_dimm(dev); - debugf1("%s()\n", __func__); + return sprintf(data, "%u\n", PAGES_TO_MiB(dimm->nr_pages)); +} + +static ssize_t dimmdev_mem_type_show(struct device *dev, + struct device_attribute *mattr, char *data) +{ + struct dimm_info *dimm = to_dimm(dev); + + return sprintf(data, "%s\n", mem_types[dimm->mtype]); +} - cs = container_of(kobj, struct csrow_info, kobj); - mci = cs->mci; +static ssize_t dimmdev_dev_type_show(struct device *dev, + struct device_attribute *mattr, char *data) +{ + struct dimm_info *dimm = to_dimm(dev); + + return sprintf(data, "%s\n", dev_types[dimm->dtype]); +} + +static ssize_t dimmdev_edac_mode_show(struct device *dev, + struct device_attribute *mattr, + char *data) +{ + struct dimm_info *dimm = to_dimm(dev); - kobject_put(&mci->edac_mci_kobj); + return sprintf(data, "%s\n", edac_caps[dimm->edac_mode]); } -/* the kobj_type instance for a CSROW */ -static struct kobj_type ktype_csrow = { - .release = edac_csrow_instance_release, - .sysfs_ops = &csrowfs_ops, - .default_attrs = (struct attribute **)default_csrow_attr, +/* dimm/rank attribute files */ +static DEVICE_ATTR(dimm_label, S_IRUGO | S_IWUSR, + dimmdev_label_show, dimmdev_label_store); +static DEVICE_ATTR(dimm_location, S_IRUGO, dimmdev_location_show, NULL); +static DEVICE_ATTR(size, S_IRUGO, dimmdev_size_show, NULL); +static DEVICE_ATTR(dimm_mem_type, S_IRUGO, dimmdev_mem_type_show, NULL); +static DEVICE_ATTR(dimm_dev_type, S_IRUGO, dimmdev_dev_type_show, NULL); +static DEVICE_ATTR(dimm_edac_mode, S_IRUGO, dimmdev_edac_mode_show, NULL); + +/* attributes of the dimm<id>/rank<id> object */ +static struct attribute *dimm_attrs[] = { + &dev_attr_dimm_label.attr, + &dev_attr_dimm_location.attr, + &dev_attr_size.attr, + &dev_attr_dimm_mem_type.attr, + &dev_attr_dimm_dev_type.attr, + &dev_attr_dimm_edac_mode.attr, + NULL, }; -/* Create a CSROW object under specifed edac_mc_device */ -static int edac_create_csrow_object(struct mem_ctl_info *mci, - struct csrow_info *csrow, int index) +static struct attribute_group dimm_attr_grp = { + .attrs = dimm_attrs, +}; + +static const struct attribute_group *dimm_attr_groups[] = { + &dimm_attr_grp, + NULL +}; + +static void dimm_attr_release(struct device *dev) { - struct kobject *kobj_mci = &mci->edac_mci_kobj; - struct kobject *kobj; - int chan; - int err; + struct dimm_info *dimm = container_of(dev, struct dimm_info, dev); - /* generate ..../edac/mc/mc<id>/csrow<index> */ - memset(&csrow->kobj, 0, sizeof(csrow->kobj)); - csrow->mci = mci; /* include container up link */ + edac_dbg(1, "Releasing dimm device %s\n", dev_name(dev)); + kfree(dimm); +} - /* bump the mci instance's kobject's ref count */ - kobj = kobject_get(&mci->edac_mci_kobj); - if (!kobj) { - err = -ENODEV; - goto err_out; - } +static struct device_type dimm_attr_type = { + .groups = dimm_attr_groups, + .release = dimm_attr_release, +}; - /* Instanstiate the csrow object */ - err = kobject_init_and_add(&csrow->kobj, &ktype_csrow, kobj_mci, - "csrow%d", index); - if (err) - goto err_release_top_kobj; +/* Create a DIMM object under specifed memory controller device */ +static int edac_create_dimm_object(struct mem_ctl_info *mci, + struct dimm_info *dimm, + int index) +{ + int err; + dimm->mci = mci; - /* At this point, to release a csrow kobj, one must - * call the kobject_put and allow that tear down - * to work the releasing - */ + dimm->dev.type = &dimm_attr_type; + dimm->dev.bus = mci->bus; + device_initialize(&dimm->dev); - /* Create the dyanmic attribute files on this csrow, - * namely, the DIMM labels and the channel ce_count - */ - for (chan = 0; chan < csrow->nr_channels; chan++) { - err = edac_create_channel_files(&csrow->kobj, chan); - if (err) { - /* special case the unregister here */ - kobject_put(&csrow->kobj); - goto err_out; - } - } - kobject_uevent(&csrow->kobj, KOBJ_ADD); - return 0; + dimm->dev.parent = &mci->dev; + if (mci->csbased) + dev_set_name(&dimm->dev, "rank%d", index); + else + dev_set_name(&dimm->dev, "dimm%d", index); + dev_set_drvdata(&dimm->dev, dimm); + pm_runtime_forbid(&mci->dev); - /* error unwind stack */ -err_release_top_kobj: - kobject_put(&mci->edac_mci_kobj); + err = device_add(&dimm->dev); + + edac_dbg(0, "creating rank/dimm device %s\n", dev_name(&dimm->dev)); -err_out: return err; } -/* default sysfs methods and data structures for the main MCI kobject */ +/* + * Memory controller device + */ + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) -static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci, +static ssize_t mci_reset_counters_store(struct device *dev, + struct device_attribute *mattr, const char *data, size_t count) { - int row, chan; - + struct mem_ctl_info *mci = to_mci(dev); + int cnt, row, chan, i; + mci->ue_mc = 0; + mci->ce_mc = 0; mci->ue_noinfo_count = 0; mci->ce_noinfo_count = 0; - mci->ue_count = 0; - mci->ce_count = 0; for (row = 0; row < mci->nr_csrows; row++) { - struct csrow_info *ri = &mci->csrows[row]; + struct csrow_info *ri = mci->csrows[row]; ri->ue_count = 0; ri->ce_count = 0; for (chan = 0; chan < ri->nr_channels; chan++) - ri->channels[chan].ce_count = 0; + ri->channels[chan]->ce_count = 0; + } + + cnt = 1; + for (i = 0; i < mci->n_layers; i++) { + cnt *= mci->layers[i].size; + memset(mci->ce_per_layer[i], 0, cnt * sizeof(u32)); + memset(mci->ue_per_layer[i], 0, cnt * sizeof(u32)); } mci->start_time = jiffies; @@ -445,16 +674,15 @@ static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci, * Negative value still means that an error has occurred while setting * the scrub rate. */ -static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci, +static ssize_t mci_sdram_scrub_rate_store(struct device *dev, + struct device_attribute *mattr, const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); unsigned long bandwidth = 0; int new_bw = 0; - if (!mci->set_sdram_scrub_rate) - return -EINVAL; - - if (strict_strtoul(data, 10, &bandwidth) < 0) + if (kstrtoul(data, 10, &bandwidth) < 0) return -EINVAL; new_bw = mci->set_sdram_scrub_rate(mci, bandwidth); @@ -470,13 +698,13 @@ static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci, /* * ->get_sdram_scrub_rate() return value semantics same as above. */ -static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data) +static ssize_t mci_sdram_scrub_rate_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct mem_ctl_info *mci = to_mci(dev); int bandwidth = 0; - if (!mci->get_sdram_scrub_rate) - return -EINVAL; - bandwidth = mci->get_sdram_scrub_rate(mci); if (bandwidth < 0) { edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); @@ -487,408 +715,253 @@ static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data) } /* default attribute files for the MCI object */ -static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data) +static ssize_t mci_ue_count_show(struct device *dev, + struct device_attribute *mattr, + char *data) { - return sprintf(data, "%d\n", mci->ue_count); + struct mem_ctl_info *mci = to_mci(dev); + + return sprintf(data, "%d\n", mci->ue_mc); } -static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data) +static ssize_t mci_ce_count_show(struct device *dev, + struct device_attribute *mattr, + char *data) { - return sprintf(data, "%d\n", mci->ce_count); + struct mem_ctl_info *mci = to_mci(dev); + + return sprintf(data, "%d\n", mci->ce_mc); } -static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data) +static ssize_t mci_ce_noinfo_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct mem_ctl_info *mci = to_mci(dev); + return sprintf(data, "%d\n", mci->ce_noinfo_count); } -static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data) +static ssize_t mci_ue_noinfo_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct mem_ctl_info *mci = to_mci(dev); + return sprintf(data, "%d\n", mci->ue_noinfo_count); } -static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data) +static ssize_t mci_seconds_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct mem_ctl_info *mci = to_mci(dev); + return sprintf(data, "%ld\n", (jiffies - mci->start_time) / HZ); } -static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data) +static ssize_t mci_ctl_name_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct mem_ctl_info *mci = to_mci(dev); + return sprintf(data, "%s\n", mci->ctl_name); } -static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data) +static ssize_t mci_size_mb_show(struct device *dev, + struct device_attribute *mattr, + char *data) { - int total_pages, csrow_idx; + struct mem_ctl_info *mci = to_mci(dev); + int total_pages = 0, csrow_idx, j; - for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows; - csrow_idx++) { - struct csrow_info *csrow = &mci->csrows[csrow_idx]; + for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) { + struct csrow_info *csrow = mci->csrows[csrow_idx]; - if (!csrow->nr_pages) - continue; + for (j = 0; j < csrow->nr_channels; j++) { + struct dimm_info *dimm = csrow->channels[j]->dimm; - total_pages += csrow->nr_pages; + total_pages += dimm->nr_pages; + } } return sprintf(data, "%u\n", PAGES_TO_MiB(total_pages)); } -#define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj) -#define to_mcidev_attr(a) container_of(a,struct mcidev_sysfs_attribute,attr) - -/* MCI show/store functions for top most object */ -static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr, - char *buffer) +static ssize_t mci_max_location_show(struct device *dev, + struct device_attribute *mattr, + char *data) { - struct mem_ctl_info *mem_ctl_info = to_mci(kobj); - struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); - - debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info); + struct mem_ctl_info *mci = to_mci(dev); + int i; + char *p = data; - if (mcidev_attr->show) - return mcidev_attr->show(mem_ctl_info, buffer); + for (i = 0; i < mci->n_layers; i++) { + p += sprintf(p, "%s %d ", + edac_layer_name[mci->layers[i].type], + mci->layers[i].size - 1); + } - return -EIO; + return p - data; } -static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) +#ifdef CONFIG_EDAC_DEBUG +static ssize_t edac_fake_inject_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) { - struct mem_ctl_info *mem_ctl_info = to_mci(kobj); - struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); - - debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info); - - if (mcidev_attr->store) - return mcidev_attr->store(mem_ctl_info, buffer, count); + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + static enum hw_event_mc_err_type type; + u16 errcount = mci->fake_inject_count; + + if (!errcount) + errcount = 1; + + type = mci->fake_inject_ue ? HW_EVENT_ERR_UNCORRECTED + : HW_EVENT_ERR_CORRECTED; + + printk(KERN_DEBUG + "Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n", + errcount, + (type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE", + errcount > 1 ? "s" : "", + mci->fake_inject_layer[0], + mci->fake_inject_layer[1], + mci->fake_inject_layer[2] + ); + edac_mc_handle_error(type, mci, errcount, 0, 0, 0, + mci->fake_inject_layer[0], + mci->fake_inject_layer[1], + mci->fake_inject_layer[2], + "FAKE ERROR", "for EDAC testing only"); - return -EIO; + return count; } -/* Intermediate show/store table */ -static const struct sysfs_ops mci_ops = { - .show = mcidev_show, - .store = mcidev_store -}; - -#define MCIDEV_ATTR(_name,_mode,_show,_store) \ -static struct mcidev_sysfs_attribute mci_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ +static const struct file_operations debug_fake_inject_fops = { + .open = simple_open, + .write = edac_fake_inject_write, + .llseek = generic_file_llseek, }; +#endif /* default Control file */ -MCIDEV_ATTR(reset_counters, S_IWUSR, NULL, mci_reset_counters_store); +DEVICE_ATTR(reset_counters, S_IWUSR, NULL, mci_reset_counters_store); /* default Attribute files */ -MCIDEV_ATTR(mc_name, S_IRUGO, mci_ctl_name_show, NULL); -MCIDEV_ATTR(size_mb, S_IRUGO, mci_size_mb_show, NULL); -MCIDEV_ATTR(seconds_since_reset, S_IRUGO, mci_seconds_show, NULL); -MCIDEV_ATTR(ue_noinfo_count, S_IRUGO, mci_ue_noinfo_show, NULL); -MCIDEV_ATTR(ce_noinfo_count, S_IRUGO, mci_ce_noinfo_show, NULL); -MCIDEV_ATTR(ue_count, S_IRUGO, mci_ue_count_show, NULL); -MCIDEV_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); +DEVICE_ATTR(mc_name, S_IRUGO, mci_ctl_name_show, NULL); +DEVICE_ATTR(size_mb, S_IRUGO, mci_size_mb_show, NULL); +DEVICE_ATTR(seconds_since_reset, S_IRUGO, mci_seconds_show, NULL); +DEVICE_ATTR(ue_noinfo_count, S_IRUGO, mci_ue_noinfo_show, NULL); +DEVICE_ATTR(ce_noinfo_count, S_IRUGO, mci_ce_noinfo_show, NULL); +DEVICE_ATTR(ue_count, S_IRUGO, mci_ue_count_show, NULL); +DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); +DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); /* memory scrubber attribute file */ -MCIDEV_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show, - mci_sdram_scrub_rate_store); - -static struct mcidev_sysfs_attribute *mci_attr[] = { - &mci_attr_reset_counters, - &mci_attr_mc_name, - &mci_attr_size_mb, - &mci_attr_seconds_since_reset, - &mci_attr_ue_noinfo_count, - &mci_attr_ce_noinfo_count, - &mci_attr_ue_count, - &mci_attr_ce_count, - &mci_attr_sdram_scrub_rate, +DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL); + +static struct attribute *mci_attrs[] = { + &dev_attr_reset_counters.attr, + &dev_attr_mc_name.attr, + &dev_attr_size_mb.attr, + &dev_attr_seconds_since_reset.attr, + &dev_attr_ue_noinfo_count.attr, + &dev_attr_ce_noinfo_count.attr, + &dev_attr_ue_count.attr, + &dev_attr_ce_count.attr, + &dev_attr_max_location.attr, NULL }; +static struct attribute_group mci_attr_grp = { + .attrs = mci_attrs, +}; -/* - * Release of a MC controlling instance - * - * each MC control instance has the following resources upon entry: - * a) a ref count on the top memctl kobj - * b) a ref count on this module - * - * this function must decrement those ref counts and then - * issue a free on the instance's memory - */ -static void edac_mci_control_release(struct kobject *kobj) -{ - struct mem_ctl_info *mci; - - mci = to_mci(kobj); +static const struct attribute_group *mci_attr_groups[] = { + &mci_attr_grp, + NULL +}; - debugf0("%s() mci instance idx=%d releasing\n", __func__, mci->mc_idx); +static void mci_attr_release(struct device *dev) +{ + struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev); - /* decrement the module ref count */ - module_put(mci->owner); + edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev)); + kfree(mci); } -static struct kobj_type ktype_mci = { - .release = edac_mci_control_release, - .sysfs_ops = &mci_ops, - .default_attrs = (struct attribute **)mci_attr, +static struct device_type mci_attr_type = { + .groups = mci_attr_groups, + .release = mci_attr_release, }; -/* EDAC memory controller sysfs kset: - * /sys/devices/system/edac/mc - */ -static struct kset *mc_kset; +#ifdef CONFIG_EDAC_DEBUG +static struct dentry *edac_debugfs; -/* - * edac_mc_register_sysfs_main_kobj - * - * setups and registers the main kobject for each mci - */ -int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci) +int __init edac_debugfs_init(void) { - struct kobject *kobj_mci; - int err; - - debugf1("%s()\n", __func__); - - kobj_mci = &mci->edac_mci_kobj; - - /* Init the mci's kobject */ - memset(kobj_mci, 0, sizeof(*kobj_mci)); - - /* Record which module 'owns' this control structure - * and bump the ref count of the module - */ - mci->owner = THIS_MODULE; - - /* bump ref count on this module */ - if (!try_module_get(mci->owner)) { - err = -ENODEV; - goto fail_out; + edac_debugfs = debugfs_create_dir("edac", NULL); + if (IS_ERR(edac_debugfs)) { + edac_debugfs = NULL; + return -ENOMEM; } - - /* this instance become part of the mc_kset */ - kobj_mci->kset = mc_kset; - - /* register the mc<id> kobject to the mc_kset */ - err = kobject_init_and_add(kobj_mci, &ktype_mci, NULL, - "mc%d", mci->mc_idx); - if (err) { - debugf1("%s()Failed to register '.../edac/mc%d'\n", - __func__, mci->mc_idx); - goto kobj_reg_fail; - } - kobject_uevent(kobj_mci, KOBJ_ADD); - - /* At this point, to 'free' the control struct, - * edac_mc_unregister_sysfs_main_kobj() must be used - */ - - debugf1("%s() Registered '.../edac/mc%d' kobject\n", - __func__, mci->mc_idx); - return 0; - - /* Error exit stack */ - -kobj_reg_fail: - module_put(mci->owner); - -fail_out: - return err; -} - -/* - * edac_mc_register_sysfs_main_kobj - * - * tears down and the main mci kobject from the mc_kset - */ -void edac_mc_unregister_sysfs_main_kobj(struct mem_ctl_info *mci) -{ - debugf1("%s()\n", __func__); - - /* delete the kobj from the mc_kset */ - kobject_put(&mci->edac_mci_kobj); -} - -#define EDAC_DEVICE_SYMLINK "device" - -#define grp_to_mci(k) (container_of(k, struct mcidev_sysfs_group_kobj, kobj)->mci) - -/* MCI show/store functions for top most object */ -static ssize_t inst_grp_show(struct kobject *kobj, struct attribute *attr, - char *buffer) -{ - struct mem_ctl_info *mem_ctl_info = grp_to_mci(kobj); - struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); - - debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info); - - if (mcidev_attr->show) - return mcidev_attr->show(mem_ctl_info, buffer); - - return -EIO; -} - -static ssize_t inst_grp_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct mem_ctl_info *mem_ctl_info = grp_to_mci(kobj); - struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); - - debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info); - - if (mcidev_attr->store) - return mcidev_attr->store(mem_ctl_info, buffer, count); - - return -EIO; } -/* No memory to release for this kobj */ -static void edac_inst_grp_release(struct kobject *kobj) +void __exit edac_debugfs_exit(void) { - struct mcidev_sysfs_group_kobj *grp; - struct mem_ctl_info *mci; - - debugf1("%s()\n", __func__); - - grp = container_of(kobj, struct mcidev_sysfs_group_kobj, kobj); - mci = grp->mci; + debugfs_remove(edac_debugfs); } -/* Intermediate show/store table */ -static struct sysfs_ops inst_grp_ops = { - .show = inst_grp_show, - .store = inst_grp_store -}; - -/* the kobj_type instance for a instance group */ -static struct kobj_type ktype_inst_grp = { - .release = edac_inst_grp_release, - .sysfs_ops = &inst_grp_ops, -}; - - -/* - * edac_create_mci_instance_attributes - * create MC driver specific attributes bellow an specified kobj - * This routine calls itself recursively, in order to create an entire - * object tree. - */ -static int edac_create_mci_instance_attributes(struct mem_ctl_info *mci, - const struct mcidev_sysfs_attribute *sysfs_attrib, - struct kobject *kobj) +static int edac_create_debug_nodes(struct mem_ctl_info *mci) { - int err; + struct dentry *d, *parent; + char name[80]; + int i; - debugf4("%s()\n", __func__); - - while (sysfs_attrib) { - debugf4("%s() sysfs_attrib = %p\n",__func__, sysfs_attrib); - if (sysfs_attrib->grp) { - struct mcidev_sysfs_group_kobj *grp_kobj; - - grp_kobj = kzalloc(sizeof(*grp_kobj), GFP_KERNEL); - if (!grp_kobj) - return -ENOMEM; - - grp_kobj->grp = sysfs_attrib->grp; - grp_kobj->mci = mci; - list_add_tail(&grp_kobj->list, &mci->grp_kobj_list); - - debugf0("%s() grp %s, mci %p\n", __func__, - sysfs_attrib->grp->name, mci); - - err = kobject_init_and_add(&grp_kobj->kobj, - &ktype_inst_grp, - &mci->edac_mci_kobj, - sysfs_attrib->grp->name); - if (err < 0) { - printk(KERN_ERR "kobject_init_and_add failed: %d\n", err); - return err; - } - err = edac_create_mci_instance_attributes(mci, - grp_kobj->grp->mcidev_attr, - &grp_kobj->kobj); - - if (err < 0) - return err; - } else if (sysfs_attrib->attr.name) { - debugf4("%s() file %s\n", __func__, - sysfs_attrib->attr.name); - - err = sysfs_create_file(kobj, &sysfs_attrib->attr); - if (err < 0) { - printk(KERN_ERR "sysfs_create_file failed: %d\n", err); - return err; - } - } else - break; - - sysfs_attrib++; + if (!edac_debugfs) + return -ENODEV; + + d = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs); + if (!d) + return -ENOMEM; + parent = d; + + for (i = 0; i < mci->n_layers; i++) { + sprintf(name, "fake_inject_%s", + edac_layer_name[mci->layers[i].type]); + d = debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_layer[i]); + if (!d) + goto nomem; } - return 0; -} + d = debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_ue); + if (!d) + goto nomem; -/* - * edac_remove_mci_instance_attributes - * remove MC driver specific attributes at the topmost level - * directory of this mci instance. - */ -static void edac_remove_mci_instance_attributes(struct mem_ctl_info *mci, - const struct mcidev_sysfs_attribute *sysfs_attrib, - struct kobject *kobj, int count) -{ - struct mcidev_sysfs_group_kobj *grp_kobj, *tmp; + d = debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_count); + if (!d) + goto nomem; - debugf1("%s()\n", __func__); + d = debugfs_create_file("fake_inject", S_IWUSR, parent, + &mci->dev, + &debug_fake_inject_fops); + if (!d) + goto nomem; - /* - * loop if there are attributes and until we hit a NULL entry - * Remove first all the attributes - */ - while (sysfs_attrib) { - debugf4("%s() sysfs_attrib = %p\n",__func__, sysfs_attrib); - if (sysfs_attrib->grp) { - debugf4("%s() seeking for group %s\n", - __func__, sysfs_attrib->grp->name); - list_for_each_entry(grp_kobj, - &mci->grp_kobj_list, list) { - debugf4("%s() grp_kobj->grp = %p\n",__func__, grp_kobj->grp); - if (grp_kobj->grp == sysfs_attrib->grp) { - edac_remove_mci_instance_attributes(mci, - grp_kobj->grp->mcidev_attr, - &grp_kobj->kobj, count + 1); - debugf4("%s() group %s\n", __func__, - sysfs_attrib->grp->name); - kobject_put(&grp_kobj->kobj); - } - } - debugf4("%s() end of seeking for group %s\n", - __func__, sysfs_attrib->grp->name); - } else if (sysfs_attrib->attr.name) { - debugf4("%s() file %s\n", __func__, - sysfs_attrib->attr.name); - sysfs_remove_file(kobj, &sysfs_attrib->attr); - } else - break; - sysfs_attrib++; - } - - /* Remove the group objects */ - if (count) - return; - list_for_each_entry_safe(grp_kobj, tmp, - &mci->grp_kobj_list, list) { - list_del(&grp_kobj->list); - kfree(grp_kobj); - } + mci->debugfs = parent; + return 0; +nomem: + debugfs_remove(mci->debugfs); + return -ENOMEM; } - +#endif /* * Create a new Memory Controller kobject instance, @@ -900,71 +973,105 @@ static void edac_remove_mci_instance_attributes(struct mem_ctl_info *mci, */ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) { - int i; - int err; - struct csrow_info *csrow; - struct kobject *kobj_mci = &mci->edac_mci_kobj; + int i, err; + + /* + * The memory controller needs its own bus, in order to avoid + * namespace conflicts at /sys/bus/edac. + */ + mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); + if (!mci->bus->name) + return -ENOMEM; - debugf0("%s() idx=%d\n", __func__, mci->mc_idx); + edac_dbg(0, "creating bus %s\n", mci->bus->name); - INIT_LIST_HEAD(&mci->grp_kobj_list); + err = bus_register(mci->bus); + if (err < 0) + return err; - /* create a symlink for the device */ - err = sysfs_create_link(kobj_mci, &mci->dev->kobj, - EDAC_DEVICE_SYMLINK); - if (err) { - debugf1("%s() failure to create symlink\n", __func__); - goto fail0; + /* get the /sys/devices/system/edac subsys reference */ + mci->dev.type = &mci_attr_type; + device_initialize(&mci->dev); + + mci->dev.parent = mci_pdev; + mci->dev.bus = mci->bus; + dev_set_name(&mci->dev, "mc%d", mci->mc_idx); + dev_set_drvdata(&mci->dev, mci); + pm_runtime_forbid(&mci->dev); + + edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); + err = device_add(&mci->dev); + if (err < 0) { + edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); + bus_unregister(mci->bus); + kfree(mci->bus->name); + return err; } - /* If the low level driver desires some attributes, - * then create them now for the driver. - */ - if (mci->mc_driver_sysfs_attributes) { - err = edac_create_mci_instance_attributes(mci, - mci->mc_driver_sysfs_attributes, - &mci->edac_mci_kobj); + if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) { + if (mci->get_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO; + dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show; + } + if (mci->set_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR; + dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store; + } + err = device_create_file(&mci->dev, + &dev_attr_sdram_scrub_rate); if (err) { - debugf1("%s() failure to create mci attributes\n", - __func__); - goto fail0; + edac_dbg(1, "failure: create sdram_scrub_rate\n"); + goto fail2; } } - - /* Make directories for each CSROW object under the mc<id> kobject + /* + * Create the dimm/rank devices */ - for (i = 0; i < mci->nr_csrows; i++) { - csrow = &mci->csrows[i]; - - /* Only expose populated CSROWs */ - if (csrow->nr_pages > 0) { - err = edac_create_csrow_object(mci, csrow, i); - if (err) { - debugf1("%s() failure: create csrow %d obj\n", - __func__, i); - goto fail1; - } + for (i = 0; i < mci->tot_dimms; i++) { + struct dimm_info *dimm = mci->dimms[i]; + /* Only expose populated DIMMs */ + if (dimm->nr_pages == 0) + continue; +#ifdef CONFIG_EDAC_DEBUG + edac_dbg(1, "creating dimm%d, located at ", i); + if (edac_debug_level >= 1) { + int lay; + for (lay = 0; lay < mci->n_layers; lay++) + printk(KERN_CONT "%s %d ", + edac_layer_name[mci->layers[lay].type], + dimm->location[lay]); + printk(KERN_CONT "\n"); + } +#endif + err = edac_create_dimm_object(mci, dimm, i); + if (err) { + edac_dbg(1, "failure: create dimm %d obj\n", i); + goto fail; } } +#ifdef CONFIG_EDAC_LEGACY_SYSFS + err = edac_create_csrow_objects(mci); + if (err < 0) + goto fail; +#endif + +#ifdef CONFIG_EDAC_DEBUG + edac_create_debug_nodes(mci); +#endif return 0; - /* CSROW error: backout what has already been registered, */ -fail1: +fail: for (i--; i >= 0; i--) { - if (csrow->nr_pages > 0) { - kobject_put(&mci->csrows[i].kobj); - } + struct dimm_info *dimm = mci->dimms[i]; + if (dimm->nr_pages == 0) + continue; + device_unregister(&dimm->dev); } - - /* remove the mci instance's attributes, if any */ - edac_remove_mci_instance_attributes(mci, - mci->mc_driver_sysfs_attributes, &mci->edac_mci_kobj, 0); - - /* remove the symlink */ - sysfs_remove_link(kobj_mci, EDAC_DEVICE_SYMLINK); - -fail0: +fail2: + device_unregister(&mci->dev); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -975,90 +1082,91 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) { int i; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); - /* remove all csrow kobjects */ - debugf4("%s() unregister this mci kobj\n", __func__); - for (i = 0; i < mci->nr_csrows; i++) { - if (mci->csrows[i].nr_pages > 0) { - debugf0("%s() unreg csrow-%d\n", __func__, i); - kobject_put(&mci->csrows[i].kobj); - } - } +#ifdef CONFIG_EDAC_DEBUG + debugfs_remove(mci->debugfs); +#endif +#ifdef CONFIG_EDAC_LEGACY_SYSFS + edac_delete_csrow_objects(mci); +#endif - /* remove this mci instance's attribtes */ - if (mci->mc_driver_sysfs_attributes) { - debugf4("%s() unregister mci private attributes\n", __func__); - edac_remove_mci_instance_attributes(mci, - mci->mc_driver_sysfs_attributes, - &mci->edac_mci_kobj, 0); + for (i = 0; i < mci->tot_dimms; i++) { + struct dimm_info *dimm = mci->dimms[i]; + if (dimm->nr_pages == 0) + continue; + edac_dbg(0, "removing device %s\n", dev_name(&dimm->dev)); + device_unregister(&dimm->dev); } - - /* remove the symlink */ - debugf4("%s() remove_link\n", __func__); - sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK); - - /* unregister this instance's kobject */ - debugf4("%s() remove_mci_instance\n", __func__); - kobject_put(&mci->edac_mci_kobj); } +void edac_unregister_sysfs(struct mem_ctl_info *mci) +{ + edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); + device_unregister(&mci->dev); + bus_unregister(mci->bus); + kfree(mci->bus->name); +} +static void mc_attr_release(struct device *dev) +{ + /* + * There's no container structure here, as this is just the mci + * parent device, used to create the /sys/devices/mc sysfs node. + * So, there are no attributes on it. + */ + edac_dbg(1, "Releasing device %s\n", dev_name(dev)); + kfree(dev); +} - +static struct device_type mc_attr_type = { + .release = mc_attr_release, +}; /* - * edac_setup_sysfs_mc_kset(void) - * - * Initialize the mc_kset for the 'mc' entry - * This requires creating the top 'mc' directory with a kset - * and its controls/attributes. - * - * To this 'mc' kset, instance 'mci' will be grouped as children. - * - * Return: 0 SUCCESS - * !0 FAILURE error code + * Init/exit code for the module. Basically, creates/removes /sys/class/rc */ -int edac_sysfs_setup_mc_kset(void) +int __init edac_mc_sysfs_init(void) { - int err = -EINVAL; struct bus_type *edac_subsys; - - debugf1("%s()\n", __func__); + int err; /* get the /sys/devices/system/edac subsys reference */ edac_subsys = edac_get_sysfs_subsys(); if (edac_subsys == NULL) { - debugf1("%s() no edac_subsys error=%d\n", __func__, err); - goto fail_out; + edac_dbg(1, "no edac_subsys\n"); + err = -EINVAL; + goto out; } - /* Init the MC's kobject */ - mc_kset = kset_create_and_add("mc", NULL, &edac_subsys->dev_root->kobj); - if (!mc_kset) { + mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL); + if (!mci_pdev) { err = -ENOMEM; - debugf1("%s() Failed to register '.../edac/mc'\n", __func__); - goto fail_kset; + goto out_put_sysfs; } - debugf1("%s() Registered '.../edac/mc' kobject\n", __func__); + mci_pdev->bus = edac_subsys; + mci_pdev->type = &mc_attr_type; + device_initialize(mci_pdev); + dev_set_name(mci_pdev, "mc"); + + err = device_add(mci_pdev); + if (err < 0) + goto out_dev_free; + + edac_dbg(0, "device %s created\n", dev_name(mci_pdev)); return 0; -fail_kset: + out_dev_free: + kfree(mci_pdev); + out_put_sysfs: edac_put_sysfs_subsys(); - -fail_out: + out: return err; } -/* - * edac_sysfs_teardown_mc_kset - * - * deconstruct the mc_ket for memory controllers - */ -void edac_sysfs_teardown_mc_kset(void) +void __exit edac_mc_sysfs_exit(void) { - kset_unregister(mc_kset); + device_unregister(mci_pdev); edac_put_sysfs_subsys(); } - diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 5ddaa86d6a6..a66941fea5a 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -15,12 +15,32 @@ #include "edac_core.h" #include "edac_module.h" -#define EDAC_VERSION "Ver: 2.1.0" +#define EDAC_VERSION "Ver: 3.0.0" #ifdef CONFIG_EDAC_DEBUG + +static int edac_set_debug_level(const char *buf, struct kernel_param *kp) +{ + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + if (val < 0 || val > 4) + return -EINVAL; + + return param_set_int(buf, kp); +} + /* Values of 0 to 4 will generate output */ int edac_debug_level = 2; EXPORT_SYMBOL_GPL(edac_debug_level); + +module_param_call(edac_debug_level, edac_set_debug_level, param_get_int, + &edac_debug_level, 0644); +MODULE_PARM_DESC(edac_debug_level, "EDAC debug level: [0-4], default: 2"); #endif /* scope is to module level only */ @@ -90,26 +110,21 @@ static int __init edac_init(void) */ edac_pci_clear_parity_errors(); - /* - * now set up the mc_kset under the edac class object - */ - err = edac_sysfs_setup_mc_kset(); + err = edac_mc_sysfs_init(); if (err) goto error; + edac_debugfs_init(); + /* Setup/Initialize the workq for this core */ err = edac_workqueue_setup(); if (err) { edac_printk(KERN_ERR, EDAC_MC, "init WorkQueue failure\n"); - goto workq_fail; + goto error; } return 0; - /* Error teardown stack */ -workq_fail: - edac_sysfs_teardown_mc_kset(); - error: return err; } @@ -120,26 +135,20 @@ error: */ static void __exit edac_exit(void) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* tear down the various subsystems */ edac_workqueue_teardown(); - edac_sysfs_teardown_mc_kset(); + edac_mc_sysfs_exit(); + edac_debugfs_exit(); } /* * Inform the kernel of our entry and exit points */ -module_init(edac_init); +subsys_initcall(edac_init); module_exit(edac_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al"); MODULE_DESCRIPTION("Core library routines for EDAC reporting"); - -/* refer to *_sysfs.c files for parameters that are exported via sysfs */ - -#ifdef CONFIG_EDAC_DEBUG -module_param(edac_debug_level, int, 0644); -MODULE_PARM_DESC(edac_debug_level, "Debug level"); -#endif diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index 00f81b47a51..f2118bfcf8d 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -19,12 +19,12 @@ * * edac_mc objects */ -extern int edac_sysfs_setup_mc_kset(void); -extern void edac_sysfs_teardown_mc_kset(void); -extern int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci); -extern void edac_mc_unregister_sysfs_main_kobj(struct mem_ctl_info *mci); + /* on edac_mc_sysfs.c */ +int edac_mc_sysfs_init(void); +void edac_mc_sysfs_exit(void); extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci); extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci); +void edac_unregister_sysfs(struct mem_ctl_info *mci); extern int edac_get_log_ue(void); extern int edac_get_log_ce(void); extern int edac_get_panic_on_ue(void); @@ -34,6 +34,10 @@ extern int edac_mc_get_panic_on_ue(void); extern int edac_get_poll_msec(void); extern int edac_mc_get_poll_msec(void); +unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, + unsigned len); + + /* on edac_device.c */ extern int edac_device_register_sysfs_main_kobj( struct edac_device_ctl_info *edac_dev); extern void edac_device_unregister_sysfs_main_kobj( @@ -48,9 +52,23 @@ extern void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, extern void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev); extern void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, unsigned long value); -extern void edac_mc_reset_delay_period(int value); +extern void edac_mc_reset_delay_period(unsigned long value); + +extern void *edac_align_ptr(void **p, unsigned size, int n_elems); -extern void *edac_align_ptr(void *ptr, unsigned size); +/* + * EDAC debugfs functions + */ +#ifdef CONFIG_EDAC_DEBUG +int edac_debugfs_init(void); +void edac_debugfs_exit(void); +#else +static inline int edac_debugfs_init(void) +{ + return -ENODEV; +} +static inline void edac_debugfs_exit(void) {} +#endif /* * EDAC PCI functions diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 63af1c5673d..2cf44b4db80 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -42,13 +42,13 @@ struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt, const char *edac_pci_name) { struct edac_pci_ctl_info *pci; - void *pvt; + void *p = NULL, *pvt; unsigned int size; - debugf1("%s()\n", __func__); + edac_dbg(1, "\n"); - pci = (struct edac_pci_ctl_info *)0; - pvt = edac_align_ptr(&pci[1], sz_pvt); + pci = edac_align_ptr(&p, sizeof(*pci), 1); + pvt = edac_align_ptr(&p, 1, sz_pvt); size = ((unsigned long)pvt) + sz_pvt; /* Alloc the needed control struct memory */ @@ -80,7 +80,7 @@ EXPORT_SYMBOL_GPL(edac_pci_alloc_ctl_info); */ void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci) { - debugf1("%s()\n", __func__); + edac_dbg(1, "\n"); edac_pci_remove_sysfs(pci); } @@ -97,7 +97,7 @@ static struct edac_pci_ctl_info *find_edac_pci_by_dev(struct device *dev) struct edac_pci_ctl_info *pci; struct list_head *item; - debugf1("%s()\n", __func__); + edac_dbg(1, "\n"); list_for_each(item, &edac_pci_list) { pci = list_entry(item, struct edac_pci_ctl_info, link); @@ -122,7 +122,7 @@ static int add_edac_pci_to_global_list(struct edac_pci_ctl_info *pci) struct list_head *item, *insert_before; struct edac_pci_ctl_info *rover; - debugf1("%s()\n", __func__); + edac_dbg(1, "\n"); insert_before = &edac_pci_list; @@ -226,7 +226,7 @@ static void edac_pci_workq_function(struct work_struct *work_req) int msec; unsigned long delay; - debugf3("%s() checking\n", __func__); + edac_dbg(3, "checking\n"); mutex_lock(&edac_pci_ctls_mutex); @@ -261,7 +261,7 @@ static void edac_pci_workq_function(struct work_struct *work_req) static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci, unsigned int msec) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function); queue_delayed_work(edac_workqueue, &pci->work, @@ -276,7 +276,7 @@ static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci) { int status; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); status = cancel_delayed_work(&pci->work); if (status == 0) @@ -293,7 +293,7 @@ static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci) void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci, unsigned long value) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); edac_pci_workq_teardown(pci); @@ -333,7 +333,7 @@ EXPORT_SYMBOL_GPL(edac_pci_alloc_index); */ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); pci->pci_idx = edac_idx; pci->start_time = jiffies; @@ -358,11 +358,9 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx) } edac_pci_printk(pci, KERN_INFO, - "Giving out device to module '%s' controller '%s':" - " DEV '%s' (%s)\n", - pci->mod_name, - pci->ctl_name, - edac_dev_name(pci), edac_op_state_to_string(pci->op_state)); + "Giving out device to module %s controller %s: DEV %s (%s)\n", + pci->mod_name, pci->ctl_name, pci->dev_name, + edac_op_state_to_string(pci->op_state)); mutex_unlock(&edac_pci_ctls_mutex); return 0; @@ -393,7 +391,7 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev) { struct edac_pci_ctl_info *pci; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); mutex_lock(&edac_pci_ctls_mutex); @@ -430,7 +428,7 @@ EXPORT_SYMBOL_GPL(edac_pci_del_device); */ static void edac_pci_generic_check(struct edac_pci_ctl_info *pci) { - debugf4("%s()\n", __func__); + edac_dbg(4, "\n"); edac_pci_do_parity_check(); } @@ -470,12 +468,13 @@ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev, pci->mod_name = mod_name; pci->ctl_name = EDAC_PCI_GENCTL_NAME; - pci->edac_check = edac_pci_generic_check; + if (edac_op_state == EDAC_OPSTATE_POLL) + pci->edac_check = edac_pci_generic_check; pdata->edac_idx = edac_pci_idx++; if (edac_pci_add_device(pci, pdata->edac_idx) > 0) { - debugf3("%s(): failed edac_pci_add_device()\n", __func__); + edac_dbg(3, "failed edac_pci_add_device()\n"); edac_pci_free_ctl_info(pci); return NULL; } @@ -491,7 +490,7 @@ EXPORT_SYMBOL_GPL(edac_pci_create_generic_ctl); */ void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci) { - debugf0("%s() pci mod=%s\n", __func__, pci->mod_name); + edac_dbg(0, "pci mod=%s\n", pci->mod_name); edac_pci_del_device(pci->dev); edac_pci_free_ctl_info(pci); diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 97f5064e399..e8658e45176 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -78,7 +78,7 @@ static void edac_pci_instance_release(struct kobject *kobj) { struct edac_pci_ctl_info *pci; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* Form pointer to containing struct, the pci control struct */ pci = to_instance(kobj); @@ -161,7 +161,7 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx) struct kobject *main_kobj; int err; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* First bump the ref count on the top main kobj, which will * track the number of PCI instances we have, and thus nest @@ -177,14 +177,13 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx) err = kobject_init_and_add(&pci->kobj, &ktype_pci_instance, edac_pci_top_main_kobj, "pci%d", idx); if (err != 0) { - debugf2("%s() failed to register instance pci%d\n", - __func__, idx); + edac_dbg(2, "failed to register instance pci%d\n", idx); kobject_put(edac_pci_top_main_kobj); goto error_out; } kobject_uevent(&pci->kobj, KOBJ_ADD); - debugf1("%s() Register instance 'pci%d' kobject\n", __func__, idx); + edac_dbg(1, "Register instance 'pci%d' kobject\n", idx); return 0; @@ -201,7 +200,7 @@ error_out: static void edac_pci_unregister_sysfs_instance_kobj( struct edac_pci_ctl_info *pci) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* Unregister the instance kobject and allow its release * function release the main reference count and then @@ -257,7 +256,7 @@ static ssize_t edac_pci_dev_store(struct kobject *kobj, struct edac_pci_dev_attribute *edac_pci_dev; edac_pci_dev = (struct edac_pci_dev_attribute *)attr; - if (edac_pci_dev->show) + if (edac_pci_dev->store) return edac_pci_dev->store(edac_pci_dev->value, buffer, count); return -EIO; } @@ -317,7 +316,7 @@ static struct edac_pci_dev_attribute *edac_pci_attr[] = { */ static void edac_pci_release_main_kobj(struct kobject *kobj) { - debugf0("%s() here to module_put(THIS_MODULE)\n", __func__); + edac_dbg(0, "here to module_put(THIS_MODULE)\n"); kfree(kobj); @@ -345,7 +344,7 @@ static int edac_pci_main_kobj_setup(void) int err; struct bus_type *edac_subsys; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* check and count if we have already created the main kobject */ if (atomic_inc_return(&edac_pci_sysfs_refcount) != 1) @@ -356,7 +355,7 @@ static int edac_pci_main_kobj_setup(void) */ edac_subsys = edac_get_sysfs_subsys(); if (edac_subsys == NULL) { - debugf1("%s() no edac_subsys\n", __func__); + edac_dbg(1, "no edac_subsys\n"); err = -ENODEV; goto decrement_count_fail; } @@ -366,14 +365,14 @@ static int edac_pci_main_kobj_setup(void) * level main kobj for EDAC PCI */ if (!try_module_get(THIS_MODULE)) { - debugf1("%s() try_module_get() failed\n", __func__); + edac_dbg(1, "try_module_get() failed\n"); err = -ENODEV; goto mod_get_fail; } edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); if (!edac_pci_top_main_kobj) { - debugf1("Failed to allocate\n"); + edac_dbg(1, "Failed to allocate\n"); err = -ENOMEM; goto kzalloc_fail; } @@ -383,7 +382,7 @@ static int edac_pci_main_kobj_setup(void) &ktype_edac_pci_main_kobj, &edac_subsys->dev_root->kobj, "pci"); if (err) { - debugf1("Failed to register '.../edac/pci'\n"); + edac_dbg(1, "Failed to register '.../edac/pci'\n"); goto kobject_init_and_add_fail; } @@ -392,7 +391,7 @@ static int edac_pci_main_kobj_setup(void) * must be used, for resources to be cleaned up properly */ kobject_uevent(edac_pci_top_main_kobj, KOBJ_ADD); - debugf1("Registered '.../edac/pci' kobject\n"); + edac_dbg(1, "Registered '.../edac/pci' kobject\n"); return 0; @@ -421,18 +420,17 @@ decrement_count_fail: */ static void edac_pci_main_kobj_teardown(void) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* Decrement the count and only if no more controller instances * are connected perform the unregisteration of the top level * main kobj */ if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { - debugf0("%s() called kobject_put on main kobj\n", - __func__); + edac_dbg(0, "called kobject_put on main kobj\n"); kobject_put(edac_pci_top_main_kobj); + edac_put_sysfs_subsys(); } - edac_put_sysfs_subsys(); } /* @@ -446,7 +444,7 @@ int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci) int err; struct kobject *edac_kobj = &pci->kobj; - debugf0("%s() idx=%d\n", __func__, pci->pci_idx); + edac_dbg(0, "idx=%d\n", pci->pci_idx); /* create the top main EDAC PCI kobject, IF needed */ err = edac_pci_main_kobj_setup(); @@ -460,8 +458,7 @@ int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci) err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK); if (err) { - debugf0("%s() sysfs_create_link() returned err= %d\n", - __func__, err); + edac_dbg(0, "sysfs_create_link() returned err= %d\n", err); goto symlink_fail; } @@ -484,7 +481,7 @@ unregister_cleanup: */ void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci) { - debugf0("%s() index=%d\n", __func__, pci->pci_idx); + edac_dbg(0, "index=%d\n", pci->pci_idx); /* Remove the symlink */ sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK); @@ -496,7 +493,7 @@ void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci) * if this 'pci' is the last instance. * If it is, the main kobject will be unregistered as a result */ - debugf0("%s() calling edac_pci_main_kobj_teardown()\n", __func__); + edac_dbg(0, "calling edac_pci_main_kobj_teardown()\n"); edac_pci_main_kobj_teardown(); } @@ -572,7 +569,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev) local_irq_restore(flags); - debugf4("PCI STATUS= 0x%04x %s\n", status, dev_name(&dev->dev)); + edac_dbg(4, "PCI STATUS= 0x%04x %s\n", status, dev_name(&dev->dev)); /* check the status reg for errors on boards NOT marked as broken * if broken, we cannot trust any of the status bits @@ -603,13 +600,15 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev) } - debugf4("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev_name(&dev->dev)); + edac_dbg(4, "PCI HEADER TYPE= 0x%02x %s\n", + header_type, dev_name(&dev->dev)); if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { /* On bridges, need to examine secondary status register */ status = get_pci_parity_status(dev, 1); - debugf4("PCI SEC_STATUS= 0x%04x %s\n", status, dev_name(&dev->dev)); + edac_dbg(4, "PCI SEC_STATUS= 0x%04x %s\n", + status, dev_name(&dev->dev)); /* check the secondary status reg for errors, * on NOT broken boards @@ -646,20 +645,16 @@ typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev); /* * pci_dev parity list iterator - * Scan the PCI device list for one pass, looking for SERRORs - * Master Parity ERRORS or Parity ERRORs on primary or secondary devices + * + * Scan the PCI device list looking for SERRORs, Master Parity ERRORS or + * Parity ERRORs on primary or secondary devices. */ static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) { struct pci_dev *dev = NULL; - /* request for kernel access to the next PCI device, if any, - * and while we are looking at it have its reference count - * bumped until we are done with it - */ - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) fn(dev); - } } /* @@ -671,7 +666,7 @@ void edac_pci_do_parity_check(void) { int before_count; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); /* if policy has PCI check off, leave now */ if (!check_pci_errors) diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index 670c4481453..9d9e18aefaa 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -5,7 +5,7 @@ * * 2007 (c) MontaVista Software, Inc. * 2010 (c) Advanced Micro Devices Inc. - * Borislav Petkov <borislav.petkov@amd.com> + * Borislav Petkov <bp@alien8.de> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/edac.h> #include <linux/atomic.h> +#include <linux/device.h> #include <asm/edac.h> int edac_op_state = EDAC_OPSTATE_INVAL; @@ -28,6 +29,25 @@ EXPORT_SYMBOL_GPL(edac_err_assert); static atomic_t edac_subsys_valid = ATOMIC_INIT(0); +int edac_report_status = EDAC_REPORTING_ENABLED; +EXPORT_SYMBOL_GPL(edac_report_status); + +static int __init edac_report_setup(char *str) +{ + if (!str) + return -EINVAL; + + if (!strncmp(str, "on", 2)) + set_edac_report_status(EDAC_REPORTING_ENABLED); + else if (!strncmp(str, "off", 3)) + set_edac_report_status(EDAC_REPORTING_DISABLED); + else if (!strncmp(str, "force", 5)) + set_edac_report_status(EDAC_REPORTING_FORCE); + + return 0; +} +__setup("edac_report=", edac_report_setup); + /* * called to determine if there is an EDAC driver interested in * knowing an event (such as NMI) occurred diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c new file mode 100644 index 00000000000..8399b4e16fe --- /dev/null +++ b/drivers/edac/ghes_edac.c @@ -0,0 +1,547 @@ +/* + * GHES/EDAC Linux driver + * + * This file may be distributed under the terms of the GNU General Public + * License version 2. + * + * Copyright (c) 2013 by Mauro Carvalho Chehab + * + * Red Hat Inc. http://www.redhat.com + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <acpi/ghes.h> +#include <linux/edac.h> +#include <linux/dmi.h> +#include "edac_core.h" +#include <ras/ras_event.h> + +#define GHES_EDAC_REVISION " Ver: 1.0.0" + +struct ghes_edac_pvt { + struct list_head list; + struct ghes *ghes; + struct mem_ctl_info *mci; + + /* Buffers for the error handling routine */ + char detail_location[240]; + char other_detail[160]; + char msg[80]; +}; + +static LIST_HEAD(ghes_reglist); +static DEFINE_MUTEX(ghes_edac_lock); +static int ghes_edac_mc_num; + + +/* Memory Device - Type 17 of SMBIOS spec */ +struct memdev_dmi_entry { + u8 type; + u8 length; + u16 handle; + u16 phys_mem_array_handle; + u16 mem_err_info_handle; + u16 total_width; + u16 data_width; + u16 size; + u8 form_factor; + u8 device_set; + u8 device_locator; + u8 bank_locator; + u8 memory_type; + u16 type_detail; + u16 speed; + u8 manufacturer; + u8 serial_number; + u8 asset_tag; + u8 part_number; + u8 attributes; + u32 extended_size; + u16 conf_mem_clk_speed; +} __attribute__((__packed__)); + +struct ghes_edac_dimm_fill { + struct mem_ctl_info *mci; + unsigned count; +}; + +char *memory_type[] = { + [MEM_EMPTY] = "EMPTY", + [MEM_RESERVED] = "RESERVED", + [MEM_UNKNOWN] = "UNKNOWN", + [MEM_FPM] = "FPM", + [MEM_EDO] = "EDO", + [MEM_BEDO] = "BEDO", + [MEM_SDR] = "SDR", + [MEM_RDR] = "RDR", + [MEM_DDR] = "DDR", + [MEM_RDDR] = "RDDR", + [MEM_RMBS] = "RMBS", + [MEM_DDR2] = "DDR2", + [MEM_FB_DDR2] = "FB_DDR2", + [MEM_RDDR2] = "RDDR2", + [MEM_XDR] = "XDR", + [MEM_DDR3] = "DDR3", + [MEM_RDDR3] = "RDDR3", +}; + +static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) +{ + int *num_dimm = arg; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) + (*num_dimm)++; +} + +static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) +{ + struct ghes_edac_dimm_fill *dimm_fill = arg; + struct mem_ctl_info *mci = dimm_fill->mci; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) { + struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, + dimm_fill->count, 0, 0); + + if (entry->size == 0xffff) { + pr_info("Can't get DIMM%i size\n", + dimm_fill->count); + dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ + } else if (entry->size == 0x7fff) { + dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); + } else { + if (entry->size & 1 << 15) + dimm->nr_pages = MiB_TO_PAGES((entry->size & + 0x7fff) << 10); + else + dimm->nr_pages = MiB_TO_PAGES(entry->size); + } + + switch (entry->memory_type) { + case 0x12: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR; + else + dimm->mtype = MEM_DDR; + break; + case 0x13: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR2; + else + dimm->mtype = MEM_DDR2; + break; + case 0x14: + dimm->mtype = MEM_FB_DDR2; + break; + case 0x18: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR3; + else + dimm->mtype = MEM_DDR3; + break; + default: + if (entry->type_detail & 1 << 6) + dimm->mtype = MEM_RMBS; + else if ((entry->type_detail & ((1 << 7) | (1 << 13))) + == ((1 << 7) | (1 << 13))) + dimm->mtype = MEM_RDR; + else if (entry->type_detail & 1 << 7) + dimm->mtype = MEM_SDR; + else if (entry->type_detail & 1 << 9) + dimm->mtype = MEM_EDO; + else + dimm->mtype = MEM_UNKNOWN; + } + + /* + * Actually, we can only detect if the memory has bits for + * checksum or not + */ + if (entry->total_width == entry->data_width) + dimm->edac_mode = EDAC_NONE; + else + dimm->edac_mode = EDAC_SECDED; + + dimm->dtype = DEV_UNKNOWN; + dimm->grain = 128; /* Likely, worse case */ + + /* + * FIXME: It shouldn't be hard to also fill the DIMM labels + */ + + if (dimm->nr_pages) { + edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", + dimm_fill->count, memory_type[dimm->mtype], + PAGES_TO_MiB(dimm->nr_pages), + (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); + edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", + entry->memory_type, entry->type_detail, + entry->total_width, entry->data_width); + } + + dimm_fill->count++; + } +} + +void ghes_edac_report_mem_error(struct ghes *ghes, int sev, + struct cper_sec_mem_err *mem_err) +{ + enum hw_event_mc_err_type type; + struct edac_raw_error_desc *e; + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt = NULL; + char *p; + u8 grain_bits; + + list_for_each_entry(pvt, &ghes_reglist, list) { + if (ghes == pvt->ghes) + break; + } + if (!pvt) { + pr_err("Internal error: Can't find EDAC structure\n"); + return; + } + mci = pvt->mci; + e = &mci->error_desc; + + /* Cleans the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = 1; + strcpy(e->label, "unknown label"); + e->msg = pvt->msg; + e->other_detail = pvt->other_detail; + e->top_layer = -1; + e->mid_layer = -1; + e->low_layer = -1; + *pvt->other_detail = '\0'; + *pvt->msg = '\0'; + + switch (sev) { + case GHES_SEV_CORRECTED: + type = HW_EVENT_ERR_CORRECTED; + break; + case GHES_SEV_RECOVERABLE: + type = HW_EVENT_ERR_UNCORRECTED; + break; + case GHES_SEV_PANIC: + type = HW_EVENT_ERR_FATAL; + break; + default: + case GHES_SEV_NO: + type = HW_EVENT_ERR_INFO; + } + + edac_dbg(1, "error validation_bits: 0x%08llx\n", + (long long)mem_err->validation_bits); + + /* Error type, mapped on e->msg */ + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { + p = pvt->msg; + switch (mem_err->error_type) { + case 0: + p += sprintf(p, "Unknown"); + break; + case 1: + p += sprintf(p, "No error"); + break; + case 2: + p += sprintf(p, "Single-bit ECC"); + break; + case 3: + p += sprintf(p, "Multi-bit ECC"); + break; + case 4: + p += sprintf(p, "Single-symbol ChipKill ECC"); + break; + case 5: + p += sprintf(p, "Multi-symbol ChipKill ECC"); + break; + case 6: + p += sprintf(p, "Master abort"); + break; + case 7: + p += sprintf(p, "Target abort"); + break; + case 8: + p += sprintf(p, "Parity Error"); + break; + case 9: + p += sprintf(p, "Watchdog timeout"); + break; + case 10: + p += sprintf(p, "Invalid address"); + break; + case 11: + p += sprintf(p, "Mirror Broken"); + break; + case 12: + p += sprintf(p, "Memory Sparing"); + break; + case 13: + p += sprintf(p, "Scrub corrected error"); + break; + case 14: + p += sprintf(p, "Scrub uncorrected error"); + break; + case 15: + p += sprintf(p, "Physical Memory Map-out event"); + break; + default: + p += sprintf(p, "reserved error (%d)", + mem_err->error_type); + } + } else { + strcpy(pvt->msg, "unknown error"); + } + + /* Error address */ + if (mem_err->validation_bits & CPER_MEM_VALID_PA) { + e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; + e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; + } + + /* Error grain */ + if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK) + e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); + + /* Memory error location, mapped on e->location */ + p = e->location; + if (mem_err->validation_bits & CPER_MEM_VALID_NODE) + p += sprintf(p, "node:%d ", mem_err->node); + if (mem_err->validation_bits & CPER_MEM_VALID_CARD) + p += sprintf(p, "card:%d ", mem_err->card); + if (mem_err->validation_bits & CPER_MEM_VALID_MODULE) + p += sprintf(p, "module:%d ", mem_err->module); + if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER) + p += sprintf(p, "rank:%d ", mem_err->rank); + if (mem_err->validation_bits & CPER_MEM_VALID_BANK) + p += sprintf(p, "bank:%d ", mem_err->bank); + if (mem_err->validation_bits & CPER_MEM_VALID_ROW) + p += sprintf(p, "row:%d ", mem_err->row); + if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN) + p += sprintf(p, "col:%d ", mem_err->column); + if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION) + p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); + if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { + const char *bank = NULL, *device = NULL; + dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device); + if (bank != NULL && device != NULL) + p += sprintf(p, "DIMM location:%s %s ", bank, device); + else + p += sprintf(p, "DIMM DMI handle: 0x%.4x ", + mem_err->mem_dev_handle); + } + if (p > e->location) + *(p - 1) = '\0'; + + /* All other fields are mapped on e->other_detail */ + p = pvt->other_detail; + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { + u64 status = mem_err->error_status; + + p += sprintf(p, "status(0x%016llx): ", (long long)status); + switch ((status >> 8) & 0xff) { + case 1: + p += sprintf(p, "Error detected internal to the component "); + break; + case 16: + p += sprintf(p, "Error detected in the bus "); + break; + case 4: + p += sprintf(p, "Storage error in DRAM memory "); + break; + case 5: + p += sprintf(p, "Storage error in TLB "); + break; + case 6: + p += sprintf(p, "Storage error in cache "); + break; + case 7: + p += sprintf(p, "Error in one or more functional units "); + break; + case 8: + p += sprintf(p, "component failed self test "); + break; + case 9: + p += sprintf(p, "Overflow or undervalue of internal queue "); + break; + case 17: + p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR "); + break; + case 18: + p += sprintf(p, "Improper access error "); + break; + case 19: + p += sprintf(p, "Access to a memory address which is not mapped to any component "); + break; + case 20: + p += sprintf(p, "Loss of Lockstep "); + break; + case 21: + p += sprintf(p, "Response not associated with a request "); + break; + case 22: + p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits "); + break; + case 23: + p += sprintf(p, "Detection of a PATH_ERROR "); + break; + case 25: + p += sprintf(p, "Bus operation timeout "); + break; + case 26: + p += sprintf(p, "A read was issued to data that has been poisoned "); + break; + default: + p += sprintf(p, "reserved "); + break; + } + } + if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) + p += sprintf(p, "requestorID: 0x%016llx ", + (long long)mem_err->requestor_id); + if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID) + p += sprintf(p, "responderID: 0x%016llx ", + (long long)mem_err->responder_id); + if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID) + p += sprintf(p, "targetID: 0x%016llx ", + (long long)mem_err->responder_id); + if (p > pvt->other_detail) + *(p - 1) = '\0'; + + /* Generate the trace event */ + grain_bits = fls_long(e->grain); + sprintf(pvt->detail_location, "APEI location: %s %s", + e->location, e->other_detail); + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, pvt->detail_location); + + /* Report the error via EDAC API */ + edac_raw_mc_handle_error(type, mci, e); +} +EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); + +int ghes_edac_register(struct ghes *ghes, struct device *dev) +{ + bool fake = false; + int rc, num_dimm = 0; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[1]; + struct ghes_edac_pvt *pvt; + struct ghes_edac_dimm_fill dimm_fill; + + /* Get the number of DIMMs */ + dmi_walk(ghes_edac_count_dimms, &num_dimm); + + /* Check if we've got a bogus BIOS */ + if (num_dimm == 0) { + fake = true; + num_dimm = 1; + } + + layers[0].type = EDAC_MC_LAYER_ALL_MEM; + layers[0].size = num_dimm; + layers[0].is_virt_csrow = true; + + /* + * We need to serialize edac_mc_alloc() and edac_mc_add_mc(), + * to avoid duplicated memory controller numbers + */ + mutex_lock(&ghes_edac_lock); + mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers, + sizeof(*pvt)); + if (!mci) { + pr_info("Can't allocate memory for EDAC data\n"); + mutex_unlock(&ghes_edac_lock); + return -ENOMEM; + } + + pvt = mci->pvt_info; + memset(pvt, 0, sizeof(*pvt)); + list_add_tail(&pvt->list, &ghes_reglist); + pvt->ghes = ghes; + pvt->mci = mci; + mci->pdev = dev; + + mci->mtype_cap = MEM_FLAG_EMPTY; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "ghes_edac.c"; + mci->mod_ver = GHES_EDAC_REVISION; + mci->ctl_name = "ghes_edac"; + mci->dev_name = "ghes"; + + if (!ghes_edac_mc_num) { + if (!fake) { + pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n"); + pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n"); + pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); + pr_info("If you find incorrect reports, please contact your hardware vendor\n"); + pr_info("to correct its BIOS.\n"); + pr_info("This system has %d DIMM sockets.\n", + num_dimm); + } else { + pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); + pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); + pr_info("work on such system. Use this driver with caution\n"); + } + } + + if (!fake) { + /* + * Fill DIMM info from DMI for the memory controller #0 + * + * Keep it in blank for the other memory controllers, as + * there's no reliable way to properly credit each DIMM to + * the memory controller, as different BIOSes fill the + * DMI bank location fields on different ways + */ + if (!ghes_edac_mc_num) { + dimm_fill.count = 0; + dimm_fill.mci = mci; + dmi_walk(ghes_edac_dmidecode, &dimm_fill); + } + } else { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, 0, 0, 0); + + dimm->nr_pages = 1; + dimm->grain = 128; + dimm->mtype = MEM_UNKNOWN; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = EDAC_SECDED; + } + + rc = edac_mc_add_mc(mci); + if (rc < 0) { + pr_info("Can't register at EDAC core\n"); + edac_mc_free(mci); + mutex_unlock(&ghes_edac_lock); + return -ENODEV; + } + + ghes_edac_mc_num++; + mutex_unlock(&ghes_edac_lock); + return 0; +} +EXPORT_SYMBOL_GPL(ghes_edac_register); + +void ghes_edac_unregister(struct ghes *ghes) +{ + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt, *tmp; + + list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) { + if (ghes == pvt->ghes) { + mci = pvt->mci; + edac_mc_del_mc(mci->pdev); + edac_mc_free(mci); + list_del(&pvt->list); + } + } +} +EXPORT_SYMBOL_GPL(ghes_edac_unregister); diff --git a/drivers/edac/highbank_l2_edac.c b/drivers/edac/highbank_l2_edac.c new file mode 100644 index 00000000000..2f193668ebc --- /dev/null +++ b/drivers/edac/highbank_l2_edac.c @@ -0,0 +1,154 @@ +/* + * Copyright 2011-2012 Calxeda, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/of_platform.h> + +#include "edac_core.h" +#include "edac_module.h" + +#define SR_CLR_SB_ECC_INTR 0x0 +#define SR_CLR_DB_ECC_INTR 0x4 + +struct hb_l2_drvdata { + void __iomem *base; + int sb_irq; + int db_irq; +}; + +static irqreturn_t highbank_l2_err_handler(int irq, void *dev_id) +{ + struct edac_device_ctl_info *dci = dev_id; + struct hb_l2_drvdata *drvdata = dci->pvt_info; + + if (irq == drvdata->sb_irq) { + writel(1, drvdata->base + SR_CLR_SB_ECC_INTR); + edac_device_handle_ce(dci, 0, 0, dci->ctl_name); + } + if (irq == drvdata->db_irq) { + writel(1, drvdata->base + SR_CLR_DB_ECC_INTR); + edac_device_handle_ue(dci, 0, 0, dci->ctl_name); + } + + return IRQ_HANDLED; +} + +static const struct of_device_id hb_l2_err_of_match[] = { + { .compatible = "calxeda,hb-sregs-l2-ecc", }, + {}, +}; +MODULE_DEVICE_TABLE(of, hb_l2_err_of_match); + +static int highbank_l2_err_probe(struct platform_device *pdev) +{ + const struct of_device_id *id; + struct edac_device_ctl_info *dci; + struct hb_l2_drvdata *drvdata; + struct resource *r; + int res = 0; + + dci = edac_device_alloc_ctl_info(sizeof(*drvdata), "cpu", + 1, "L", 1, 2, NULL, 0, 0); + if (!dci) + return -ENOMEM; + + drvdata = dci->pvt_info; + dci->dev = &pdev->dev; + platform_set_drvdata(pdev, dci); + + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) + return -ENOMEM; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(&pdev->dev, "Unable to get mem resource\n"); + res = -ENODEV; + goto err; + } + + if (!devm_request_mem_region(&pdev->dev, r->start, + resource_size(r), dev_name(&pdev->dev))) { + dev_err(&pdev->dev, "Error while requesting mem region\n"); + res = -EBUSY; + goto err; + } + + drvdata->base = devm_ioremap(&pdev->dev, r->start, resource_size(r)); + if (!drvdata->base) { + dev_err(&pdev->dev, "Unable to map regs\n"); + res = -ENOMEM; + goto err; + } + + id = of_match_device(hb_l2_err_of_match, &pdev->dev); + dci->mod_name = pdev->dev.driver->name; + dci->ctl_name = id ? id->compatible : "unknown"; + dci->dev_name = dev_name(&pdev->dev); + + if (edac_device_add_device(dci)) + goto err; + + drvdata->db_irq = platform_get_irq(pdev, 0); + res = devm_request_irq(&pdev->dev, drvdata->db_irq, + highbank_l2_err_handler, + 0, dev_name(&pdev->dev), dci); + if (res < 0) + goto err2; + + drvdata->sb_irq = platform_get_irq(pdev, 1); + res = devm_request_irq(&pdev->dev, drvdata->sb_irq, + highbank_l2_err_handler, + 0, dev_name(&pdev->dev), dci); + if (res < 0) + goto err2; + + devres_close_group(&pdev->dev, NULL); + return 0; +err2: + edac_device_del_device(&pdev->dev); +err: + devres_release_group(&pdev->dev, NULL); + edac_device_free_ctl_info(dci); + return res; +} + +static int highbank_l2_err_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *dci = platform_get_drvdata(pdev); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(dci); + return 0; +} + +static struct platform_driver highbank_l2_edac_driver = { + .probe = highbank_l2_err_probe, + .remove = highbank_l2_err_remove, + .driver = { + .name = "hb_l2_edac", + .of_match_table = hb_l2_err_of_match, + }, +}; + +module_platform_driver(highbank_l2_edac_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Calxeda, Inc."); +MODULE_DESCRIPTION("EDAC Driver for Calxeda Highbank L2 Cache"); diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c new file mode 100644 index 00000000000..f784de1dc79 --- /dev/null +++ b/drivers/edac/highbank_mc_edac.c @@ -0,0 +1,281 @@ +/* + * Copyright 2011-2012 Calxeda, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/edac.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/of_platform.h> +#include <linux/uaccess.h> + +#include "edac_core.h" +#include "edac_module.h" + +/* DDR Ctrlr Error Registers */ + +#define HB_DDR_ECC_ERR_BASE 0x128 +#define MW_DDR_ECC_ERR_BASE 0x1b4 + +#define HB_DDR_ECC_OPT 0x00 +#define HB_DDR_ECC_U_ERR_ADDR 0x08 +#define HB_DDR_ECC_U_ERR_STAT 0x0c +#define HB_DDR_ECC_U_ERR_DATAL 0x10 +#define HB_DDR_ECC_U_ERR_DATAH 0x14 +#define HB_DDR_ECC_C_ERR_ADDR 0x18 +#define HB_DDR_ECC_C_ERR_STAT 0x1c +#define HB_DDR_ECC_C_ERR_DATAL 0x20 +#define HB_DDR_ECC_C_ERR_DATAH 0x24 + +#define HB_DDR_ECC_OPT_MODE_MASK 0x3 +#define HB_DDR_ECC_OPT_FWC 0x100 +#define HB_DDR_ECC_OPT_XOR_SHIFT 16 + +/* DDR Ctrlr Interrupt Registers */ + +#define HB_DDR_ECC_INT_BASE 0x180 +#define MW_DDR_ECC_INT_BASE 0x218 + +#define HB_DDR_ECC_INT_STATUS 0x00 +#define HB_DDR_ECC_INT_ACK 0x04 + +#define HB_DDR_ECC_INT_STAT_CE 0x8 +#define HB_DDR_ECC_INT_STAT_DOUBLE_CE 0x10 +#define HB_DDR_ECC_INT_STAT_UE 0x20 +#define HB_DDR_ECC_INT_STAT_DOUBLE_UE 0x40 + +struct hb_mc_drvdata { + void __iomem *mc_err_base; + void __iomem *mc_int_base; +}; + +static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct hb_mc_drvdata *drvdata = mci->pvt_info; + u32 status, err_addr; + + /* Read the interrupt status register */ + status = readl(drvdata->mc_int_base + HB_DDR_ECC_INT_STATUS); + + if (status & HB_DDR_ECC_INT_STAT_UE) { + err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_U_ERR_ADDR); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + err_addr >> PAGE_SHIFT, + err_addr & ~PAGE_MASK, 0, + 0, 0, -1, + mci->ctl_name, ""); + } + if (status & HB_DDR_ECC_INT_STAT_CE) { + u32 syndrome = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_STAT); + syndrome = (syndrome >> 8) & 0xff; + err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_ADDR); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + err_addr >> PAGE_SHIFT, + err_addr & ~PAGE_MASK, syndrome, + 0, 0, -1, + mci->ctl_name, ""); + } + + /* clear the error, clears the interrupt */ + writel(status, drvdata->mc_int_base + HB_DDR_ECC_INT_ACK); + return IRQ_HANDLED; +} + +static void highbank_mc_err_inject(struct mem_ctl_info *mci, u8 synd) +{ + struct hb_mc_drvdata *pdata = mci->pvt_info; + u32 reg; + + reg = readl(pdata->mc_err_base + HB_DDR_ECC_OPT); + reg &= HB_DDR_ECC_OPT_MODE_MASK; + reg |= (synd << HB_DDR_ECC_OPT_XOR_SHIFT) | HB_DDR_ECC_OPT_FWC; + writel(reg, pdata->mc_err_base + HB_DDR_ECC_OPT); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +static ssize_t highbank_mc_inject_ctrl(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + u8 synd; + + if (kstrtou8(buf, 16, &synd)) + return -EINVAL; + + highbank_mc_err_inject(mci, synd); + + return count; +} + +static DEVICE_ATTR(inject_ctrl, S_IWUSR, NULL, highbank_mc_inject_ctrl); + +struct hb_mc_settings { + int err_offset; + int int_offset; +}; + +static struct hb_mc_settings hb_settings = { + .err_offset = HB_DDR_ECC_ERR_BASE, + .int_offset = HB_DDR_ECC_INT_BASE, +}; + +static struct hb_mc_settings mw_settings = { + .err_offset = MW_DDR_ECC_ERR_BASE, + .int_offset = MW_DDR_ECC_INT_BASE, +}; + +static struct of_device_id hb_ddr_ctrl_of_match[] = { + { .compatible = "calxeda,hb-ddr-ctrl", .data = &hb_settings }, + { .compatible = "calxeda,ecx-2000-ddr-ctrl", .data = &mw_settings }, + {}, +}; +MODULE_DEVICE_TABLE(of, hb_ddr_ctrl_of_match); + +static int highbank_mc_probe(struct platform_device *pdev) +{ + const struct of_device_id *id; + const struct hb_mc_settings *settings; + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + struct hb_mc_drvdata *drvdata; + struct dimm_info *dimm; + struct resource *r; + void __iomem *base; + u32 control; + int irq; + int res = 0; + + id = of_match_device(hb_ddr_ctrl_of_match, &pdev->dev); + if (!id) + return -ENODEV; + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 1; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 1; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + sizeof(struct hb_mc_drvdata)); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + drvdata = mci->pvt_info; + platform_set_drvdata(pdev, mci); + + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) + return -ENOMEM; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(&pdev->dev, "Unable to get mem resource\n"); + res = -ENODEV; + goto err; + } + + if (!devm_request_mem_region(&pdev->dev, r->start, + resource_size(r), dev_name(&pdev->dev))) { + dev_err(&pdev->dev, "Error while requesting mem region\n"); + res = -EBUSY; + goto err; + } + + base = devm_ioremap(&pdev->dev, r->start, resource_size(r)); + if (!base) { + dev_err(&pdev->dev, "Unable to map regs\n"); + res = -ENOMEM; + goto err; + } + + settings = id->data; + drvdata->mc_err_base = base + settings->err_offset; + drvdata->mc_int_base = base + settings->int_offset; + + control = readl(drvdata->mc_err_base + HB_DDR_ECC_OPT) & 0x3; + if (!control || (control == 0x2)) { + dev_err(&pdev->dev, "No ECC present, or ECC disabled\n"); + res = -ENODEV; + goto err; + } + + mci->mtype_cap = MEM_FLAG_DDR3; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = pdev->dev.driver->name; + mci->mod_ver = "1"; + mci->ctl_name = id->compatible; + mci->dev_name = dev_name(&pdev->dev); + mci->scrub_mode = SCRUB_SW_SRC; + + /* Only a single 4GB DIMM is supported */ + dimm = *mci->dimms; + dimm->nr_pages = (~0UL >> PAGE_SHIFT) + 1; + dimm->grain = 8; + dimm->dtype = DEV_X8; + dimm->mtype = MEM_DDR3; + dimm->edac_mode = EDAC_SECDED; + + res = edac_mc_add_mc(mci); + if (res < 0) + goto err; + + irq = platform_get_irq(pdev, 0); + res = devm_request_irq(&pdev->dev, irq, highbank_mc_err_handler, + 0, dev_name(&pdev->dev), mci); + if (res < 0) { + dev_err(&pdev->dev, "Unable to request irq %d\n", irq); + goto err2; + } + + device_create_file(&mci->dev, &dev_attr_inject_ctrl); + + devres_close_group(&pdev->dev, NULL); + return 0; +err2: + edac_mc_del_mc(&pdev->dev); +err: + devres_release_group(&pdev->dev, NULL); + edac_mc_free(mci); + return res; +} + +static int highbank_mc_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + device_remove_file(&mci->dev, &dev_attr_inject_ctrl); + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + return 0; +} + +static struct platform_driver highbank_mc_edac_driver = { + .probe = highbank_mc_probe, + .remove = highbank_mc_remove, + .driver = { + .name = "hb_mc_edac", + .of_match_table = hb_ddr_ctrl_of_match, + }, +}; + +module_platform_driver(highbank_mc_edac_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Calxeda, Inc."); +MODULE_DESCRIPTION("EDAC Driver for Calxeda Highbank"); diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c index c0510b3d703..cd28b968e5c 100644 --- a/drivers/edac/i3000_edac.c +++ b/drivers/edac/i3000_edac.c @@ -194,7 +194,7 @@ static void i3000_get_error_info(struct mem_ctl_info *mci, { struct pci_dev *pdev; - pdev = to_pci_dev(mci->dev); + pdev = to_pci_dev(mci->pdev); /* * This is a mess because there is no atomic way to read all the @@ -236,7 +236,7 @@ static int i3000_process_error_info(struct mem_ctl_info *mci, int row, multi_chan, channel; unsigned long pfn, offset; - multi_chan = mci->csrows[0].nr_channels - 1; + multi_chan = mci->csrows[0]->nr_channels - 1; if (!(info->errsts & I3000_ERRSTS_BITS)) return 0; @@ -245,7 +245,9 @@ static int i3000_process_error_info(struct mem_ctl_info *mci, return 1; if ((info->errsts ^ info->errsts2) & I3000_ERRSTS_BITS) { - edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, + "UE overwrote CE", ""); info->errsts = info->errsts2; } @@ -256,10 +258,15 @@ static int i3000_process_error_info(struct mem_ctl_info *mci, row = edac_mc_find_csrow_by_page(mci, pfn); if (info->errsts & I3000_ERRSTS_UE) - edac_mc_handle_ue(mci, pfn, offset, row, "i3000 UE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + pfn, offset, 0, + row, -1, -1, + "i3000 UE", ""); else - edac_mc_handle_ce(mci, pfn, offset, info->derrsyn, row, - multi_chan ? channel : 0, "i3000 CE"); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + pfn, offset, info->derrsyn, + row, multi_chan ? channel : 0, -1, + "i3000 CE", ""); return 1; } @@ -268,7 +275,7 @@ static void i3000_check(struct mem_ctl_info *mci) { struct i3000_error_info info; - debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + edac_dbg(1, "MC%d\n", mci->mc_idx); i3000_get_error_info(mci, &info); i3000_process_error_info(mci, &info, 1); } @@ -304,9 +311,10 @@ static int i3000_is_interleaved(const unsigned char *c0dra, static int i3000_probe1(struct pci_dev *pdev, int dev_idx) { int rc; - int i; + int i, j; struct mem_ctl_info *mci = NULL; - unsigned long last_cumul_size; + struct edac_mc_layer layers[2]; + unsigned long last_cumul_size, nr_pages; int interleaved, nr_channels; unsigned char dra[I3000_RANKS / 2], drb[I3000_RANKS]; unsigned char *c0dra = dra, *c1dra = &dra[I3000_RANKS_PER_CHANNEL / 2]; @@ -314,7 +322,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx) unsigned long mchbar; void __iomem *window; - debugf0("MC: %s()\n", __func__); + edac_dbg(0, "MC:\n"); pci_read_config_dword(pdev, I3000_MCHBAR, (u32 *) & mchbar); mchbar &= I3000_MCHBAR_MASK; @@ -347,13 +355,20 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx) */ interleaved = i3000_is_interleaved(c0dra, c1dra, c0drb, c1drb); nr_channels = interleaved ? 2 : 1; - mci = edac_mc_alloc(0, I3000_RANKS / nr_channels, nr_channels, 0); + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = I3000_RANKS / nr_channels; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = nr_channels; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0); if (!mci) return -ENOMEM; - debugf3("MC: %s(): init mci\n", __func__); + edac_dbg(3, "MC: init mci\n"); - mci->dev = &pdev->dev; + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_DDR2; mci->edac_ctl_cap = EDAC_FLAG_SECDED; @@ -378,27 +393,30 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx) for (last_cumul_size = i = 0; i < mci->nr_csrows; i++) { u8 value; u32 cumul_size; - struct csrow_info *csrow = &mci->csrows[i]; + struct csrow_info *csrow = mci->csrows[i]; value = drb[i]; cumul_size = value << (I3000_DRB_SHIFT - PAGE_SHIFT); if (interleaved) cumul_size <<= 1; - debugf3("MC: %s(): (%d) cumul_size 0x%x\n", - __func__, i, cumul_size); - if (cumul_size == last_cumul_size) { - csrow->mtype = MEM_EMPTY; + edac_dbg(3, "MC: (%d) cumul_size 0x%x\n", i, cumul_size); + if (cumul_size == last_cumul_size) continue; - } csrow->first_page = last_cumul_size; csrow->last_page = cumul_size - 1; - csrow->nr_pages = cumul_size - last_cumul_size; + nr_pages = cumul_size - last_cumul_size; last_cumul_size = cumul_size; - csrow->grain = I3000_DEAP_GRAIN; - csrow->mtype = MEM_DDR2; - csrow->dtype = DEV_UNKNOWN; - csrow->edac_mode = EDAC_UNKNOWN; + + for (j = 0; j < nr_channels; j++) { + struct dimm_info *dimm = csrow->channels[j]->dimm; + + dimm->nr_pages = nr_pages / nr_channels; + dimm->grain = I3000_DEAP_GRAIN; + dimm->mtype = MEM_DDR2; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = EDAC_UNKNOWN; + } } /* @@ -410,7 +428,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx) rc = -ENODEV; if (edac_mc_add_mc(mci)) { - debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "MC: failed edac_mc_add_mc()\n"); goto fail; } @@ -426,7 +444,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx) } /* get this far and it's successful */ - debugf3("MC: %s(): success\n", __func__); + edac_dbg(3, "MC: success\n"); return 0; fail: @@ -437,12 +455,11 @@ fail: } /* returns count (>= 0), or negative on error */ -static int __devinit i3000_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i3000_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int rc; - debugf0("MC: %s()\n", __func__); + edac_dbg(0, "MC:\n"); if (pci_enable_device(pdev) < 0) return -EIO; @@ -454,11 +471,11 @@ static int __devinit i3000_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i3000_remove_one(struct pci_dev *pdev) +static void i3000_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); if (i3000_pci) edac_pci_release_generic_ctl(i3000_pci); @@ -470,7 +487,7 @@ static void __devexit i3000_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i3000_pci_tbl[] __devinitdata = { +static const struct pci_device_id i3000_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 3000_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I3000}, @@ -484,7 +501,7 @@ MODULE_DEVICE_TABLE(pci, i3000_pci_tbl); static struct pci_driver i3000_driver = { .name = EDAC_MOD_STR, .probe = i3000_init_one, - .remove = __devexit_p(i3000_remove_one), + .remove = i3000_remove_one, .id_table = i3000_pci_tbl, }; @@ -492,7 +509,7 @@ static int __init i3000_init(void) { int pci_rc; - debugf3("MC: %s()\n", __func__); + edac_dbg(3, "MC:\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -506,14 +523,14 @@ static int __init i3000_init(void) mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_3000_HB, NULL); if (!mci_pdev) { - debugf0("i3000 pci_get_device fail\n"); + edac_dbg(0, "i3000 pci_get_device fail\n"); pci_rc = -ENODEV; goto fail1; } pci_rc = i3000_init_one(mci_pdev, i3000_pci_tbl); if (pci_rc < 0) { - debugf0("i3000 init fail\n"); + edac_dbg(0, "i3000 init fail\n"); pci_rc = -ENODEV; goto fail1; } @@ -533,7 +550,7 @@ fail0: static void __exit i3000_exit(void) { - debugf3("MC: %s()\n", __func__); + edac_dbg(3, "MC:\n"); pci_unregister_driver(&i3000_driver); if (!i3000_registered) { diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index aa08497a075..022a70273ad 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -15,12 +15,15 @@ #include <linux/io.h> #include "edac_core.h" +#include <asm-generic/io-64-nonatomic-lo-hi.h> + #define I3200_REVISION "1.1" #define EDAC_MOD_STR "i3200_edac" #define PCI_DEVICE_ID_INTEL_3200_HB 0x29f0 +#define I3200_DIMMS 4 #define I3200_RANKS 8 #define I3200_RANKS_PER_CHANNEL 4 #define I3200_CHANNELS 2 @@ -101,31 +104,28 @@ struct i3200_priv { static int nr_channels; -#ifndef readq -static inline __u64 readq(const volatile void __iomem *addr) -{ - const volatile u32 __iomem *p = addr; - u32 low, high; - - low = readl(p); - high = readl(p + 1); - - return low + ((u64)high << 32); -} -#endif - static int how_many_channels(struct pci_dev *pdev) { + int n_channels; + unsigned char capid0_8b; /* 8th byte of CAPID0 */ pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); + if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ - debugf0("In single channel mode.\n"); - return 1; + edac_dbg(0, "In single channel mode\n"); + n_channels = 1; } else { - debugf0("In dual channel mode.\n"); - return 2; + edac_dbg(0, "In dual channel mode\n"); + n_channels = 2; } + + if (capid0_8b & 0x10) /* check if both channels are filled */ + edac_dbg(0, "2 DIMMS per channel disabled\n"); + else + edac_dbg(0, "2 DIMMS per channel enabled\n"); + + return n_channels; } static unsigned long eccerrlog_syndrome(u64 log) @@ -169,7 +169,7 @@ static void i3200_clear_error_info(struct mem_ctl_info *mci) { struct pci_dev *pdev; - pdev = to_pci_dev(mci->dev); + pdev = to_pci_dev(mci->pdev); /* * Clear any error bits. @@ -186,7 +186,7 @@ static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci, struct i3200_priv *priv = mci->pvt_info; void __iomem *window = priv->window; - pdev = to_pci_dev(mci->dev); + pdev = to_pci_dev(mci->pdev); /* * This is a mess because there is no atomic way to read all the @@ -228,21 +228,25 @@ static void i3200_process_error_info(struct mem_ctl_info *mci, return; if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) { - edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, "UE overwrote CE", ""); info->errsts = info->errsts2; } for (channel = 0; channel < nr_channels; channel++) { log = info->eccerrlog[channel]; if (log & I3200_ECCERRLOG_UE) { - edac_mc_handle_ue(mci, 0, 0, - eccerrlog_row(channel, log), - "i3200 UE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + 0, 0, 0, + eccerrlog_row(channel, log), + -1, -1, + "i3000 UE", ""); } else if (log & I3200_ECCERRLOG_CE) { - edac_mc_handle_ce(mci, 0, 0, - eccerrlog_syndrome(log), - eccerrlog_row(channel, log), 0, - "i3200 CE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + 0, 0, eccerrlog_syndrome(log), + eccerrlog_row(channel, log), + -1, -1, + "i3000 UE", ""); } } } @@ -251,13 +255,12 @@ static void i3200_check(struct mem_ctl_info *mci) { struct i3200_error_info info; - debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + edac_dbg(1, "MC%d\n", mci->mc_idx); i3200_get_and_clear_error_info(mci, &info); i3200_process_error_info(mci, &info); } - -void __iomem *i3200_map_mchbar(struct pci_dev *pdev) +static void __iomem *i3200_map_mchbar(struct pci_dev *pdev) { union { u64 mchbar; @@ -296,6 +299,8 @@ static void i3200_get_drbs(void __iomem *window, for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; + + edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]); } } @@ -317,6 +322,9 @@ static unsigned long drb_to_nr_pages( int n; n = drbs[channel][rank]; + if (!n) + return 0; + if (rank > 0) n -= drbs[channel][rank - 1]; if (stacked && (channel == 1) && @@ -330,15 +338,15 @@ static unsigned long drb_to_nr_pages( static int i3200_probe1(struct pci_dev *pdev, int dev_idx) { int rc; - int i; + int i, j; struct mem_ctl_info *mci = NULL; - unsigned long last_page; + struct edac_mc_layer layers[2]; u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]; bool stacked; void __iomem *window; struct i3200_priv *priv; - debugf0("MC: %s()\n", __func__); + edac_dbg(0, "MC:\n"); window = i3200_map_mchbar(pdev); if (!window) @@ -347,14 +355,20 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) i3200_get_drbs(window, drbs); nr_channels = how_many_channels(pdev); - mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS, - nr_channels, 0); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = I3200_DIMMS; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = nr_channels; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + sizeof(struct i3200_priv)); if (!mci) return -ENOMEM; - debugf3("MC: %s(): init mci\n", __func__); + edac_dbg(3, "MC: init mci\n"); - mci->dev = &pdev->dev; + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_DDR2; mci->edac_ctl_cap = EDAC_FLAG_SECDED; @@ -377,41 +391,38 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) * cumulative; the last one will contain the total memory * contained in all ranks. */ - last_page = -1UL; - for (i = 0; i < mci->nr_csrows; i++) { + for (i = 0; i < I3200_DIMMS; i++) { unsigned long nr_pages; - struct csrow_info *csrow = &mci->csrows[i]; - nr_pages = drb_to_nr_pages(drbs, stacked, - i / I3200_RANKS_PER_CHANNEL, - i % I3200_RANKS_PER_CHANNEL); + for (j = 0; j < nr_channels; j++) { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, i, j, 0); - if (nr_pages == 0) { - csrow->mtype = MEM_EMPTY; - continue; - } + nr_pages = drb_to_nr_pages(drbs, stacked, j, i); + if (nr_pages == 0) + continue; - csrow->first_page = last_page + 1; - last_page += nr_pages; - csrow->last_page = last_page; - csrow->nr_pages = nr_pages; + edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, + stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages)); - csrow->grain = nr_pages << PAGE_SHIFT; - csrow->mtype = MEM_DDR2; - csrow->dtype = DEV_UNKNOWN; - csrow->edac_mode = EDAC_UNKNOWN; + dimm->nr_pages = nr_pages; + dimm->grain = nr_pages << PAGE_SHIFT; + dimm->mtype = MEM_DDR2; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = EDAC_UNKNOWN; + } } i3200_clear_error_info(mci); rc = -ENODEV; if (edac_mc_add_mc(mci)) { - debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "MC: failed edac_mc_add_mc()\n"); goto fail; } /* get this far and it's successful */ - debugf3("MC: %s(): success\n", __func__); + edac_dbg(3, "MC: success\n"); return 0; fail: @@ -422,12 +433,11 @@ fail: return rc; } -static int __devinit i3200_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i3200_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int rc; - debugf0("MC: %s()\n", __func__); + edac_dbg(0, "MC:\n"); if (pci_enable_device(pdev) < 0) return -EIO; @@ -439,12 +449,12 @@ static int __devinit i3200_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i3200_remove_one(struct pci_dev *pdev) +static void i3200_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct i3200_priv *priv; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); mci = edac_mc_del_mc(&pdev->dev); if (!mci) @@ -454,9 +464,11 @@ static void __devexit i3200_remove_one(struct pci_dev *pdev) iounmap(priv->window); edac_mc_free(mci); + + pci_disable_device(pdev); } -static const struct pci_device_id i3200_pci_tbl[] __devinitdata = { +static const struct pci_device_id i3200_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I3200}, @@ -470,7 +482,7 @@ MODULE_DEVICE_TABLE(pci, i3200_pci_tbl); static struct pci_driver i3200_driver = { .name = EDAC_MOD_STR, .probe = i3200_init_one, - .remove = __devexit_p(i3200_remove_one), + .remove = i3200_remove_one, .id_table = i3200_pci_tbl, }; @@ -478,7 +490,7 @@ static int __init i3200_init(void) { int pci_rc; - debugf3("MC: %s()\n", __func__); + edac_dbg(3, "MC:\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -492,14 +504,14 @@ static int __init i3200_init(void) mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_3200_HB, NULL); if (!mci_pdev) { - debugf0("i3200 pci_get_device fail\n"); + edac_dbg(0, "i3200 pci_get_device fail\n"); pci_rc = -ENODEV; goto fail1; } pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl); if (pci_rc < 0) { - debugf0("i3200 init fail\n"); + edac_dbg(0, "i3200 init fail\n"); pci_rc = -ENODEV; goto fail1; } @@ -519,7 +531,7 @@ fail0: static void __exit i3200_exit(void) { - debugf3("MC: %s()\n", __func__); + edac_dbg(3, "MC:\n"); pci_unregister_driver(&i3200_driver); if (!i3200_registered) { diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 4dc3ac25a42..72e07e3cf71 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -270,9 +270,10 @@ #define MTR3 0x8C #define NUM_MTRS 4 -#define CHANNELS_PER_BRANCH (2) +#define CHANNELS_PER_BRANCH 2 +#define MAX_BRANCHES 2 -/* Defines to extract the vaious fields from the +/* Defines to extract the various fields from the * MTRx - Memory Technology Registers */ #define MTR_DIMMS_PRESENT(mtr) ((mtr) & (0x1 << 8)) @@ -286,22 +287,6 @@ #define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) #define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) -#ifdef CONFIG_EDAC_DEBUG -static char *numrow_toString[] = { - "8,192 - 13 rows", - "16,384 - 14 rows", - "32,768 - 15 rows", - "reserved" -}; - -static char *numcol_toString[] = { - "1,024 - 10 columns", - "2,048 - 11 columns", - "4,096 - 12 columns", - "reserved" -}; -#endif - /* enables the report of miscellaneous messages as CE errors - default off */ static int misc_messages; @@ -343,7 +328,13 @@ struct i5000_pvt { struct pci_dev *branch_1; /* 22.0 */ u16 tolm; /* top of low memory */ - u64 ambase; /* AMB BAR */ + union { + u64 ambase; /* AMB BAR */ + struct { + u32 ambase_bottom; + u32 ambase_top; + } u __packed; + }; u16 mir0, mir1, mir2; @@ -473,7 +464,6 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci, char msg[EDAC_MC_LABEL_LEN + 1 + 160]; char *specific = NULL; u32 allErrors; - int branch; int channel; int bank; int rank; @@ -485,8 +475,7 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci, if (!allErrors) return; /* if no error, return now */ - branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd); - channel = branch; + channel = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd); /* Use the NON-Recoverable macros to extract data */ bank = NREC_BANK(info->nrecmema); @@ -495,10 +484,9 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci, ras = NREC_RAS(info->nrecmemb); cas = NREC_CAS(info->nrecmemb); - debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " - "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", - rank, channel, channel + 1, branch >> 1, bank, - rdwr ? "Write" : "Read", ras, cas); + edac_dbg(0, "\t\tCSROW= %d Channel= %d (DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, bank, + rdwr ? "Write" : "Read", ras, cas); /* Only 1 bit will be on */ switch (allErrors) { @@ -533,13 +521,14 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci, /* Form out message */ snprintf(msg, sizeof(msg), - "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d " - "FATAL Err=0x%x (%s))", - branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, - allErrors, specific); + "Bank=%d RAS=%d CAS=%d FATAL Err=0x%x (%s)", + bank, ras, cas, allErrors, specific); /* Call the helper to output message */ - edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); + edac_mc_handle_error(HW_EVENT_ERR_FATAL, mci, 1, 0, 0, 0, + channel >> 1, channel & 1, rank, + rdwr ? "Write error" : "Read error", + msg); } /* @@ -574,7 +563,7 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, /* ONLY ONE of the possible error bits will be set, as per the docs */ ue_errors = allErrors & FERR_NF_UNCORRECTABLE; if (ue_errors) { - debugf0("\tUncorrected bits= 0x%x\n", ue_errors); + edac_dbg(0, "\tUncorrected bits= 0x%x\n", ue_errors); branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd); @@ -590,11 +579,9 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, ras = NREC_RAS(info->nrecmemb); cas = NREC_CAS(info->nrecmemb); - debugf0 - ("\t\tCSROW= %d Channels= %d,%d (Branch= %d " - "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", - rank, channel, channel + 1, branch >> 1, bank, - rdwr ? "Write" : "Read", ras, cas); + edac_dbg(0, "\t\tCSROW= %d Channels= %d,%d (Branch= %d DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, channel + 1, branch >> 1, bank, + rdwr ? "Write" : "Read", ras, cas); switch (ue_errors) { case FERR_NF_M12ERR: @@ -633,19 +620,20 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, /* Form out message */ snprintf(msg, sizeof(msg), - "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " - "CAS=%d, UE Err=0x%x (%s))", - branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, - ue_errors, specific); + "Rank=%d Bank=%d RAS=%d CAS=%d, UE Err=0x%x (%s)", + rank, bank, ras, cas, ue_errors, specific); /* Call the helper to output message */ - edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + channel >> 1, -1, rank, + rdwr ? "Write error" : "Read error", + msg); } /* Check correctable errors */ ce_errors = allErrors & FERR_NF_CORRECTABLE; if (ce_errors) { - debugf0("\tCorrected bits= 0x%x\n", ce_errors); + edac_dbg(0, "\tCorrected bits= 0x%x\n", ce_errors); branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd); @@ -663,10 +651,9 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, ras = REC_RAS(info->recmemb); cas = REC_CAS(info->recmemb); - debugf0("\t\tCSROW= %d Channel= %d (Branch %d " - "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", - rank, channel, branch >> 1, bank, - rdwr ? "Write" : "Read", ras, cas); + edac_dbg(0, "\t\tCSROW= %d Channel= %d (Branch %d DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, branch >> 1, bank, + rdwr ? "Write" : "Read", ras, cas); switch (ce_errors) { case FERR_NF_M17ERR: @@ -685,13 +672,16 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, /* Form out message */ snprintf(msg, sizeof(msg), - "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " + "Rank=%d Bank=%d RDWR=%s RAS=%d " "CAS=%d, CE Err=0x%x (%s))", branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, ce_errors, specific); /* Call the helper to output message */ - edac_mc_handle_fbd_ce(mci, rank, channel, msg); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, + channel >> 1, channel % 2, rank, + rdwr ? "Write error" : "Read error", + msg); } if (!misc_messages) @@ -731,11 +721,12 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, /* Form out message */ snprintf(msg, sizeof(msg), - "(Branch=%d Err=%#x (%s))", branch >> 1, - misc_errors, specific); + "Err=%#x (%s)", misc_errors, specific); /* Call the helper to output message */ - edac_mc_handle_fbd_ce(mci, 0, 0, msg); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, + branch >> 1, -1, -1, + "Misc error", msg); } } @@ -774,7 +765,7 @@ static void i5000_clear_error(struct mem_ctl_info *mci) static void i5000_check_error(struct mem_ctl_info *mci) { struct i5000_error_info info; - debugf4("MC%d: %s: %s()\n", mci->mc_idx, __FILE__, __func__); + edac_dbg(4, "MC%d\n", mci->mc_idx); i5000_get_error_info(mci, &info); i5000_process_error_info(mci, &info, 1); } @@ -845,15 +836,16 @@ static int i5000_get_devices(struct mem_ctl_info *mci, int dev_idx) pvt->fsb_error_regs = pdev; - debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", - pci_name(pvt->system_address), - pvt->system_address->vendor, pvt->system_address->device); - debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", - pci_name(pvt->branchmap_werrors), - pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); - debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", - pci_name(pvt->fsb_error_regs), - pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); + edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n", + pci_name(pvt->system_address), + pvt->system_address->vendor, pvt->system_address->device); + edac_dbg(1, "Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->branchmap_werrors), + pvt->branchmap_werrors->vendor, + pvt->branchmap_werrors->device); + edac_dbg(1, "FSB Error Regs - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->fsb_error_regs), + pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); pdev = NULL; pdev = pci_get_device(PCI_VENDOR_ID_INTEL, @@ -956,14 +948,14 @@ static int determine_amb_present_reg(struct i5000_pvt *pvt, int channel) * * return the proper MTR register as determine by the csrow and channel desired */ -static int determine_mtr(struct i5000_pvt *pvt, int csrow, int channel) +static int determine_mtr(struct i5000_pvt *pvt, int slot, int channel) { int mtr; if (channel < CHANNELS_PER_BRANCH) - mtr = pvt->b0_mtr[csrow >> 1]; + mtr = pvt->b0_mtr[slot]; else - mtr = pvt->b1_mtr[csrow >> 1]; + mtr = pvt->b1_mtr[slot]; return mtr; } @@ -976,49 +968,59 @@ static void decode_mtr(int slot_row, u16 mtr) ans = MTR_DIMMS_PRESENT(mtr); - debugf2("\tMTR%d=0x%x: DIMMs are %s\n", slot_row, mtr, - ans ? "Present" : "NOT Present"); + edac_dbg(2, "\tMTR%d=0x%x: DIMMs are %sPresent\n", + slot_row, mtr, ans ? "" : "NOT "); if (!ans) return; - debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); - debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); - debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? "double" : "single"); - debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); - debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); + edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); + edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); + edac_dbg(2, "\t\tNUMRANK: %s\n", + MTR_DIMM_RANK(mtr) ? "double" : "single"); + edac_dbg(2, "\t\tNUMROW: %s\n", + MTR_DIMM_ROWS(mtr) == 0 ? "8,192 - 13 rows" : + MTR_DIMM_ROWS(mtr) == 1 ? "16,384 - 14 rows" : + MTR_DIMM_ROWS(mtr) == 2 ? "32,768 - 15 rows" : + "reserved"); + edac_dbg(2, "\t\tNUMCOL: %s\n", + MTR_DIMM_COLS(mtr) == 0 ? "1,024 - 10 columns" : + MTR_DIMM_COLS(mtr) == 1 ? "2,048 - 11 columns" : + MTR_DIMM_COLS(mtr) == 2 ? "4,096 - 12 columns" : + "reserved"); } -static void handle_channel(struct i5000_pvt *pvt, int csrow, int channel, +static void handle_channel(struct i5000_pvt *pvt, int slot, int channel, struct i5000_dimm_info *dinfo) { int mtr; int amb_present_reg; int addrBits; - mtr = determine_mtr(pvt, csrow, channel); + mtr = determine_mtr(pvt, slot, channel); if (MTR_DIMMS_PRESENT(mtr)) { amb_present_reg = determine_amb_present_reg(pvt, channel); - /* Determine if there is a DIMM present in this DIMM slot */ - if (amb_present_reg & (1 << (csrow >> 1))) { + /* Determine if there is a DIMM present in this DIMM slot */ + if (amb_present_reg) { dinfo->dual_rank = MTR_DIMM_RANK(mtr); - if (!((dinfo->dual_rank == 0) && - ((csrow & 0x1) == 0x1))) { - /* Start with the number of bits for a Bank - * on the DRAM */ - addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr); - /* Add thenumber of ROW bits */ - addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); - /* add the number of COLUMN bits */ - addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); - - addrBits += 6; /* add 64 bits per DIMM */ - addrBits -= 20; /* divide by 2^^20 */ - addrBits -= 3; /* 8 bits per bytes */ - - dinfo->megabytes = 1 << addrBits; - } + /* Start with the number of bits for a Bank + * on the DRAM */ + addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr); + /* Add the number of ROW bits */ + addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); + /* add the number of COLUMN bits */ + addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); + + /* Dual-rank memories have twice the size */ + if (dinfo->dual_rank) + addrBits++; + + addrBits += 6; /* add 64 bits per DIMM */ + addrBits -= 20; /* divide by 2^^20 */ + addrBits -= 3; /* 8 bits per bytes */ + + dinfo->megabytes = 1 << addrBits; } } } @@ -1032,10 +1034,9 @@ static void handle_channel(struct i5000_pvt *pvt, int csrow, int channel, static void calculate_dimm_size(struct i5000_pvt *pvt) { struct i5000_dimm_info *dinfo; - int csrow, max_csrows; + int slot, channel, branch; char *p, *mem_buffer; int space, n; - int channel; /* ================= Generate some debug output ================= */ space = PAGE_SIZE; @@ -1046,53 +1047,57 @@ static void calculate_dimm_size(struct i5000_pvt *pvt) return; } - n = snprintf(p, space, "\n"); - p += n; - space -= n; - - /* Scan all the actual CSROWS (which is # of DIMMS * 2) + /* Scan all the actual slots * and calculate the information for each DIMM - * Start with the highest csrow first, to display it first - * and work toward the 0th csrow + * Start with the highest slot first, to display it first + * and work toward the 0th slot */ - max_csrows = pvt->maxdimmperch * 2; - for (csrow = max_csrows - 1; csrow >= 0; csrow--) { + for (slot = pvt->maxdimmperch - 1; slot >= 0; slot--) { - /* on an odd csrow, first output a 'boundary' marker, + /* on an odd slot, first output a 'boundary' marker, * then reset the message buffer */ - if (csrow & 0x1) { - n = snprintf(p, space, "---------------------------" + if (slot & 0x1) { + n = snprintf(p, space, "--------------------------" "--------------------------------"); p += n; space -= n; - debugf2("%s\n", mem_buffer); + edac_dbg(2, "%s\n", mem_buffer); p = mem_buffer; space = PAGE_SIZE; } - n = snprintf(p, space, "csrow %2d ", csrow); + n = snprintf(p, space, "slot %2d ", slot); p += n; space -= n; for (channel = 0; channel < pvt->maxch; channel++) { - dinfo = &pvt->dimm_info[csrow][channel]; - handle_channel(pvt, csrow, channel, dinfo); - n = snprintf(p, space, "%4d MB | ", dinfo->megabytes); + dinfo = &pvt->dimm_info[slot][channel]; + handle_channel(pvt, slot, channel, dinfo); + if (dinfo->megabytes) + n = snprintf(p, space, "%4d MB %dR| ", + dinfo->megabytes, dinfo->dual_rank + 1); + else + n = snprintf(p, space, "%4d MB | ", 0); p += n; space -= n; } - n = snprintf(p, space, "\n"); p += n; space -= n; + edac_dbg(2, "%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; } /* Output the last bottom 'boundary' marker */ - n = snprintf(p, space, "---------------------------" - "--------------------------------\n"); + n = snprintf(p, space, "--------------------------" + "--------------------------------"); p += n; space -= n; + edac_dbg(2, "%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; /* now output the 'channel' labels */ - n = snprintf(p, space, " "); + n = snprintf(p, space, " "); p += n; space -= n; for (channel = 0; channel < pvt->maxch; channel++) { @@ -1100,12 +1105,20 @@ static void calculate_dimm_size(struct i5000_pvt *pvt) p += n; space -= n; } - n = snprintf(p, space, "\n"); + edac_dbg(2, "%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + + n = snprintf(p, space, " "); p += n; - space -= n; + for (branch = 0; branch < MAX_BRANCHES; branch++) { + n = snprintf(p, space, " branch %d | ", branch); + p += n; + space -= n; + } /* output the last message and free buffer */ - debugf2("%s\n", mem_buffer); + edac_dbg(2, "%s\n", mem_buffer); kfree(mem_buffer); } @@ -1128,24 +1141,25 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci) pvt = mci->pvt_info; pci_read_config_dword(pvt->system_address, AMBASE, - (u32 *) & pvt->ambase); + &pvt->u.ambase_bottom); pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32), - ((u32 *) & pvt->ambase) + sizeof(u32)); + &pvt->u.ambase_top); maxdimmperch = pvt->maxdimmperch; maxch = pvt->maxch; - debugf2("AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n", - (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch); + edac_dbg(2, "AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n", + (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch); /* Get the Branch Map regs */ pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); pvt->tolm >>= 12; - debugf2("\nTOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, - pvt->tolm); + edac_dbg(2, "TOLM (number of 256M regions) =%u (0x%x)\n", + pvt->tolm, pvt->tolm); actual_tolm = pvt->tolm << 28; - debugf2("Actual TOLM byte addr=%u (0x%x)\n", actual_tolm, actual_tolm); + edac_dbg(2, "Actual TOLM byte addr=%u (0x%x)\n", + actual_tolm, actual_tolm); pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0); pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1); @@ -1155,15 +1169,18 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci) limit = (pvt->mir0 >> 4) & 0x0FFF; way0 = pvt->mir0 & 0x1; way1 = pvt->mir0 & 0x2; - debugf2("MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + edac_dbg(2, "MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", + limit, way1, way0); limit = (pvt->mir1 >> 4) & 0x0FFF; way0 = pvt->mir1 & 0x1; way1 = pvt->mir1 & 0x2; - debugf2("MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + edac_dbg(2, "MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", + limit, way1, way0); limit = (pvt->mir2 >> 4) & 0x0FFF; way0 = pvt->mir2 & 0x1; way1 = pvt->mir2 & 0x2; - debugf2("MIR2: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + edac_dbg(2, "MIR2: limit= 0x%x WAY1= %u WAY0= %x\n", + limit, way1, way0); /* Get the MTR[0-3] regs */ for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) { @@ -1172,31 +1189,31 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci) pci_read_config_word(pvt->branch_0, where, &pvt->b0_mtr[slot_row]); - debugf2("MTR%d where=0x%x B0 value=0x%x\n", slot_row, where, - pvt->b0_mtr[slot_row]); + edac_dbg(2, "MTR%d where=0x%x B0 value=0x%x\n", + slot_row, where, pvt->b0_mtr[slot_row]); if (pvt->maxch >= CHANNELS_PER_BRANCH) { pci_read_config_word(pvt->branch_1, where, &pvt->b1_mtr[slot_row]); - debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row, - where, pvt->b1_mtr[slot_row]); + edac_dbg(2, "MTR%d where=0x%x B1 value=0x%x\n", + slot_row, where, pvt->b1_mtr[slot_row]); } else { pvt->b1_mtr[slot_row] = 0; } } /* Read and dump branch 0's MTRs */ - debugf2("\nMemory Technology Registers:\n"); - debugf2(" Branch 0:\n"); + edac_dbg(2, "Memory Technology Registers:\n"); + edac_dbg(2, " Branch 0:\n"); for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) { decode_mtr(slot_row, pvt->b0_mtr[slot_row]); } pci_read_config_word(pvt->branch_0, AMB_PRESENT_0, &pvt->b0_ambpresent0); - debugf2("\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0); + edac_dbg(2, "\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0); pci_read_config_word(pvt->branch_0, AMB_PRESENT_1, &pvt->b0_ambpresent1); - debugf2("\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1); + edac_dbg(2, "\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1); /* Only if we have 2 branchs (4 channels) */ if (pvt->maxch < CHANNELS_PER_BRANCH) { @@ -1204,18 +1221,18 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci) pvt->b1_ambpresent1 = 0; } else { /* Read and dump branch 1's MTRs */ - debugf2(" Branch 1:\n"); + edac_dbg(2, " Branch 1:\n"); for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) { decode_mtr(slot_row, pvt->b1_mtr[slot_row]); } pci_read_config_word(pvt->branch_1, AMB_PRESENT_0, &pvt->b1_ambpresent0); - debugf2("\t\tAMB-Branch 1-present0 0x%x:\n", - pvt->b1_ambpresent0); + edac_dbg(2, "\t\tAMB-Branch 1-present0 0x%x:\n", + pvt->b1_ambpresent0); pci_read_config_word(pvt->branch_1, AMB_PRESENT_1, &pvt->b1_ambpresent1); - debugf2("\t\tAMB-Branch 1-present1 0x%x:\n", - pvt->b1_ambpresent1); + edac_dbg(2, "\t\tAMB-Branch 1-present1 0x%x:\n", + pvt->b1_ambpresent1); } /* Go and determine the size of each DIMM and place in an @@ -1235,13 +1252,13 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci) static int i5000_init_csrows(struct mem_ctl_info *mci) { struct i5000_pvt *pvt; - struct csrow_info *p_csrow; + struct dimm_info *dimm; int empty, channel_count; int max_csrows; - int mtr, mtr1; + int mtr; int csrow_megs; int channel; - int csrow; + int slot; pvt = mci->pvt_info; @@ -1250,43 +1267,40 @@ static int i5000_init_csrows(struct mem_ctl_info *mci) empty = 1; /* Assume NO memory */ - for (csrow = 0; csrow < max_csrows; csrow++) { - p_csrow = &mci->csrows[csrow]; - - p_csrow->csrow_idx = csrow; - - /* use branch 0 for the basis */ - mtr = pvt->b0_mtr[csrow >> 1]; - mtr1 = pvt->b1_mtr[csrow >> 1]; - - /* if no DIMMS on this row, continue */ - if (!MTR_DIMMS_PRESENT(mtr) && !MTR_DIMMS_PRESENT(mtr1)) - continue; + /* + * FIXME: The memory layout used to map slot/channel into the + * real memory architecture is weird: branch+slot are "csrows" + * and channel is channel. That required an extra array (dimm_info) + * to map the dimms. A good cleanup would be to remove this array, + * and do a loop here with branch, channel, slot + */ + for (slot = 0; slot < max_csrows; slot++) { + for (channel = 0; channel < pvt->maxch; channel++) { - /* FAKE OUT VALUES, FIXME */ - p_csrow->first_page = 0 + csrow * 20; - p_csrow->last_page = 9 + csrow * 20; - p_csrow->page_mask = 0xFFF; + mtr = determine_mtr(pvt, slot, channel); - p_csrow->grain = 8; + if (!MTR_DIMMS_PRESENT(mtr)) + continue; - csrow_megs = 0; - for (channel = 0; channel < pvt->maxch; channel++) { - csrow_megs += pvt->dimm_info[csrow][channel].megabytes; - } + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, + channel / MAX_BRANCHES, + channel % MAX_BRANCHES, slot); - p_csrow->nr_pages = csrow_megs << 8; + csrow_megs = pvt->dimm_info[slot][channel].megabytes; + dimm->grain = 8; - /* Assume DDR2 for now */ - p_csrow->mtype = MEM_FB_DDR2; + /* Assume DDR2 for now */ + dimm->mtype = MEM_FB_DDR2; - /* ask what device type on this row */ - if (MTR_DRAM_WIDTH(mtr)) - p_csrow->dtype = DEV_X8; - else - p_csrow->dtype = DEV_X4; + /* ask what device type on this row */ + if (MTR_DRAM_WIDTH(mtr)) + dimm->dtype = DEV_X8; + else + dimm->dtype = DEV_X4; - p_csrow->edac_mode = EDAC_S8ECD8ED; + dimm->edac_mode = EDAC_S8ECD8ED; + dimm->nr_pages = csrow_megs << 8; + } empty = 0; } @@ -1317,7 +1331,7 @@ static void i5000_enable_error_reporting(struct mem_ctl_info *mci) } /* - * i5000_get_dimm_and_channel_counts(pdev, &num_csrows, &num_channels) + * i5000_get_dimm_and_channel_counts(pdev, &nr_csrows, &num_channels) * * ask the device how many channels are present and how many CSROWS * as well @@ -1332,7 +1346,7 @@ static void i5000_get_dimm_and_channel_counts(struct pci_dev *pdev, * supported on this memory controller */ pci_read_config_byte(pdev, MAXDIMMPERCH, &value); - *num_dimms_per_channel = (int)value *2; + *num_dimms_per_channel = (int)value; pci_read_config_byte(pdev, MAXCH, &value); *num_channels = (int)value; @@ -1348,15 +1362,14 @@ static void i5000_get_dimm_and_channel_counts(struct pci_dev *pdev, static int i5000_probe1(struct pci_dev *pdev, int dev_idx) { struct mem_ctl_info *mci; + struct edac_mc_layer layers[3]; struct i5000_pvt *pvt; int num_channels; int num_dimms_per_channel; - int num_csrows; - debugf0("MC: %s: %s(), pdev bus %u dev=0x%x fn=0x%x\n", - __FILE__, __func__, - pdev->bus->number, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + edac_dbg(0, "MC: pdev bus %u dev=0x%x fn=0x%x\n", + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); /* We only are looking for func 0 of the set */ if (PCI_FUNC(pdev->devfn) != 0) @@ -1377,21 +1390,28 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx) */ i5000_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel, &num_channels); - num_csrows = num_dimms_per_channel * 2; - debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", - __func__, num_channels, num_dimms_per_channel, num_csrows); + edac_dbg(0, "MC: Number of Branches=2 Channels= %d DIMMS= %d\n", + num_channels, num_dimms_per_channel); /* allocate a new MC control structure */ - mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); + layers[0].type = EDAC_MC_LAYER_BRANCH; + layers[0].size = MAX_BRANCHES; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = num_channels / MAX_BRANCHES; + layers[1].is_virt_csrow = false; + layers[2].type = EDAC_MC_LAYER_SLOT; + layers[2].size = num_dimms_per_channel; + layers[2].is_virt_csrow = true; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); if (mci == NULL) return -ENOMEM; - kobject_get(&mci->edac_mci_kobj); - debugf0("MC: %s: %s(): mci = %p\n", __FILE__, __func__, mci); + edac_dbg(0, "MC: mci = %p\n", mci); - mci->dev = &pdev->dev; /* record ptr to the generic device */ + mci->pdev = &pdev->dev; /* record ptr to the generic device */ pvt = mci->pvt_info; pvt->system_address = pdev; /* Record this device in our private */ @@ -1421,19 +1441,16 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx) /* initialize the MC control structure 'csrows' table * with the mapping and control information */ if (i5000_init_csrows(mci)) { - debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" - " because i5000_init_csrows() returned nonzero " - "value\n"); + edac_dbg(0, "MC: Setting mci->edac_cap to EDAC_FLAG_NONE because i5000_init_csrows() returned nonzero value\n"); mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ } else { - debugf1("MC: Enable error reporting now\n"); + edac_dbg(1, "MC: Enable error reporting now\n"); i5000_enable_error_reporting(mci); } /* add this new MC control structure to EDAC's list of MCs */ if (edac_mc_add_mc(mci)) { - debugf0("MC: %s: %s(): failed edac_mc_add_mc()\n", - __FILE__, __func__); + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); /* FIXME: perhaps some code should go here that disables error * reporting if we just enabled it */ @@ -1461,7 +1478,6 @@ fail1: i5000_put_devices(mci); fail0: - kobject_put(&mci->edac_mci_kobj); edac_mc_free(mci); return -ENODEV; } @@ -1473,12 +1489,11 @@ fail0: * negative on error * count (>= 0) */ -static int __devinit i5000_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +static int i5000_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; - debugf0("MC: %s: %s()\n", __FILE__, __func__); + edac_dbg(0, "MC:\n"); /* wake up device */ rc = pci_enable_device(pdev); @@ -1493,11 +1508,11 @@ static int __devinit i5000_init_one(struct pci_dev *pdev, * i5000_remove_one destructor for one instance of device * */ -static void __devexit i5000_remove_one(struct pci_dev *pdev) +static void i5000_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; - debugf0("%s: %s()\n", __FILE__, __func__); + edac_dbg(0, "\n"); if (i5000_pci) edac_pci_release_generic_ctl(i5000_pci); @@ -1507,7 +1522,6 @@ static void __devexit i5000_remove_one(struct pci_dev *pdev) /* retrieve references to resources, and free those resources */ i5000_put_devices(mci); - kobject_put(&mci->edac_mci_kobj); edac_mc_free(mci); } @@ -1516,7 +1530,7 @@ static void __devexit i5000_remove_one(struct pci_dev *pdev) * * The "E500P" device is the first device supported. */ -static const struct pci_device_id i5000_pci_tbl[] __devinitdata = { +static const struct pci_device_id i5000_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I5000_DEV16), .driver_data = I5000P}, @@ -1532,7 +1546,7 @@ MODULE_DEVICE_TABLE(pci, i5000_pci_tbl); static struct pci_driver i5000_driver = { .name = KBUILD_BASENAME, .probe = i5000_init_one, - .remove = __devexit_p(i5000_remove_one), + .remove = i5000_remove_one, .id_table = i5000_pci_tbl, }; @@ -1544,7 +1558,7 @@ static int __init i5000_init(void) { int pci_rc; - debugf2("MC: %s: %s()\n", __FILE__, __func__); + edac_dbg(2, "MC:\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -1560,7 +1574,7 @@ static int __init i5000_init(void) */ static void __exit i5000_exit(void) { - debugf2("MC: %s: %s()\n", __FILE__, __func__); + edac_dbg(2, "MC:\n"); pci_unregister_driver(&i5000_driver); } diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index bcbdeeca48b..6247d186177 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -14,6 +14,11 @@ * rows for each respective channel are laid out one after another, * the first half belonging to channel 0, the second half belonging * to channel 1. + * + * This driver is for DDR2 DIMMs, and it uses chip select to select among the + * several ranks. However, instead of showing memories as ranks, it outputs + * them as DIMM's. An internal table creates the association between ranks + * and DIMM's. */ #include <linux/module.h> #include <linux/init.h> @@ -22,6 +27,7 @@ #include <linux/edac.h> #include <linux/delay.h> #include <linux/mmzone.h> +#include <linux/debugfs.h> #include "edac_core.h" @@ -49,7 +55,7 @@ #define I5100_FERR_NF_MEM_M6ERR_MASK (1 << 6) #define I5100_FERR_NF_MEM_M5ERR_MASK (1 << 5) #define I5100_FERR_NF_MEM_M4ERR_MASK (1 << 4) -#define I5100_FERR_NF_MEM_M1ERR_MASK 1 +#define I5100_FERR_NF_MEM_M1ERR_MASK (1 << 1) #define I5100_FERR_NF_MEM_ANY_MASK \ (I5100_FERR_NF_MEM_M16ERR_MASK | \ I5100_FERR_NF_MEM_M15ERR_MASK | \ @@ -63,6 +69,14 @@ I5100_FERR_NF_MEM_M1ERR_MASK) #define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */ #define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */ +#define I5100_MEM0EINJMSK0 0x200 /* Injection Mask0 Register Channel 0 */ +#define I5100_MEM1EINJMSK0 0x208 /* Injection Mask0 Register Channel 1 */ +#define I5100_MEMXEINJMSK0_EINJEN (1 << 27) +#define I5100_MEM0EINJMSK1 0x204 /* Injection Mask1 Register Channel 0 */ +#define I5100_MEM1EINJMSK1 0x206 /* Injection Mask1 Register Channel 1 */ + +/* Device 19, Function 0 */ +#define I5100_DINJ0 0x9a /* device 21 and 22, func 0 */ #define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */ @@ -333,13 +347,26 @@ struct i5100_priv { unsigned ranksperchan; /* number of ranks per channel */ struct pci_dev *mc; /* device 16 func 1 */ + struct pci_dev *einj; /* device 19 func 0 */ struct pci_dev *ch0mm; /* device 21 func 0 */ struct pci_dev *ch1mm; /* device 22 func 0 */ struct delayed_work i5100_scrubbing; int scrub_enable; + + /* Error injection */ + u8 inject_channel; + u8 inject_hlinesel; + u8 inject_deviceptr1; + u8 inject_deviceptr2; + u16 inject_eccmask1; + u16 inject_eccmask2; + + struct dentry *debugfs; }; +static struct dentry *i5100_debugfs; + /* map a rank/chan to a slot number on the mainboard */ static int i5100_rank_to_slot(const struct mem_ctl_info *mci, int chan, int rank) @@ -410,14 +437,6 @@ static int i5100_csrow_to_chan(const struct mem_ctl_info *mci, int csrow) return csrow / priv->ranksperchan; } -static unsigned i5100_rank_to_csrow(const struct mem_ctl_info *mci, - int chan, int rank) -{ - const struct i5100_priv *priv = mci->pvt_info; - - return chan * priv->ranksperchan + rank; -} - static void i5100_handle_ce(struct mem_ctl_info *mci, int chan, unsigned bank, @@ -427,17 +446,17 @@ static void i5100_handle_ce(struct mem_ctl_info *mci, unsigned ras, const char *msg) { - const int csrow = i5100_rank_to_csrow(mci, chan, rank); + char detail[80]; - printk(KERN_ERR - "CE chan %d, bank %u, rank %u, syndrome 0x%lx, " - "cas %u, ras %u, csrow %u, label \"%s\": %s\n", - chan, bank, rank, syndrome, cas, ras, - csrow, mci->csrows[csrow].channels[0].label, msg); + /* Form out message */ + snprintf(detail, sizeof(detail), + "bank %u, cas %u, ras %u\n", + bank, cas, ras); - mci->ce_count++; - mci->csrows[csrow].ce_count++; - mci->csrows[csrow].channels[0].ce_count++; + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + 0, 0, syndrome, + chan, rank, -1, + msg, detail); } static void i5100_handle_ue(struct mem_ctl_info *mci, @@ -449,16 +468,17 @@ static void i5100_handle_ue(struct mem_ctl_info *mci, unsigned ras, const char *msg) { - const int csrow = i5100_rank_to_csrow(mci, chan, rank); + char detail[80]; - printk(KERN_ERR - "UE chan %d, bank %u, rank %u, syndrome 0x%lx, " - "cas %u, ras %u, csrow %u, label \"%s\": %s\n", - chan, bank, rank, syndrome, cas, ras, - csrow, mci->csrows[csrow].channels[0].label, msg); + /* Form out message */ + snprintf(detail, sizeof(detail), + "bank %u, cas %u, ras %u\n", + bank, cas, ras); - mci->ue_count++; - mci->csrows[csrow].ue_count++; + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + 0, 0, syndrome, + chan, rank, -1, + msg, detail); } static void i5100_read_log(struct mem_ctl_info *mci, int chan, @@ -535,23 +555,20 @@ static void i5100_read_log(struct mem_ctl_info *mci, int chan, static void i5100_check_error(struct mem_ctl_info *mci) { struct i5100_priv *priv = mci->pvt_info; - u32 dw; - + u32 dw, dw2; pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw); if (i5100_ferr_nf_mem_any(dw)) { - u32 dw2; pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2); - if (dw2) - pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM, - dw2); - pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw); i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(dw), i5100_ferr_nf_mem_any(dw), i5100_nerr_nf_mem_any(dw2)); + + pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM, dw2); } + pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw); } /* The i5100 chipset will scrub the entire memory once, then @@ -643,8 +660,7 @@ static struct pci_dev *pci_get_device_func(unsigned vendor, return ret; } -static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci, - int csrow) +static unsigned long i5100_npages(struct mem_ctl_info *mci, int csrow) { struct i5100_priv *priv = mci->pvt_info; const unsigned chan_rank = i5100_csrow_to_rank(mci, csrow); @@ -665,7 +681,7 @@ static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci, ((unsigned long long) (1ULL << addr_lines) / PAGE_SIZE); } -static void __devinit i5100_init_mtr(struct mem_ctl_info *mci) +static void i5100_init_mtr(struct mem_ctl_info *mci) { struct i5100_priv *priv = mci->pvt_info; struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm }; @@ -737,7 +753,7 @@ static int i5100_read_spd_byte(const struct mem_ctl_info *mci, * o not the only way to may chip selects to dimm slots * o investigate if there is some way to obtain this map from the bios */ -static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci) +static void i5100_init_dimm_csmap(struct mem_ctl_info *mci) { struct i5100_priv *priv = mci->pvt_info; int i; @@ -767,8 +783,8 @@ static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci) } } -static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev, - struct mem_ctl_info *mci) +static void i5100_init_dimm_layout(struct pci_dev *pdev, + struct mem_ctl_info *mci) { struct i5100_priv *priv = mci->pvt_info; int i; @@ -789,8 +805,8 @@ static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev, i5100_init_dimm_csmap(mci); } -static void __devinit i5100_init_interleaving(struct pci_dev *pdev, - struct mem_ctl_info *mci) +static void i5100_init_interleaving(struct pci_dev *pdev, + struct mem_ctl_info *mci) { u16 w; u32 dw; @@ -835,13 +851,13 @@ static void __devinit i5100_init_interleaving(struct pci_dev *pdev, i5100_init_mtr(mci); } -static void __devinit i5100_init_csrows(struct mem_ctl_info *mci) +static void i5100_init_csrows(struct mem_ctl_info *mci) { int i; - unsigned long total_pages = 0UL; struct i5100_priv *priv = mci->pvt_info; - for (i = 0; i < mci->nr_csrows; i++) { + for (i = 0; i < mci->tot_dimms; i++) { + struct dimm_info *dimm; const unsigned long npages = i5100_npages(mci, i); const unsigned chan = i5100_csrow_to_chan(mci, i); const unsigned rank = i5100_csrow_to_rank(mci, i); @@ -849,43 +865,143 @@ static void __devinit i5100_init_csrows(struct mem_ctl_info *mci) if (!npages) continue; - /* - * FIXME: these two are totally bogus -- I don't see how to - * map them correctly to this structure... - */ - mci->csrows[i].first_page = total_pages; - mci->csrows[i].last_page = total_pages + npages - 1; - mci->csrows[i].page_mask = 0UL; - - mci->csrows[i].nr_pages = npages; - mci->csrows[i].grain = 32; - mci->csrows[i].csrow_idx = i; - mci->csrows[i].dtype = - (priv->mtr[chan][rank].width == 4) ? DEV_X4 : DEV_X8; - mci->csrows[i].ue_count = 0; - mci->csrows[i].ce_count = 0; - mci->csrows[i].mtype = MEM_RDDR2; - mci->csrows[i].edac_mode = EDAC_SECDED; - mci->csrows[i].mci = mci; - mci->csrows[i].nr_channels = 1; - mci->csrows[i].channels[0].chan_idx = 0; - mci->csrows[i].channels[0].ce_count = 0; - mci->csrows[i].channels[0].csrow = mci->csrows + i; - snprintf(mci->csrows[i].channels[0].label, - sizeof(mci->csrows[i].channels[0].label), - "DIMM%u", i5100_rank_to_slot(mci, chan, rank)); - - total_pages += npages; + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, + chan, rank, 0); + + dimm->nr_pages = npages; + dimm->grain = 32; + dimm->dtype = (priv->mtr[chan][rank].width == 4) ? + DEV_X4 : DEV_X8; + dimm->mtype = MEM_RDDR2; + dimm->edac_mode = EDAC_SECDED; + snprintf(dimm->label, sizeof(dimm->label), "DIMM%u", + i5100_rank_to_slot(mci, chan, rank)); + + edac_dbg(2, "dimm channel %d, rank %d, size %ld\n", + chan, rank, (long)PAGES_TO_MiB(npages)); } } -static int __devinit i5100_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +/**************************************************************************** + * Error injection routines + ****************************************************************************/ + +static void i5100_do_inject(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + u32 mask0; + u16 mask1; + + /* MEM[1:0]EINJMSK0 + * 31 - ADDRMATCHEN + * 29:28 - HLINESEL + * 00 Reserved + * 01 Lower half of cache line + * 10 Upper half of cache line + * 11 Both upper and lower parts of cache line + * 27 - EINJEN + * 25:19 - XORMASK1 for deviceptr1 + * 9:5 - SEC2RAM for deviceptr2 + * 4:0 - FIR2RAM for deviceptr1 + */ + mask0 = ((priv->inject_hlinesel & 0x3) << 28) | + I5100_MEMXEINJMSK0_EINJEN | + ((priv->inject_eccmask1 & 0xffff) << 10) | + ((priv->inject_deviceptr2 & 0x1f) << 5) | + (priv->inject_deviceptr1 & 0x1f); + + /* MEM[1:0]EINJMSK1 + * 15:0 - XORMASK2 for deviceptr2 + */ + mask1 = priv->inject_eccmask2; + + if (priv->inject_channel == 0) { + pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1); + } else { + pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1); + } + + /* Error Injection Response Function + * Intel 5100 Memory Controller Hub Chipset (318378) datasheet + * hints about this register but carry no data about them. All + * data regarding device 19 is based on experimentation and the + * Intel 7300 Chipset Memory Controller Hub (318082) datasheet + * which appears to be accurate for the i5100 in this area. + * + * The injection code don't work without setting this register. + * The register needs to be flipped off then on else the hardware + * will only preform the first injection. + * + * Stop condition bits 7:4 + * 1010 - Stop after one injection + * 1011 - Never stop injecting faults + * + * Start condition bits 3:0 + * 1010 - Never start + * 1011 - Start immediately + */ + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa); + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) +static ssize_t inject_enable_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + + i5100_do_inject(mci); + + return count; +} + +static const struct file_operations i5100_inject_enable_fops = { + .open = simple_open, + .write = inject_enable_write, + .llseek = generic_file_llseek, +}; + +static int i5100_setup_debugfs(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + + if (!i5100_debugfs) + return -ENODEV; + + priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs); + + if (!priv->debugfs) + return -ENOMEM; + + debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_channel); + debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_hlinesel); + debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr1); + debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr2); + debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask1); + debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask2); + debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs, + &mci->dev, &i5100_inject_enable_fops); + + return 0; + +} + +static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct i5100_priv *priv; - struct pci_dev *ch0mm, *ch1mm; + struct pci_dev *ch0mm, *ch1mm, *einj; int ret = 0; u32 dw; int ranksperch; @@ -944,19 +1060,43 @@ static int __devinit i5100_init_one(struct pci_dev *pdev, goto bail_ch1; } - mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0); + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = 2; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = ranksperch; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + sizeof(*priv)); if (!mci) { ret = -ENOMEM; goto bail_disable_ch1; } - mci->dev = &pdev->dev; + + /* device 19, func 0, Error injection */ + einj = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_19, 0); + if (!einj) { + ret = -ENODEV; + goto bail_einj; + } + + rc = pci_enable_device(einj); + if (rc < 0) { + ret = rc; + goto bail_disable_einj; + } + + + mci->pdev = &pdev->dev; priv = mci->pvt_info; priv->ranksperchan = ranksperch; priv->mc = pdev; priv->ch0mm = ch0mm; priv->ch1mm = ch1mm; + priv->einj = einj; INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing); @@ -984,6 +1124,13 @@ static int __devinit i5100_init_one(struct pci_dev *pdev, mci->set_sdram_scrub_rate = i5100_set_scrub_rate; mci->get_sdram_scrub_rate = i5100_get_scrub_rate; + priv->inject_channel = 0; + priv->inject_hlinesel = 0; + priv->inject_deviceptr1 = 0; + priv->inject_deviceptr2 = 0; + priv->inject_eccmask1 = 0; + priv->inject_eccmask2 = 0; + i5100_init_csrows(mci); /* this strange construction seems to be in every driver, dunno why */ @@ -1001,6 +1148,8 @@ static int __devinit i5100_init_one(struct pci_dev *pdev, goto bail_scrub; } + i5100_setup_debugfs(mci); + return ret; bail_scrub: @@ -1008,6 +1157,12 @@ bail_scrub: cancel_delayed_work_sync(&(priv->i5100_scrubbing)); edac_mc_free(mci); +bail_disable_einj: + pci_disable_device(einj); + +bail_einj: + pci_dev_put(einj); + bail_disable_ch1: pci_disable_device(ch1mm); @@ -1027,7 +1182,7 @@ bail: return ret; } -static void __devexit i5100_remove_one(struct pci_dev *pdev) +static void i5100_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct i5100_priv *priv; @@ -1039,19 +1194,23 @@ static void __devexit i5100_remove_one(struct pci_dev *pdev) priv = mci->pvt_info; + debugfs_remove_recursive(priv->debugfs); + priv->scrub_enable = 0; cancel_delayed_work_sync(&(priv->i5100_scrubbing)); pci_disable_device(pdev); pci_disable_device(priv->ch0mm); pci_disable_device(priv->ch1mm); + pci_disable_device(priv->einj); pci_dev_put(priv->ch0mm); pci_dev_put(priv->ch1mm); + pci_dev_put(priv->einj); edac_mc_free(mci); } -static const struct pci_device_id i5100_pci_tbl[] __devinitdata = { +static const struct pci_device_id i5100_pci_tbl[] = { /* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... */ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) }, { 0, } @@ -1061,7 +1220,7 @@ MODULE_DEVICE_TABLE(pci, i5100_pci_tbl); static struct pci_driver i5100_driver = { .name = KBUILD_BASENAME, .probe = i5100_init_one, - .remove = __devexit_p(i5100_remove_one), + .remove = i5100_remove_one, .id_table = i5100_pci_tbl, }; @@ -1069,13 +1228,16 @@ static int __init i5100_init(void) { int pci_rc; - pci_rc = pci_register_driver(&i5100_driver); + i5100_debugfs = debugfs_create_dir("i5100_edac", NULL); + pci_rc = pci_register_driver(&i5100_driver); return (pci_rc < 0) ? pci_rc : 0; } static void __exit i5100_exit(void) { + debugfs_remove(i5100_debugfs); + pci_unregister_driver(&i5100_driver); } diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 74d6ec342af..6ef6ad1ba16 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -6,7 +6,7 @@ * * Copyright (c) 2008 by: * Ben Woodard <woodard@redhat.com> - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab * * Red Hat Inc. http://www.redhat.com * @@ -18,6 +18,10 @@ * Intel 5400 Chipset Memory Controller Hub (MCH) - Datasheet * http://developer.intel.com/design/chipsets/datashts/313070.htm * + * This Memory Controller manages DDR2 FB-DIMMs. It has 2 branches, each with + * 2 channels operating in lockstep no-mirror mode. Each channel can have up to + * 4 dimm's, each with up to 8GB. + * */ #include <linux/module.h> @@ -44,12 +48,10 @@ edac_mc_chipset_printk(mci, level, "i5400", fmt, ##arg) /* Limits for i5400 */ -#define NUM_MTRS_PER_BRANCH 4 +#define MAX_BRANCHES 2 #define CHANNELS_PER_BRANCH 2 -#define MAX_DIMMS_PER_CHANNEL NUM_MTRS_PER_BRANCH -#define MAX_CHANNELS 4 -/* max possible csrows per channel */ -#define MAX_CSROWS (MAX_DIMMS_PER_CHANNEL) +#define DIMMS_PER_CHANNEL 4 +#define MAX_CHANNELS (MAX_BRANCHES * CHANNELS_PER_BRANCH) /* Device 16, * Function 0: System Address @@ -298,24 +300,6 @@ static inline int extract_fbdchan_indx(u32 x) return (x>>28) & 0x3; } -#ifdef CONFIG_EDAC_DEBUG -/* MTR NUMROW */ -static const char *numrow_toString[] = { - "8,192 - 13 rows", - "16,384 - 14 rows", - "32,768 - 15 rows", - "65,536 - 16 rows" -}; - -/* MTR NUMCOL */ -static const char *numcol_toString[] = { - "1,024 - 10 columns", - "2,048 - 11 columns", - "4,096 - 12 columns", - "reserved" -}; -#endif - /* Device name and register DID (Device ID) */ struct i5400_dev_info { const char *ctl_name; /* name for this device */ @@ -343,20 +327,26 @@ struct i5400_pvt { struct pci_dev *branch_1; /* 22.0 */ u16 tolm; /* top of low memory */ - u64 ambase; /* AMB BAR */ + union { + u64 ambase; /* AMB BAR */ + struct { + u32 ambase_bottom; + u32 ambase_top; + } u __packed; + }; u16 mir0, mir1; - u16 b0_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */ + u16 b0_mtr[DIMMS_PER_CHANNEL]; /* Memory Technlogy Reg */ u16 b0_ambpresent0; /* Branch 0, Channel 0 */ u16 b0_ambpresent1; /* Brnach 0, Channel 1 */ - u16 b1_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */ + u16 b1_mtr[DIMMS_PER_CHANNEL]; /* Memory Technlogy Reg */ u16 b1_ambpresent0; /* Branch 1, Channel 8 */ u16 b1_ambpresent1; /* Branch 1, Channel 1 */ /* DIMM information matrix, allocating architecture maximums */ - struct i5400_dimm_info dimm_info[MAX_CSROWS][MAX_CHANNELS]; + struct i5400_dimm_info dimm_info[DIMMS_PER_CHANNEL][MAX_CHANNELS]; /* Actual values for this controller */ int maxch; /* Max channels */ @@ -532,13 +522,15 @@ static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci, int ras, cas; int errnum; char *type = NULL; + enum hw_event_mc_err_type tp_event = HW_EVENT_ERR_UNCORRECTED; if (!allErrors) return; /* if no error, return now */ - if (allErrors & ERROR_FAT_MASK) + if (allErrors & ERROR_FAT_MASK) { type = "FATAL"; - else if (allErrors & FERR_NF_UNCORRECTABLE) + tp_event = HW_EVENT_ERR_FATAL; + } else if (allErrors & FERR_NF_UNCORRECTABLE) type = "NON-FATAL uncorrected"; else type = "NON-FATAL recoverable"; @@ -556,23 +548,22 @@ static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci, ras = nrec_ras(info); cas = nrec_cas(info); - debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " - "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", - rank, channel, channel + 1, branch >> 1, bank, - buf_id, rdwr_str(rdwr), ras, cas); + edac_dbg(0, "\t\tDIMM= %d Channels= %d,%d (Branch= %d DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, channel + 1, branch >> 1, bank, + buf_id, rdwr_str(rdwr), ras, cas); /* Only 1 bit will be on */ errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); /* Form out message */ snprintf(msg, sizeof(msg), - "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s " - "RAS=%d CAS=%d %s Err=0x%lx (%s))", - type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, - type, allErrors, error_name[errnum]); + "Bank=%d Buffer ID = %d RAS=%d CAS=%d Err=0x%lx (%s)", + bank, buf_id, ras, cas, allErrors, error_name[errnum]); - /* Call the helper to output message */ - edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); + edac_mc_handle_error(tp_event, mci, 1, 0, 0, 0, + branch >> 1, -1, rank, + rdwr ? "Write error" : "Read error", + msg); } /* @@ -609,7 +600,7 @@ static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci, /* Correctable errors */ if (allErrors & ERROR_NF_CORRECTABLE) { - debugf0("\tCorrected bits= 0x%lx\n", allErrors); + edac_dbg(0, "\tCorrected bits= 0x%lx\n", allErrors); branch = extract_fbdchan_indx(info->ferr_nf_fbd); @@ -630,10 +621,9 @@ static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci, /* Only 1 bit will be on */ errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); - debugf0("\t\tCSROW= %d Channel= %d (Branch %d " - "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", - rank, channel, branch >> 1, bank, - rdwr_str(rdwr), ras, cas); + edac_dbg(0, "\t\tDIMM= %d Channel= %d (Branch %d DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, branch >> 1, bank, + rdwr_str(rdwr), ras, cas); /* Form out message */ snprintf(msg, sizeof(msg), @@ -642,8 +632,10 @@ static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci, branch >> 1, bank, rdwr_str(rdwr), ras, cas, allErrors, error_name[errnum]); - /* Call the helper to output message */ - edac_mc_handle_fbd_ce(mci, rank, channel, msg); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, + branch >> 1, channel % 2, rank, + rdwr ? "Write error" : "Read error", + msg); return; } @@ -694,7 +686,7 @@ static void i5400_clear_error(struct mem_ctl_info *mci) static void i5400_check_error(struct mem_ctl_info *mci) { struct i5400_error_info info; - debugf4("MC%d: %s: %s()\n", mci->mc_idx, __FILE__, __func__); + edac_dbg(4, "MC%d\n", mci->mc_idx); i5400_get_error_info(mci, &info); i5400_process_error_info(mci, &info); } @@ -735,7 +727,7 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx) /* Attempt to 'get' the MCH register we want */ pdev = NULL; - while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) { + while (1) { pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR, pdev); if (!pdev) { @@ -743,33 +735,53 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx) i5400_printk(KERN_ERR, "'system address,Process Bus' " "device not found:" - "vendor 0x%x device 0x%x ERR funcs " + "vendor 0x%x device 0x%x ERR func 1 " "(broken BIOS?)\n", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR); - goto error; + return -ENODEV; } - /* Store device 16 funcs 1 and 2 */ - switch (PCI_FUNC(pdev->devfn)) { - case 1: - pvt->branchmap_werrors = pdev; + /* Store device 16 func 1 */ + if (PCI_FUNC(pdev->devfn) == 1) break; - case 2: - pvt->fsb_error_regs = pdev; - break; - } } + pvt->branchmap_werrors = pdev; - debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", - pci_name(pvt->system_address), - pvt->system_address->vendor, pvt->system_address->device); - debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", - pci_name(pvt->branchmap_werrors), - pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); - debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", - pci_name(pvt->fsb_error_regs), - pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); + pdev = NULL; + while (1) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5400_ERR, pdev); + if (!pdev) { + /* End of list, leave */ + i5400_printk(KERN_ERR, + "'system address,Process Bus' " + "device not found:" + "vendor 0x%x device 0x%x ERR func 2 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5400_ERR); + + pci_dev_put(pvt->branchmap_werrors); + return -ENODEV; + } + + /* Store device 16 func 2 */ + if (PCI_FUNC(pdev->devfn) == 2) + break; + } + pvt->fsb_error_regs = pdev; + + edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n", + pci_name(pvt->system_address), + pvt->system_address->vendor, pvt->system_address->device); + edac_dbg(1, "Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->branchmap_werrors), + pvt->branchmap_werrors->vendor, + pvt->branchmap_werrors->device); + edac_dbg(1, "FSB Error Regs - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->fsb_error_regs), + pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); pvt->branch_0 = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0, NULL); @@ -778,7 +790,10 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx) "MC: 'BRANCH 0' device not found:" "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0); - goto error; + + pci_dev_put(pvt->fsb_error_regs); + pci_dev_put(pvt->branchmap_werrors); + return -ENODEV; } /* If this device claims to have more than 2 channels then @@ -796,21 +811,21 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx) "(broken BIOS?)\n", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD1); - goto error; + + pci_dev_put(pvt->branch_0); + pci_dev_put(pvt->fsb_error_regs); + pci_dev_put(pvt->branchmap_werrors); + return -ENODEV; } return 0; - -error: - i5400_put_devices(mci); - return -ENODEV; } /* * determine_amb_present * - * the information is contained in NUM_MTRS_PER_BRANCH different - * registers determining which of the NUM_MTRS_PER_BRANCH requires + * the information is contained in DIMMS_PER_CHANNEL different + * registers determining which of the DIMMS_PER_CHANNEL requires * knowing which channel is in question * * 2 branches, each with 2 channels @@ -839,11 +854,11 @@ static int determine_amb_present_reg(struct i5400_pvt *pvt, int channel) } /* - * determine_mtr(pvt, csrow, channel) + * determine_mtr(pvt, dimm, channel) * - * return the proper MTR register as determine by the csrow and desired channel + * return the proper MTR register as determine by the dimm and desired channel */ -static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel) +static int determine_mtr(struct i5400_pvt *pvt, int dimm, int channel) { int mtr; int n; @@ -851,11 +866,11 @@ static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel) /* There is one MTR for each slot pair of FB-DIMMs, Each slot pair may be at branch 0 or branch 1. */ - n = csrow; + n = dimm; - if (n >= NUM_MTRS_PER_BRANCH) { - debugf0("ERROR: trying to access an invalid csrow: %d\n", - csrow); + if (n >= DIMMS_PER_CHANNEL) { + edac_dbg(0, "ERROR: trying to access an invalid dimm: %d\n", + dimm); return 0; } @@ -875,35 +890,44 @@ static void decode_mtr(int slot_row, u16 mtr) ans = MTR_DIMMS_PRESENT(mtr); - debugf2("\tMTR%d=0x%x: DIMMs are %s\n", slot_row, mtr, - ans ? "Present" : "NOT Present"); + edac_dbg(2, "\tMTR%d=0x%x: DIMMs are %sPresent\n", + slot_row, mtr, ans ? "" : "NOT "); if (!ans) return; - debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); - - debugf2("\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); - - debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); - debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? "double" : "single"); - debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); - debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); + edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); + + edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", + MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + + edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); + edac_dbg(2, "\t\tNUMRANK: %s\n", + MTR_DIMM_RANK(mtr) ? "double" : "single"); + edac_dbg(2, "\t\tNUMROW: %s\n", + MTR_DIMM_ROWS(mtr) == 0 ? "8,192 - 13 rows" : + MTR_DIMM_ROWS(mtr) == 1 ? "16,384 - 14 rows" : + MTR_DIMM_ROWS(mtr) == 2 ? "32,768 - 15 rows" : + "65,536 - 16 rows"); + edac_dbg(2, "\t\tNUMCOL: %s\n", + MTR_DIMM_COLS(mtr) == 0 ? "1,024 - 10 columns" : + MTR_DIMM_COLS(mtr) == 1 ? "2,048 - 11 columns" : + MTR_DIMM_COLS(mtr) == 2 ? "4,096 - 12 columns" : + "reserved"); } -static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel, +static void handle_channel(struct i5400_pvt *pvt, int dimm, int channel, struct i5400_dimm_info *dinfo) { int mtr; int amb_present_reg; int addrBits; - mtr = determine_mtr(pvt, csrow, channel); + mtr = determine_mtr(pvt, dimm, channel); if (MTR_DIMMS_PRESENT(mtr)) { amb_present_reg = determine_amb_present_reg(pvt, channel); /* Determine if there is a DIMM present in this DIMM slot */ - if (amb_present_reg & (1 << csrow)) { + if (amb_present_reg & (1 << dimm)) { /* Start with the number of bits for a Bank * on the DRAM */ addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr); @@ -932,10 +956,10 @@ static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel, static void calculate_dimm_size(struct i5400_pvt *pvt) { struct i5400_dimm_info *dinfo; - int csrow, max_csrows; + int dimm, max_dimms; char *p, *mem_buffer; int space, n; - int channel; + int channel, branch; /* ================= Generate some debug output ================= */ space = PAGE_SIZE; @@ -946,52 +970,52 @@ static void calculate_dimm_size(struct i5400_pvt *pvt) return; } - /* Scan all the actual CSROWS + /* Scan all the actual DIMMS * and calculate the information for each DIMM - * Start with the highest csrow first, to display it first - * and work toward the 0th csrow + * Start with the highest dimm first, to display it first + * and work toward the 0th dimm */ - max_csrows = pvt->maxdimmperch; - for (csrow = max_csrows - 1; csrow >= 0; csrow--) { + max_dimms = pvt->maxdimmperch; + for (dimm = max_dimms - 1; dimm >= 0; dimm--) { - /* on an odd csrow, first output a 'boundary' marker, + /* on an odd dimm, first output a 'boundary' marker, * then reset the message buffer */ - if (csrow & 0x1) { + if (dimm & 0x1) { n = snprintf(p, space, "---------------------------" - "--------------------------------"); + "-------------------------------"); p += n; space -= n; - debugf2("%s\n", mem_buffer); + edac_dbg(2, "%s\n", mem_buffer); p = mem_buffer; space = PAGE_SIZE; } - n = snprintf(p, space, "csrow %2d ", csrow); + n = snprintf(p, space, "dimm %2d ", dimm); p += n; space -= n; for (channel = 0; channel < pvt->maxch; channel++) { - dinfo = &pvt->dimm_info[csrow][channel]; - handle_channel(pvt, csrow, channel, dinfo); + dinfo = &pvt->dimm_info[dimm][channel]; + handle_channel(pvt, dimm, channel, dinfo); n = snprintf(p, space, "%4d MB | ", dinfo->megabytes); p += n; space -= n; } - debugf2("%s\n", mem_buffer); + edac_dbg(2, "%s\n", mem_buffer); p = mem_buffer; space = PAGE_SIZE; } /* Output the last bottom 'boundary' marker */ n = snprintf(p, space, "---------------------------" - "--------------------------------"); + "-------------------------------"); p += n; space -= n; - debugf2("%s\n", mem_buffer); + edac_dbg(2, "%s\n", mem_buffer); p = mem_buffer; space = PAGE_SIZE; /* now output the 'channel' labels */ - n = snprintf(p, space, " "); + n = snprintf(p, space, " "); p += n; space -= n; for (channel = 0; channel < pvt->maxch; channel++) { @@ -1000,8 +1024,21 @@ static void calculate_dimm_size(struct i5400_pvt *pvt) space -= n; } + space -= n; + edac_dbg(2, "%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + + n = snprintf(p, space, " "); + p += n; + for (branch = 0; branch < MAX_BRANCHES; branch++) { + n = snprintf(p, space, " branch %d | ", branch); + p += n; + space -= n; + } + /* output the last message and free buffer */ - debugf2("%s\n", mem_buffer); + edac_dbg(2, "%s\n", mem_buffer); kfree(mem_buffer); } @@ -1024,25 +1061,25 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci) pvt = mci->pvt_info; pci_read_config_dword(pvt->system_address, AMBASE, - (u32 *) &pvt->ambase); + &pvt->u.ambase_bottom); pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32), - ((u32 *) &pvt->ambase) + sizeof(u32)); + &pvt->u.ambase_top); maxdimmperch = pvt->maxdimmperch; maxch = pvt->maxch; - debugf2("AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n", - (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch); + edac_dbg(2, "AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n", + (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch); /* Get the Branch Map regs */ pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); pvt->tolm >>= 12; - debugf2("\nTOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, - pvt->tolm); + edac_dbg(2, "\nTOLM (number of 256M regions) =%u (0x%x)\n", + pvt->tolm, pvt->tolm); actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28)); - debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n", - actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); + edac_dbg(2, "Actual TOLM byte addr=%u.%03u GB (0x%x)\n", + actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0); pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1); @@ -1051,22 +1088,24 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci) limit = (pvt->mir0 >> 4) & 0x0fff; way0 = pvt->mir0 & 0x1; way1 = pvt->mir0 & 0x2; - debugf2("MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + edac_dbg(2, "MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", + limit, way1, way0); limit = (pvt->mir1 >> 4) & 0xfff; way0 = pvt->mir1 & 0x1; way1 = pvt->mir1 & 0x2; - debugf2("MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + edac_dbg(2, "MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", + limit, way1, way0); /* Get the set of MTR[0-3] regs by each branch */ - for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) { + for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++) { int where = MTR0 + (slot_row * sizeof(u16)); /* Branch 0 set of MTR registers */ pci_read_config_word(pvt->branch_0, where, &pvt->b0_mtr[slot_row]); - debugf2("MTR%d where=0x%x B0 value=0x%x\n", slot_row, where, - pvt->b0_mtr[slot_row]); + edac_dbg(2, "MTR%d where=0x%x B0 value=0x%x\n", + slot_row, where, pvt->b0_mtr[slot_row]); if (pvt->maxch < CHANNELS_PER_BRANCH) { pvt->b1_mtr[slot_row] = 0; @@ -1076,22 +1115,22 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci) /* Branch 1 set of MTR registers */ pci_read_config_word(pvt->branch_1, where, &pvt->b1_mtr[slot_row]); - debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row, where, - pvt->b1_mtr[slot_row]); + edac_dbg(2, "MTR%d where=0x%x B1 value=0x%x\n", + slot_row, where, pvt->b1_mtr[slot_row]); } /* Read and dump branch 0's MTRs */ - debugf2("\nMemory Technology Registers:\n"); - debugf2(" Branch 0:\n"); - for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) + edac_dbg(2, "Memory Technology Registers:\n"); + edac_dbg(2, " Branch 0:\n"); + for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++) decode_mtr(slot_row, pvt->b0_mtr[slot_row]); pci_read_config_word(pvt->branch_0, AMBPRESENT_0, &pvt->b0_ambpresent0); - debugf2("\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0); + edac_dbg(2, "\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0); pci_read_config_word(pvt->branch_0, AMBPRESENT_1, &pvt->b0_ambpresent1); - debugf2("\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1); + edac_dbg(2, "\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1); /* Only if we have 2 branchs (4 channels) */ if (pvt->maxch < CHANNELS_PER_BRANCH) { @@ -1099,18 +1138,18 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci) pvt->b1_ambpresent1 = 0; } else { /* Read and dump branch 1's MTRs */ - debugf2(" Branch 1:\n"); - for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) + edac_dbg(2, " Branch 1:\n"); + for (slot_row = 0; slot_row < DIMMS_PER_CHANNEL; slot_row++) decode_mtr(slot_row, pvt->b1_mtr[slot_row]); pci_read_config_word(pvt->branch_1, AMBPRESENT_0, &pvt->b1_ambpresent0); - debugf2("\t\tAMB-Branch 1-present0 0x%x:\n", - pvt->b1_ambpresent0); + edac_dbg(2, "\t\tAMB-Branch 1-present0 0x%x:\n", + pvt->b1_ambpresent0); pci_read_config_word(pvt->branch_1, AMBPRESENT_1, &pvt->b1_ambpresent1); - debugf2("\t\tAMB-Branch 1-present1 0x%x:\n", - pvt->b1_ambpresent1); + edac_dbg(2, "\t\tAMB-Branch 1-present1 0x%x:\n", + pvt->b1_ambpresent1); } /* Go and determine the size of each DIMM and place in an @@ -1119,7 +1158,7 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci) } /* - * i5400_init_csrows Initialize the 'csrows' table within + * i5400_init_dimms Initialize the 'dimms' table within * the mci control structure with the * addressing of memory. * @@ -1127,64 +1166,67 @@ static void i5400_get_mc_regs(struct mem_ctl_info *mci) * 0 success * 1 no actual memory found on this MC */ -static int i5400_init_csrows(struct mem_ctl_info *mci) +static int i5400_init_dimms(struct mem_ctl_info *mci) { struct i5400_pvt *pvt; - struct csrow_info *p_csrow; - int empty, channel_count; - int max_csrows; + struct dimm_info *dimm; + int ndimms, channel_count; + int max_dimms; int mtr; - int csrow_megs; - int channel; - int csrow; + int size_mb; + int channel, slot; pvt = mci->pvt_info; channel_count = pvt->maxch; - max_csrows = pvt->maxdimmperch; - - empty = 1; /* Assume NO memory */ - - for (csrow = 0; csrow < max_csrows; csrow++) { - p_csrow = &mci->csrows[csrow]; - - p_csrow->csrow_idx = csrow; - - /* use branch 0 for the basis */ - mtr = determine_mtr(pvt, csrow, 0); - - /* if no DIMMS on this row, continue */ - if (!MTR_DIMMS_PRESENT(mtr)) - continue; + max_dimms = pvt->maxdimmperch; - /* FAKE OUT VALUES, FIXME */ - p_csrow->first_page = 0 + csrow * 20; - p_csrow->last_page = 9 + csrow * 20; - p_csrow->page_mask = 0xFFF; + ndimms = 0; - p_csrow->grain = 8; - - csrow_megs = 0; - for (channel = 0; channel < pvt->maxch; channel++) - csrow_megs += pvt->dimm_info[csrow][channel].megabytes; - - p_csrow->nr_pages = csrow_megs << 8; - - /* Assume DDR2 for now */ - p_csrow->mtype = MEM_FB_DDR2; - - /* ask what device type on this row */ - if (MTR_DRAM_WIDTH(mtr)) - p_csrow->dtype = DEV_X8; - else - p_csrow->dtype = DEV_X4; - - p_csrow->edac_mode = EDAC_S8ECD8ED; - - empty = 0; + /* + * FIXME: remove pvt->dimm_info[slot][channel] and use the 3 + * layers here. + */ + for (channel = 0; channel < mci->layers[0].size * mci->layers[1].size; + channel++) { + for (slot = 0; slot < mci->layers[2].size; slot++) { + mtr = determine_mtr(pvt, slot, channel); + + /* if no DIMMS on this slot, continue */ + if (!MTR_DIMMS_PRESENT(mtr)) + continue; + + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, + channel / 2, channel % 2, slot); + + size_mb = pvt->dimm_info[slot][channel].megabytes; + + edac_dbg(2, "dimm (branch %d channel %d slot %d): %d.%03d GB\n", + channel / 2, channel % 2, slot, + size_mb / 1000, size_mb % 1000); + + dimm->nr_pages = size_mb << 8; + dimm->grain = 8; + dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4; + dimm->mtype = MEM_FB_DDR2; + /* + * The eccc mechanism is SDDC (aka SECC), with + * is similar to Chipkill. + */ + dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ? + EDAC_S8ECD8ED : EDAC_S4ECD4ED; + ndimms++; + } } - return empty; + /* + * When just one memory is provided, it should be at location (0,0,0). + * With such single-DIMM mode, the SDCC algorithm degrades to SECDEC+. + */ + if (ndimms == 1) + mci->dimms[0]->edac_mode = EDAC_SECDED; + + return (ndimms == 0); } /* @@ -1220,50 +1262,45 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx) { struct mem_ctl_info *mci; struct i5400_pvt *pvt; - int num_channels; - int num_dimms_per_channel; - int num_csrows; + struct edac_mc_layer layers[3]; if (dev_idx >= ARRAY_SIZE(i5400_devs)) return -EINVAL; - debugf0("MC: %s: %s(), pdev bus %u dev=0x%x fn=0x%x\n", - __FILE__, __func__, - pdev->bus->number, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + edac_dbg(0, "MC: pdev bus %u dev=0x%x fn=0x%x\n", + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); /* We only are looking for func 0 of the set */ if (PCI_FUNC(pdev->devfn) != 0) return -ENODEV; - /* As we don't have a motherboard identification routine to determine - * actual number of slots/dimms per channel, we thus utilize the - * resource as specified by the chipset. Thus, we might have - * have more DIMMs per channel than actually on the mobo, but this - * allows the driver to support up to the chipset max, without - * some fancy mobo determination. + /* + * allocate a new MC control structure + * + * This drivers uses the DIMM slot as "csrow" and the rest as "channel". */ - num_dimms_per_channel = MAX_DIMMS_PER_CHANNEL; - num_channels = MAX_CHANNELS; - num_csrows = num_dimms_per_channel; - - debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", - __func__, num_channels, num_dimms_per_channel, num_csrows); - - /* allocate a new MC control structure */ - mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); - + layers[0].type = EDAC_MC_LAYER_BRANCH; + layers[0].size = MAX_BRANCHES; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = CHANNELS_PER_BRANCH; + layers[1].is_virt_csrow = false; + layers[2].type = EDAC_MC_LAYER_SLOT; + layers[2].size = DIMMS_PER_CHANNEL; + layers[2].is_virt_csrow = true; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); if (mci == NULL) return -ENOMEM; - debugf0("MC: %s: %s(): mci = %p\n", __FILE__, __func__, mci); + edac_dbg(0, "MC: mci = %p\n", mci); - mci->dev = &pdev->dev; /* record ptr to the generic device */ + mci->pdev = &pdev->dev; /* record ptr to the generic device */ pvt = mci->pvt_info; pvt->system_address = pdev; /* Record this device in our private */ - pvt->maxch = num_channels; - pvt->maxdimmperch = num_dimms_per_channel; + pvt->maxch = MAX_CHANNELS; + pvt->maxdimmperch = DIMMS_PER_CHANNEL; /* 'get' the pci devices we want to reserve for our use */ if (i5400_get_devices(mci, dev_idx)) @@ -1285,22 +1322,19 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx) /* Set the function pointer to an actual operation function */ mci->edac_check = i5400_check_error; - /* initialize the MC control structure 'csrows' table + /* initialize the MC control structure 'dimms' table * with the mapping and control information */ - if (i5400_init_csrows(mci)) { - debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" - " because i5400_init_csrows() returned nonzero " - "value\n"); - mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ + if (i5400_init_dimms(mci)) { + edac_dbg(0, "MC: Setting mci->edac_cap to EDAC_FLAG_NONE because i5400_init_dimms() returned nonzero value\n"); + mci->edac_cap = EDAC_FLAG_NONE; /* no dimms found */ } else { - debugf1("MC: Enable error reporting now\n"); + edac_dbg(1, "MC: Enable error reporting now\n"); i5400_enable_error_reporting(mci); } /* add this new MC control structure to EDAC's list of MCs */ if (edac_mc_add_mc(mci)) { - debugf0("MC: %s: %s(): failed edac_mc_add_mc()\n", - __FILE__, __func__); + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); /* FIXME: perhaps some code should go here that disables error * reporting if we just enabled it */ @@ -1339,12 +1373,11 @@ fail0: * negative on error * count (>= 0) */ -static int __devinit i5400_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +static int i5400_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; - debugf0("MC: %s: %s()\n", __FILE__, __func__); + edac_dbg(0, "MC:\n"); /* wake up device */ rc = pci_enable_device(pdev); @@ -1359,11 +1392,11 @@ static int __devinit i5400_init_one(struct pci_dev *pdev, * i5400_remove_one destructor for one instance of device * */ -static void __devexit i5400_remove_one(struct pci_dev *pdev) +static void i5400_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; - debugf0("%s: %s()\n", __FILE__, __func__); + edac_dbg(0, "\n"); if (i5400_pci) edac_pci_release_generic_ctl(i5400_pci); @@ -1375,6 +1408,8 @@ static void __devexit i5400_remove_one(struct pci_dev *pdev) /* retrieve references to resources, and free those resources */ i5400_put_devices(mci); + pci_disable_device(pdev); + edac_mc_free(mci); } @@ -1383,7 +1418,7 @@ static void __devexit i5400_remove_one(struct pci_dev *pdev) * * The "E500P" device is the first device supported. */ -static const struct pci_device_id i5400_pci_tbl[] __devinitdata = { +static const struct pci_device_id i5400_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)}, {0,} /* 0 terminated list. */ }; @@ -1397,7 +1432,7 @@ MODULE_DEVICE_TABLE(pci, i5400_pci_tbl); static struct pci_driver i5400_driver = { .name = "i5400_edac", .probe = i5400_init_one, - .remove = __devexit_p(i5400_remove_one), + .remove = i5400_remove_one, .id_table = i5400_pci_tbl, }; @@ -1409,7 +1444,7 @@ static int __init i5400_init(void) { int pci_rc; - debugf2("MC: %s: %s()\n", __FILE__, __func__); + edac_dbg(2, "MC:\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -1425,7 +1460,7 @@ static int __init i5400_init(void) */ static void __exit i5400_exit(void) { - debugf2("MC: %s: %s()\n", __FILE__, __func__); + edac_dbg(2, "MC:\n"); pci_unregister_driver(&i5400_driver); } @@ -1434,7 +1469,7 @@ module_exit(i5400_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); +MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - " I5400_REVISION); diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 6104dba380b..dcac982fdc7 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -5,7 +5,7 @@ * GNU General Public License version 2 only. * * Copyright (c) 2010 by: - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab * * Red Hat Inc. http://www.redhat.com * @@ -182,24 +182,6 @@ static const u16 mtr_regs[MAX_SLOTS] = { #define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) #define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) -#ifdef CONFIG_EDAC_DEBUG -/* MTR NUMROW */ -static const char *numrow_toString[] = { - "8,192 - 13 rows", - "16,384 - 14 rows", - "32,768 - 15 rows", - "65,536 - 16 rows" -}; - -/* MTR NUMCOL */ -static const char *numcol_toString[] = { - "1,024 - 10 columns", - "2,048 - 11 columns", - "4,096 - 12 columns", - "reserved" -}; -#endif - /************************************************ * i7300 Register definitions for error detection ************************************************/ @@ -215,8 +197,8 @@ static const char *ferr_fat_fbd_name[] = { [0] = "Memory Write error on non-redundant retry or " "FBD configuration Write error on retry", }; -#define GET_FBD_FAT_IDX(fbderr) (fbderr & (3 << 28)) -#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)) +#define GET_FBD_FAT_IDX(fbderr) (((fbderr) >> 28) & 3) +#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 22)) #define FERR_NF_FBD 0xa0 static const char *ferr_nf_fbd_name[] = { @@ -243,7 +225,7 @@ static const char *ferr_nf_fbd_name[] = { [1] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", [0] = "Uncorrectable Data ECC on Replay", }; -#define GET_FBD_NF_IDX(fbderr) (fbderr & (3 << 28)) +#define GET_FBD_NF_IDX(fbderr) (((fbderr) >> 28) & 3) #define FERR_NF_FBD_ERR_MASK ((1 << 24) | (1 << 23) | (1 << 22) | (1 << 21) |\ (1 << 18) | (1 << 17) | (1 << 16) | (1 << 15) |\ (1 << 14) | (1 << 13) | (1 << 11) | (1 << 10) |\ @@ -464,17 +446,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) FERR_FAT_FBD, error_reg); snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, - "FATAL (Branch=%d DRAM-Bank=%d %s " - "RAS=%d CAS=%d Err=0x%lx (%s))", - branch, bank, - is_wr ? "RDWR" : "RD", - ras, cas, - errors, specific); - - /* Call the helper to output message */ - edac_mc_handle_fbd_ue(mci, rank, branch << 1, - (branch << 1) + 1, - pvt->tmp_prt_buffer); + "Bank=%d RAS=%d CAS=%d Err=0x%lx (%s))", + bank, ras, cas, errors, specific); + + edac_mc_handle_error(HW_EVENT_ERR_FATAL, mci, 1, 0, 0, 0, + branch, -1, rank, + is_wr ? "Write error" : "Read error", + pvt->tmp_prt_buffer); + } /* read in the 1st NON-FATAL error register */ @@ -485,7 +464,7 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) errnum = find_first_bit(&errors, ARRAY_SIZE(ferr_nf_fbd_name)); specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum); - branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0; + branch = (GET_FBD_NF_IDX(error_reg) == 2) ? 1 : 0; pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, REDMEMA, &syndrome); @@ -513,23 +492,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) /* Form out message */ snprintf(pvt->tmp_prt_buffer, PAGE_SIZE, - "Corrected error (Branch=%d, Channel %d), " - " DRAM-Bank=%d %s " - "RAS=%d CAS=%d, CE Err=0x%lx, Syndrome=0x%08x(%s))", - branch, channel, - bank, - is_wr ? "RDWR" : "RD", - ras, cas, - errors, syndrome, specific); - - /* - * Call the helper to output message - * NOTE: Errors are reported per-branch, and not per-channel - * Currently, we don't know how to identify the right - * channel. - */ - edac_mc_handle_fbd_ce(mci, rank, channel, - pvt->tmp_prt_buffer); + "DRAM-Bank=%d RAS=%d CAS=%d, Err=0x%lx (%s))", + bank, ras, cas, errors, specific); + + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, + syndrome, + branch >> 1, channel % 2, rank, + is_wr ? "Write error" : "Read error", + pvt->tmp_prt_buffer); } return; } @@ -617,8 +587,7 @@ static void i7300_enable_error_reporting(struct mem_ctl_info *mci) static int decode_mtr(struct i7300_pvt *pvt, int slot, int ch, int branch, struct i7300_dimm_info *dinfo, - struct csrow_info *p_csrow, - u32 *nr_pages) + struct dimm_info *dimm) { int mtr, ans, addrBits, channel; @@ -627,9 +596,8 @@ static int decode_mtr(struct i7300_pvt *pvt, mtr = pvt->mtr[slot][branch]; ans = MTR_DIMMS_PRESENT(mtr) ? 1 : 0; - debugf2("\tMTR%d CH%d: DIMMs are %s (mtr)\n", - slot, channel, - ans ? "Present" : "NOT Present"); + edac_dbg(2, "\tMTR%d CH%d: DIMMs are %sPresent (mtr)\n", + slot, channel, ans ? "" : "NOT "); /* Determine if there is a DIMM present in this DIMM slot */ if (!ans) @@ -650,23 +618,26 @@ static int decode_mtr(struct i7300_pvt *pvt, addrBits -= 3; /* 8 bits per bytes */ dinfo->megabytes = 1 << addrBits; - *nr_pages = dinfo->megabytes << 8; - - debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); - - debugf2("\t\tELECTRICAL THROTTLING is %s\n", - MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); - - debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); - debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANKS(mtr) ? "double" : "single"); - debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); - debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); - debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes); - p_csrow->grain = 8; - p_csrow->mtype = MEM_FB_DDR2; - p_csrow->csrow_idx = slot; - p_csrow->page_mask = 0; + edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); + + edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n", + MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + + edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); + edac_dbg(2, "\t\tNUMRANK: %s\n", + MTR_DIMM_RANKS(mtr) ? "double" : "single"); + edac_dbg(2, "\t\tNUMROW: %s\n", + MTR_DIMM_ROWS(mtr) == 0 ? "8,192 - 13 rows" : + MTR_DIMM_ROWS(mtr) == 1 ? "16,384 - 14 rows" : + MTR_DIMM_ROWS(mtr) == 2 ? "32,768 - 15 rows" : + "65,536 - 16 rows"); + edac_dbg(2, "\t\tNUMCOL: %s\n", + MTR_DIMM_COLS(mtr) == 0 ? "1,024 - 10 columns" : + MTR_DIMM_COLS(mtr) == 1 ? "2,048 - 11 columns" : + MTR_DIMM_COLS(mtr) == 2 ? "4,096 - 12 columns" : + "reserved"); + edac_dbg(2, "\t\tSIZE: %d MB\n", dinfo->megabytes); /* * The type of error detection actually depends of the @@ -677,26 +648,29 @@ static int decode_mtr(struct i7300_pvt *pvt, * See datasheet Sections 7.3.6 to 7.3.8 */ + dimm->nr_pages = MiB_TO_PAGES(dinfo->megabytes); + dimm->grain = 8; + dimm->mtype = MEM_FB_DDR2; if (IS_SINGLE_MODE(pvt->mc_settings_a)) { - p_csrow->edac_mode = EDAC_SECDED; - debugf2("\t\tECC code is 8-byte-over-32-byte SECDED+ code\n"); + dimm->edac_mode = EDAC_SECDED; + edac_dbg(2, "\t\tECC code is 8-byte-over-32-byte SECDED+ code\n"); } else { - debugf2("\t\tECC code is on Lockstep mode\n"); + edac_dbg(2, "\t\tECC code is on Lockstep mode\n"); if (MTR_DRAM_WIDTH(mtr) == 8) - p_csrow->edac_mode = EDAC_S8ECD8ED; + dimm->edac_mode = EDAC_S8ECD8ED; else - p_csrow->edac_mode = EDAC_S4ECD4ED; + dimm->edac_mode = EDAC_S4ECD4ED; } /* ask what device type on this row */ if (MTR_DRAM_WIDTH(mtr) == 8) { - debugf2("\t\tScrub algorithm for x8 is on %s mode\n", - IS_SCRBALGO_ENHANCED(pvt->mc_settings) ? - "enhanced" : "normal"); + edac_dbg(2, "\t\tScrub algorithm for x8 is on %s mode\n", + IS_SCRBALGO_ENHANCED(pvt->mc_settings) ? + "enhanced" : "normal"); - p_csrow->dtype = DEV_X8; + dimm->dtype = DEV_X8; } else - p_csrow->dtype = DEV_X4; + dimm->dtype = DEV_X4; return mtr; } @@ -726,14 +700,14 @@ static void print_dimm_size(struct i7300_pvt *pvt) p += n; space -= n; } - debugf2("%s\n", pvt->tmp_prt_buffer); + edac_dbg(2, "%s\n", pvt->tmp_prt_buffer); p = pvt->tmp_prt_buffer; space = PAGE_SIZE; n = snprintf(p, space, "-------------------------------" "------------------------------"); p += n; space -= n; - debugf2("%s\n", pvt->tmp_prt_buffer); + edac_dbg(2, "%s\n", pvt->tmp_prt_buffer); p = pvt->tmp_prt_buffer; space = PAGE_SIZE; @@ -749,7 +723,7 @@ static void print_dimm_size(struct i7300_pvt *pvt) space -= n; } - debugf2("%s\n", pvt->tmp_prt_buffer); + edac_dbg(2, "%s\n", pvt->tmp_prt_buffer); p = pvt->tmp_prt_buffer; space = PAGE_SIZE; } @@ -758,7 +732,7 @@ static void print_dimm_size(struct i7300_pvt *pvt) "------------------------------"); p += n; space -= n; - debugf2("%s\n", pvt->tmp_prt_buffer); + edac_dbg(2, "%s\n", pvt->tmp_prt_buffer); p = pvt->tmp_prt_buffer; space = PAGE_SIZE; #endif @@ -774,60 +748,68 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) { struct i7300_pvt *pvt; struct i7300_dimm_info *dinfo; - struct csrow_info *p_csrow; int rc = -ENODEV; int mtr; - int ch, branch, slot, channel; - u32 last_page = 0, nr_pages; + int ch, branch, slot, channel, max_channel, max_branch; + struct dimm_info *dimm; pvt = mci->pvt_info; - debugf2("Memory Technology Registers:\n"); + edac_dbg(2, "Memory Technology Registers:\n"); + + if (IS_SINGLE_MODE(pvt->mc_settings_a)) { + max_branch = 1; + max_channel = 1; + } else { + max_branch = MAX_BRANCHES; + max_channel = MAX_CH_PER_BRANCH; + } /* Get the AMB present registers for the four channels */ - for (branch = 0; branch < MAX_BRANCHES; branch++) { + for (branch = 0; branch < max_branch; branch++) { /* Read and dump branch 0's MTRs */ channel = to_channel(0, branch); pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], AMBPRESENT_0, &pvt->ambpresent[channel]); - debugf2("\t\tAMB-present CH%d = 0x%x:\n", - channel, pvt->ambpresent[channel]); + edac_dbg(2, "\t\tAMB-present CH%d = 0x%x:\n", + channel, pvt->ambpresent[channel]); + + if (max_channel == 1) + continue; channel = to_channel(1, branch); pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], AMBPRESENT_1, &pvt->ambpresent[channel]); - debugf2("\t\tAMB-present CH%d = 0x%x:\n", - channel, pvt->ambpresent[channel]); + edac_dbg(2, "\t\tAMB-present CH%d = 0x%x:\n", + channel, pvt->ambpresent[channel]); } /* Get the set of MTR[0-7] regs by each branch */ for (slot = 0; slot < MAX_SLOTS; slot++) { int where = mtr_regs[slot]; - for (branch = 0; branch < MAX_BRANCHES; branch++) { + for (branch = 0; branch < max_branch; branch++) { pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], where, &pvt->mtr[slot][branch]); - for (ch = 0; ch < MAX_BRANCHES; ch++) { + for (ch = 0; ch < max_channel; ch++) { int channel = to_channel(ch, branch); + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, branch, ch, slot); + dinfo = &pvt->dimm_info[slot][channel]; - p_csrow = &mci->csrows[slot]; mtr = decode_mtr(pvt, slot, ch, branch, - dinfo, p_csrow, &nr_pages); + dinfo, dimm); + /* if no DIMMS on this row, continue */ if (!MTR_DIMMS_PRESENT(mtr)) continue; - /* Update per_csrow memory count */ - p_csrow->nr_pages += nr_pages; - p_csrow->first_page = last_page; - last_page += nr_pages; - p_csrow->last_page = last_page; - rc = 0; + } } } @@ -843,12 +825,11 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) static void decode_mir(int mir_no, u16 mir[MAX_MIR]) { if (mir[mir_no] & 3) - debugf2("MIR%d: limit= 0x%x Branch(es) that participate:" - " %s %s\n", - mir_no, - (mir[mir_no] >> 4) & 0xfff, - (mir[mir_no] & 1) ? "B0" : "", - (mir[mir_no] & 2) ? "B1" : ""); + edac_dbg(2, "MIR%d: limit= 0x%x Branch(es) that participate: %s %s\n", + mir_no, + (mir[mir_no] >> 4) & 0xfff, + (mir[mir_no] & 1) ? "B0" : "", + (mir[mir_no] & 2) ? "B1" : ""); } /** @@ -868,17 +849,17 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_dev_16_0_fsb_ctlr, AMBASE, (u32 *) &pvt->ambase); - debugf2("AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase); + edac_dbg(2, "AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase); /* Get the Branch Map regs */ pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, TOLM, &pvt->tolm); pvt->tolm >>= 12; - debugf2("TOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, - pvt->tolm); + edac_dbg(2, "TOLM (number of 256M regions) =%u (0x%x)\n", + pvt->tolm, pvt->tolm); actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28)); - debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n", - actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); + edac_dbg(2, "Actual TOLM byte addr=%u.%03u GB (0x%x)\n", + actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); /* Get memory controller settings */ pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, MC_SETTINGS, @@ -887,15 +868,15 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) &pvt->mc_settings_a); if (IS_SINGLE_MODE(pvt->mc_settings_a)) - debugf0("Memory controller operating on single mode\n"); + edac_dbg(0, "Memory controller operating on single mode\n"); else - debugf0("Memory controller operating on %s mode\n", - IS_MIRRORED(pvt->mc_settings) ? "mirrored" : "non-mirrored"); + edac_dbg(0, "Memory controller operating on %smirrored mode\n", + IS_MIRRORED(pvt->mc_settings) ? "" : "non-"); - debugf0("Error detection is %s\n", - IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); - debugf0("Retry is %s\n", - IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + edac_dbg(0, "Error detection is %s\n", + IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); + edac_dbg(0, "Retry is %s\n", + IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled"); /* Get Memory Interleave Range registers */ pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0, @@ -953,7 +934,7 @@ static void i7300_put_devices(struct mem_ctl_info *mci) * Device 21 function 0: PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 * Device 22 function 0: PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 */ -static int __devinit i7300_get_devices(struct mem_ctl_info *mci) +static int i7300_get_devices(struct mem_ctl_info *mci) { struct i7300_pvt *pvt; struct pci_dev *pdev; @@ -962,45 +943,47 @@ static int __devinit i7300_get_devices(struct mem_ctl_info *mci) /* Attempt to 'get' the MCH register we want */ pdev = NULL; - while (!pvt->pci_dev_16_1_fsb_addr_map || - !pvt->pci_dev_16_2_fsb_err_regs) { - pdev = pci_get_device(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev); - if (!pdev) { - /* End of list, leave */ - i7300_printk(KERN_ERR, - "'system address,Process Bus' " - "device not found:" - "vendor 0x%x device 0x%x ERR funcs " - "(broken BIOS?)\n", - PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); - goto error; - } - + while ((pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, + pdev))) { /* Store device 16 funcs 1 and 2 */ switch (PCI_FUNC(pdev->devfn)) { case 1: - pvt->pci_dev_16_1_fsb_addr_map = pdev; + if (!pvt->pci_dev_16_1_fsb_addr_map) + pvt->pci_dev_16_1_fsb_addr_map = + pci_dev_get(pdev); break; case 2: - pvt->pci_dev_16_2_fsb_err_regs = pdev; + if (!pvt->pci_dev_16_2_fsb_err_regs) + pvt->pci_dev_16_2_fsb_err_regs = + pci_dev_get(pdev); break; } } - debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", - pci_name(pvt->pci_dev_16_0_fsb_ctlr), - pvt->pci_dev_16_0_fsb_ctlr->vendor, - pvt->pci_dev_16_0_fsb_ctlr->device); - debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", - pci_name(pvt->pci_dev_16_1_fsb_addr_map), - pvt->pci_dev_16_1_fsb_addr_map->vendor, - pvt->pci_dev_16_1_fsb_addr_map->device); - debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", - pci_name(pvt->pci_dev_16_2_fsb_err_regs), - pvt->pci_dev_16_2_fsb_err_regs->vendor, - pvt->pci_dev_16_2_fsb_err_regs->device); + if (!pvt->pci_dev_16_1_fsb_addr_map || + !pvt->pci_dev_16_2_fsb_err_regs) { + /* At least one device was not found */ + i7300_printk(KERN_ERR, + "'system address,Process Bus' device not found:" + "vendor 0x%x device 0x%x ERR funcs (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); + goto error; + } + + edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n", + pci_name(pvt->pci_dev_16_0_fsb_ctlr), + pvt->pci_dev_16_0_fsb_ctlr->vendor, + pvt->pci_dev_16_0_fsb_ctlr->device); + edac_dbg(1, "Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->pci_dev_16_1_fsb_addr_map), + pvt->pci_dev_16_1_fsb_addr_map->vendor, + pvt->pci_dev_16_1_fsb_addr_map->device); + edac_dbg(1, "FSB Error Regs - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->pci_dev_16_2_fsb_err_regs), + pvt->pci_dev_16_2_fsb_err_regs->vendor, + pvt->pci_dev_16_2_fsb_err_regs->device); pvt->pci_dev_2x_0_fbd_branch[0] = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_FB0, @@ -1038,14 +1021,11 @@ error: * @pdev: struct pci_dev pointer * @id: struct pci_device_id pointer - currently unused */ -static int __devinit i7300_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +static int i7300_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mem_ctl_info *mci; + struct edac_mc_layer layers[3]; struct i7300_pvt *pvt; - int num_channels; - int num_dimms_per_channel; - int num_csrows; int rc; /* wake up device */ @@ -1053,38 +1033,31 @@ static int __devinit i7300_init_one(struct pci_dev *pdev, if (rc == -EIO) return rc; - debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n", - __func__, - pdev->bus->number, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + edac_dbg(0, "MC: pdev bus %u dev=0x%x fn=0x%x\n", + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); /* We only are looking for func 0 of the set */ if (PCI_FUNC(pdev->devfn) != 0) return -ENODEV; - /* As we don't have a motherboard identification routine to determine - * actual number of slots/dimms per channel, we thus utilize the - * resource as specified by the chipset. Thus, we might have - * have more DIMMs per channel than actually on the mobo, but this - * allows the driver to support up to the chipset max, without - * some fancy mobo determination. - */ - num_dimms_per_channel = MAX_SLOTS; - num_channels = MAX_CHANNELS; - num_csrows = MAX_SLOTS * MAX_CHANNELS; - - debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", - __func__, num_channels, num_dimms_per_channel, num_csrows); - /* allocate a new MC control structure */ - mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); - + layers[0].type = EDAC_MC_LAYER_BRANCH; + layers[0].size = MAX_BRANCHES; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = MAX_CH_PER_BRANCH; + layers[1].is_virt_csrow = true; + layers[2].type = EDAC_MC_LAYER_SLOT; + layers[2].size = MAX_SLOTS; + layers[2].is_virt_csrow = true; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); if (mci == NULL) return -ENOMEM; - debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); + edac_dbg(0, "MC: mci = %p\n", mci); - mci->dev = &pdev->dev; /* record ptr to the generic device */ + mci->pdev = &pdev->dev; /* record ptr to the generic device */ pvt = mci->pvt_info; pvt->pci_dev_16_0_fsb_ctlr = pdev; /* Record this device in our private */ @@ -1115,19 +1088,16 @@ static int __devinit i7300_init_one(struct pci_dev *pdev, /* initialize the MC control structure 'csrows' table * with the mapping and control information */ if (i7300_get_mc_regs(mci)) { - debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" - " because i7300_init_csrows() returned nonzero " - "value\n"); + edac_dbg(0, "MC: Setting mci->edac_cap to EDAC_FLAG_NONE because i7300_init_csrows() returned nonzero value\n"); mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ } else { - debugf1("MC: Enable error reporting now\n"); + edac_dbg(1, "MC: Enable error reporting now\n"); i7300_enable_error_reporting(mci); } /* add this new MC control structure to EDAC's list of MCs */ if (edac_mc_add_mc(mci)) { - debugf0("MC: " __FILE__ - ": %s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); /* FIXME: perhaps some code should go here that disables error * reporting if we just enabled it */ @@ -1164,12 +1134,12 @@ fail0: * i7300_remove_one() - Remove the driver * @pdev: struct pci_dev pointer */ -static void __devexit i7300_remove_one(struct pci_dev *pdev) +static void i7300_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; char *tmp; - debugf0(__FILE__ ": %s()\n", __func__); + edac_dbg(0, "\n"); if (i7300_pci) edac_pci_release_generic_ctl(i7300_pci); @@ -1192,7 +1162,7 @@ static void __devexit i7300_remove_one(struct pci_dev *pdev) * * Has only 8086:360c PCI ID */ -static const struct pci_device_id i7300_pci_tbl[] __devinitdata = { +static const struct pci_device_id i7300_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)}, {0,} /* 0 terminated list. */ }; @@ -1205,7 +1175,7 @@ MODULE_DEVICE_TABLE(pci, i7300_pci_tbl); static struct pci_driver i7300_driver = { .name = "i7300_edac", .probe = i7300_init_one, - .remove = __devexit_p(i7300_remove_one), + .remove = i7300_remove_one, .id_table = i7300_pci_tbl, }; @@ -1216,7 +1186,7 @@ static int __init i7300_init(void) { int pci_rc; - debugf2("MC: " __FILE__ ": %s()\n", __func__); + edac_dbg(2, "\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -1231,7 +1201,7 @@ static int __init i7300_init(void) */ static void __exit i7300_exit(void) { - debugf2("MC: " __FILE__ ": %s()\n", __func__); + edac_dbg(2, "\n"); pci_unregister_driver(&i7300_driver); } @@ -1239,7 +1209,7 @@ module_init(i7300_init); module_exit(i7300_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); +MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - " I7300_REVISION); diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 8568d9b6187..9cd0b301f81 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -9,7 +9,7 @@ * GNU General Public License version 2 only. * * Copyright (c) 2009-2010 by: - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab * * Red Hat Inc. http://www.redhat.com * @@ -90,7 +90,7 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices"); #define MC_MAX_DOD 0x64 /* - * OFFSETS for Device 3 Function 4, as inicated on Xeon 5500 datasheet: + * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet: * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf */ @@ -101,7 +101,7 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices"); #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff) #define DIMM0_COR_ERR(r) ((r) & 0x7fff) -/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */ +/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */ #define MC_SSRCONTROL 0x48 #define SSR_MODE_DISABLE 0x00 #define SSR_MODE_ENABLE 0x01 @@ -221,7 +221,9 @@ struct i7core_inject { }; struct i7core_channel { - u32 ranks; + bool is_3dimms_present; + bool is_single_4rank; + bool has_4rank; u32 dimms; }; @@ -246,6 +248,8 @@ struct i7core_dev { }; struct i7core_pvt { + struct device *addrmatch_dev, *chancounts_dev; + struct pci_dev *pci_noncore; struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1]; struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1]; @@ -257,7 +261,6 @@ struct i7core_pvt { struct i7core_channel channel[NUM_CHANS]; int ce_count_available; - int csrow_map[NUM_CHANS][MAX_DIMMS]; /* ECC corrected errors counts per udimm */ unsigned long udimm_ce_count[MAX_DIMMS]; @@ -391,14 +394,14 @@ static const struct pci_id_table pci_dev_table[] = { /* * pci_device_id table for which devices we are looking for */ -static const struct pci_device_id i7core_pci_tbl[] __devinitdata = { +static const struct pci_device_id i7core_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)}, {0,} /* 0 terminated list. */ }; /**************************************************************************** - Anciliary status routines + Ancillary status routines ****************************************************************************/ /* MC_CONTROL bits */ @@ -417,21 +420,21 @@ static inline int numdimms(u32 dimms) static inline int numrank(u32 rank) { - static int ranks[4] = { 1, 2, 4, -EINVAL }; + static const int ranks[] = { 1, 2, 4, -EINVAL }; return ranks[rank & 0x3]; } static inline int numbank(u32 bank) { - static int banks[4] = { 4, 8, 16, -EINVAL }; + static const int banks[] = { 4, 8, 16, -EINVAL }; return banks[bank & 0x3]; } static inline int numrow(u32 row) { - static int rows[8] = { + static const int rows[] = { 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, -EINVAL, -EINVAL, -EINVAL, }; @@ -441,7 +444,7 @@ static inline int numrow(u32 row) static inline int numcol(u32 col) { - static int cols[8] = { + static const int cols[] = { 1 << 10, 1 << 11, 1 << 12, -EINVAL, }; return cols[col & 0x3]; @@ -492,116 +495,15 @@ static void free_i7core_dev(struct i7core_dev *i7core_dev) /**************************************************************************** Memory check routines ****************************************************************************/ -static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot, - unsigned func) -{ - struct i7core_dev *i7core_dev = get_i7core_dev(socket); - int i; - - if (!i7core_dev) - return NULL; - for (i = 0; i < i7core_dev->n_devs; i++) { - if (!i7core_dev->pdev[i]) - continue; - - if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot && - PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) { - return i7core_dev->pdev[i]; - } - } - - return NULL; -} - -/** - * i7core_get_active_channels() - gets the number of channels and csrows - * @socket: Quick Path Interconnect socket - * @channels: Number of channels that will be returned - * @csrows: Number of csrows found - * - * Since EDAC core needs to know in advance the number of available channels - * and csrows, in order to allocate memory for csrows/channels, it is needed - * to run two similar steps. At the first step, implemented on this function, - * it checks the number of csrows/channels present at one socket. - * this is used in order to properly allocate the size of mci components. - * - * It should be noticed that none of the current available datasheets explain - * or even mention how csrows are seen by the memory controller. So, we need - * to add a fake description for csrows. - * So, this driver is attributing one DIMM memory for one csrow. - */ -static int i7core_get_active_channels(const u8 socket, unsigned *channels, - unsigned *csrows) -{ - struct pci_dev *pdev = NULL; - int i, j; - u32 status, control; - - *channels = 0; - *csrows = 0; - - pdev = get_pdev_slot_func(socket, 3, 0); - if (!pdev) { - i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n", - socket); - return -ENODEV; - } - - /* Device 3 function 0 reads */ - pci_read_config_dword(pdev, MC_STATUS, &status); - pci_read_config_dword(pdev, MC_CONTROL, &control); - - for (i = 0; i < NUM_CHANS; i++) { - u32 dimm_dod[3]; - /* Check if the channel is active */ - if (!(control & (1 << (8 + i)))) - continue; - - /* Check if the channel is disabled */ - if (status & (1 << i)) - continue; - - pdev = get_pdev_slot_func(socket, i + 4, 1); - if (!pdev) { - i7core_printk(KERN_ERR, "Couldn't find socket %d " - "fn %d.%d!!!\n", - socket, i + 4, 1); - return -ENODEV; - } - /* Devices 4-6 function 1 */ - pci_read_config_dword(pdev, - MC_DOD_CH_DIMM0, &dimm_dod[0]); - pci_read_config_dword(pdev, - MC_DOD_CH_DIMM1, &dimm_dod[1]); - pci_read_config_dword(pdev, - MC_DOD_CH_DIMM2, &dimm_dod[2]); - - (*channels)++; - - for (j = 0; j < 3; j++) { - if (!DIMM_PRESENT(dimm_dod[j])) - continue; - (*csrows)++; - } - } - - debugf0("Number of active channels on socket %d: %d\n", - socket, *channels); - - return 0; -} - -static int get_dimm_config(const struct mem_ctl_info *mci) +static int get_dimm_config(struct mem_ctl_info *mci) { struct i7core_pvt *pvt = mci->pvt_info; - struct csrow_info *csr; struct pci_dev *pdev; int i, j; - int csrow = 0; - unsigned long last_page = 0; enum edac_type mode; enum mem_type mtype; + struct dimm_info *dimm; /* Get data from the MC register, function 0 */ pdev = pvt->pci_mcr[0]; @@ -614,29 +516,28 @@ static int get_dimm_config(const struct mem_ctl_info *mci) pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod); pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map); - debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n", - pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status, - pvt->info.max_dod, pvt->info.ch_map); + edac_dbg(0, "QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n", + pvt->i7core_dev->socket, pvt->info.mc_control, + pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map); if (ECC_ENABLED(pvt)) { - debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4); + edac_dbg(0, "ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4); if (ECCx8(pvt)) mode = EDAC_S8ECD8ED; else mode = EDAC_S4ECD4ED; } else { - debugf0("ECC disabled\n"); + edac_dbg(0, "ECC disabled\n"); mode = EDAC_NONE; } /* FIXME: need to handle the error codes */ - debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked " - "x%x x 0x%x\n", - numdimms(pvt->info.max_dod), - numrank(pvt->info.max_dod >> 2), - numbank(pvt->info.max_dod >> 4), - numrow(pvt->info.max_dod >> 6), - numcol(pvt->info.max_dod >> 9)); + edac_dbg(0, "DOD Max limits: DIMMS: %d, %d-ranked, %d-banked x%x x 0x%x\n", + numdimms(pvt->info.max_dod), + numrank(pvt->info.max_dod >> 2), + numbank(pvt->info.max_dod >> 4), + numrow(pvt->info.max_dod >> 6), + numcol(pvt->info.max_dod >> 9)); for (i = 0; i < NUM_CHANS; i++) { u32 data, dimm_dod[3], value[8]; @@ -645,11 +546,11 @@ static int get_dimm_config(const struct mem_ctl_info *mci) continue; if (!CH_ACTIVE(pvt, i)) { - debugf0("Channel %i is not active\n", i); + edac_dbg(0, "Channel %i is not active\n", i); continue; } if (CH_DISABLED(pvt, i)) { - debugf0("Channel %i is disabled\n", i); + edac_dbg(0, "Channel %i is disabled\n", i); continue; } @@ -657,21 +558,20 @@ static int get_dimm_config(const struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_ch[i][0], MC_CHANNEL_DIMM_INIT_PARAMS, &data); - pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ? - 4 : 2; + + if (data & THREE_DIMMS_PRESENT) + pvt->channel[i].is_3dimms_present = true; + + if (data & SINGLE_QUAD_RANK_PRESENT) + pvt->channel[i].is_single_4rank = true; + + if (data & QUAD_RANK_PRESENT) + pvt->channel[i].has_4rank = true; if (data & REGISTERED_DIMM) mtype = MEM_RDDR3; else mtype = MEM_DDR3; -#if 0 - if (data & THREE_DIMMS_PRESENT) - pvt->channel[i].dimms = 3; - else if (data & SINGLE_QUAD_RANK_PRESENT) - pvt->channel[i].dimms = 1; - else - pvt->channel[i].dimms = 2; -#endif /* Devices 4-6 function 1 */ pci_read_config_dword(pvt->pci_ch[i][1], @@ -681,13 +581,14 @@ static int get_dimm_config(const struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_ch[i][1], MC_DOD_CH_DIMM2, &dimm_dod[2]); - debugf0("Ch%d phy rd%d, wr%d (0x%08x): " - "%d ranks, %cDIMMs\n", - i, - RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i), - data, - pvt->channel[i].ranks, - (data & REGISTERED_DIMM) ? 'R' : 'U'); + edac_dbg(0, "Ch%d phy rd%d, wr%d (0x%08x): %s%s%s%cDIMMs\n", + i, + RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i), + data, + pvt->channel[i].is_3dimms_present ? "3DIMMS " : "", + pvt->channel[i].is_3dimms_present ? "SINGLE_4R " : "", + pvt->channel[i].has_4rank ? "HAS_4R " : "", + (data & REGISTERED_DIMM) ? 'R' : 'U'); for (j = 0; j < 3; j++) { u32 banks, ranks, rows, cols; @@ -696,6 +597,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci) if (!DIMM_PRESENT(dimm_dod[j])) continue; + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, + i, j, 0); banks = numbank(MC_DOD_NUMBANK(dimm_dod[j])); ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j])); rows = numrow(MC_DOD_NUMROW(dimm_dod[j])); @@ -704,54 +607,35 @@ static int get_dimm_config(const struct mem_ctl_info *mci) /* DDR3 has 8 I/O banks */ size = (rows * cols * banks * ranks) >> (20 - 3); - pvt->channel[i].dimms++; - - debugf0("\tdimm %d %d Mb offset: %x, " - "bank: %d, rank: %d, row: %#x, col: %#x\n", - j, size, - RANKOFFSET(dimm_dod[j]), - banks, ranks, rows, cols); + edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n", + j, size, + RANKOFFSET(dimm_dod[j]), + banks, ranks, rows, cols); npages = MiB_TO_PAGES(size); - csr = &mci->csrows[csrow]; - csr->first_page = last_page + 1; - last_page += npages; - csr->last_page = last_page; - csr->nr_pages = npages; - - csr->page_mask = 0; - csr->grain = 8; - csr->csrow_idx = csrow; - csr->nr_channels = 1; - - csr->channels[0].chan_idx = i; - csr->channels[0].ce_count = 0; - - pvt->csrow_map[i][j] = csrow; + dimm->nr_pages = npages; switch (banks) { case 4: - csr->dtype = DEV_X4; + dimm->dtype = DEV_X4; break; case 8: - csr->dtype = DEV_X8; + dimm->dtype = DEV_X8; break; case 16: - csr->dtype = DEV_X16; + dimm->dtype = DEV_X16; break; default: - csr->dtype = DEV_UNKNOWN; + dimm->dtype = DEV_UNKNOWN; } - csr->edac_mode = mode; - csr->mtype = mtype; - snprintf(csr->channels[0].label, - sizeof(csr->channels[0].label), - "CPU#%uChannel#%u_DIMM#%u", - pvt->i7core_dev->socket, i, j); - - csrow++; + snprintf(dimm->label, sizeof(dimm->label), + "CPU#%uChannel#%u_DIMM#%u", + pvt->i7core_dev->socket, i, j); + dimm->grain = 8; + dimm->edac_mode = mode; + dimm->mtype = mtype; } pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]); @@ -762,12 +646,12 @@ static int get_dimm_config(const struct mem_ctl_info *mci) pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]); pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]); pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]); - debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i); + edac_dbg(1, "\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i); for (j = 0; j < 8; j++) - debugf1("\t\t%#x\t%#x\t%#x\n", - (value[j] >> 27) & 0x1, - (value[j] >> 24) & 0x7, - (value[j] & ((1 << 24) - 1))); + edac_dbg(1, "\t\t%#x\t%#x\t%#x\n", + (value[j] >> 27) & 0x1, + (value[j] >> 24) & 0x7, + (value[j] & ((1 << 24) - 1))); } return 0; @@ -777,6 +661,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci) Error insertion routines ****************************************************************************/ +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + /* The i7core has independent error injection features per channel. However, to have a simpler code, we don't allow enabling error injection on more than one channel. @@ -806,9 +692,11 @@ static int disable_inject(const struct mem_ctl_info *mci) * bit 0 - refers to the lower 32-byte half cacheline * bit 1 - refers to the upper 32-byte half cacheline */ -static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci, +static ssize_t i7core_inject_section_store(struct device *dev, + struct device_attribute *mattr, const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); struct i7core_pvt *pvt = mci->pvt_info; unsigned long value; int rc; @@ -816,7 +704,7 @@ static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci, if (pvt->inject.enable) disable_inject(mci); - rc = strict_strtoul(data, 10, &value); + rc = kstrtoul(data, 10, &value); if ((rc < 0) || (value > 3)) return -EIO; @@ -824,9 +712,11 @@ static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci, return count; } -static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci, - char *data) +static ssize_t i7core_inject_section_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct mem_ctl_info *mci = to_mci(dev); struct i7core_pvt *pvt = mci->pvt_info; return sprintf(data, "0x%08x\n", pvt->inject.section); } @@ -839,17 +729,19 @@ static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci, * bit 1 - inject ECC error * bit 2 - inject parity error */ -static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci, +static ssize_t i7core_inject_type_store(struct device *dev, + struct device_attribute *mattr, const char *data, size_t count) { - struct i7core_pvt *pvt = mci->pvt_info; + struct mem_ctl_info *mci = to_mci(dev); +struct i7core_pvt *pvt = mci->pvt_info; unsigned long value; int rc; if (pvt->inject.enable) disable_inject(mci); - rc = strict_strtoul(data, 10, &value); + rc = kstrtoul(data, 10, &value); if ((rc < 0) || (value > 7)) return -EIO; @@ -857,10 +749,13 @@ static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci, return count; } -static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci, - char *data) +static ssize_t i7core_inject_type_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct mem_ctl_info *mci = to_mci(dev); struct i7core_pvt *pvt = mci->pvt_info; + return sprintf(data, "0x%08x\n", pvt->inject.type); } @@ -874,9 +769,11 @@ static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci, * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an * uncorrectable error to be injected. */ -static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci, - const char *data, size_t count) +static ssize_t i7core_inject_eccmask_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); struct i7core_pvt *pvt = mci->pvt_info; unsigned long value; int rc; @@ -884,7 +781,7 @@ static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci, if (pvt->inject.enable) disable_inject(mci); - rc = strict_strtoul(data, 10, &value); + rc = kstrtoul(data, 10, &value); if (rc < 0) return -EIO; @@ -892,10 +789,13 @@ static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci, return count; } -static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci, - char *data) +static ssize_t i7core_inject_eccmask_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct mem_ctl_info *mci = to_mci(dev); struct i7core_pvt *pvt = mci->pvt_info; + return sprintf(data, "0x%08x\n", pvt->inject.eccmask); } @@ -912,14 +812,16 @@ static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci, #define DECLARE_ADDR_MATCH(param, limit) \ static ssize_t i7core_inject_store_##param( \ - struct mem_ctl_info *mci, \ - const char *data, size_t count) \ + struct device *dev, \ + struct device_attribute *mattr, \ + const char *data, size_t count) \ { \ + struct mem_ctl_info *mci = dev_get_drvdata(dev); \ struct i7core_pvt *pvt; \ long value; \ int rc; \ \ - debugf1("%s()\n", __func__); \ + edac_dbg(1, "\n"); \ pvt = mci->pvt_info; \ \ if (pvt->inject.enable) \ @@ -928,7 +830,7 @@ static ssize_t i7core_inject_store_##param( \ if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\ value = -1; \ else { \ - rc = strict_strtoul(data, 10, &value); \ + rc = kstrtoul(data, 10, &value); \ if ((rc < 0) || (value >= limit)) \ return -EIO; \ } \ @@ -939,13 +841,15 @@ static ssize_t i7core_inject_store_##param( \ } \ \ static ssize_t i7core_inject_show_##param( \ - struct mem_ctl_info *mci, \ - char *data) \ + struct device *dev, \ + struct device_attribute *mattr, \ + char *data) \ { \ + struct mem_ctl_info *mci = dev_get_drvdata(dev); \ struct i7core_pvt *pvt; \ \ pvt = mci->pvt_info; \ - debugf1("%s() pvt=%p\n", __func__, pvt); \ + edac_dbg(1, "pvt=%p\n", pvt); \ if (pvt->inject.param < 0) \ return sprintf(data, "any\n"); \ else \ @@ -953,14 +857,9 @@ static ssize_t i7core_inject_show_##param( \ } #define ATTR_ADDR_MATCH(param) \ - { \ - .attr = { \ - .name = #param, \ - .mode = (S_IRUGO | S_IWUSR) \ - }, \ - .show = i7core_inject_show_##param, \ - .store = i7core_inject_store_##param, \ - } + static DEVICE_ATTR(param, S_IRUGO | S_IWUSR, \ + i7core_inject_show_##param, \ + i7core_inject_store_##param) DECLARE_ADDR_MATCH(channel, 3); DECLARE_ADDR_MATCH(dimm, 3); @@ -969,14 +868,21 @@ DECLARE_ADDR_MATCH(bank, 32); DECLARE_ADDR_MATCH(page, 0x10000); DECLARE_ADDR_MATCH(col, 0x4000); +ATTR_ADDR_MATCH(channel); +ATTR_ADDR_MATCH(dimm); +ATTR_ADDR_MATCH(rank); +ATTR_ADDR_MATCH(bank); +ATTR_ADDR_MATCH(page); +ATTR_ADDR_MATCH(col); + static int write_and_test(struct pci_dev *dev, const int where, const u32 val) { u32 read; int count; - debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n", - dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), - where, val); + edac_dbg(0, "setting pci %02x:%02x.%x reg=%02x value=%08x\n", + dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), + where, val); for (count = 0; count < 10; count++) { if (count) @@ -1014,9 +920,11 @@ static int write_and_test(struct pci_dev *dev, const int where, const u32 val) * is reliable enough to check if the MC is using the * three channels. However, this is not clear at the datasheet. */ -static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci, - const char *data, size_t count) +static ssize_t i7core_inject_enable_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); struct i7core_pvt *pvt = mci->pvt_info; u32 injectmask; u64 mask = 0; @@ -1026,7 +934,7 @@ static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci, if (!pvt->pci_ch[pvt->inject.channel][0]) return 0; - rc = strict_strtoul(data, 10, &enable); + rc = kstrtoul(data, 10, &enable); if ((rc < 0)) return 0; @@ -1109,17 +1017,18 @@ static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci, pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, 8); - debugf0("Error inject addr match 0x%016llx, ecc 0x%08x," - " inject 0x%08x\n", - mask, pvt->inject.eccmask, injectmask); + edac_dbg(0, "Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n", + mask, pvt->inject.eccmask, injectmask); return count; } -static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci, - char *data) +static ssize_t i7core_inject_enable_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct mem_ctl_info *mci = to_mci(dev); struct i7core_pvt *pvt = mci->pvt_info; u32 injectmask; @@ -1129,7 +1038,7 @@ static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci, pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0], MC_CHANNEL_ERROR_INJECT, &injectmask); - debugf0("Inject error read: 0x%018x\n", injectmask); + edac_dbg(0, "Inject error read: 0x%018x\n", injectmask); if (injectmask & 0x0c) pvt->inject.enable = 1; @@ -1139,12 +1048,14 @@ static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci, #define DECLARE_COUNTER(param) \ static ssize_t i7core_show_counter_##param( \ - struct mem_ctl_info *mci, \ - char *data) \ + struct device *dev, \ + struct device_attribute *mattr, \ + char *data) \ { \ + struct mem_ctl_info *mci = dev_get_drvdata(dev); \ struct i7core_pvt *pvt = mci->pvt_info; \ \ - debugf1("%s() \n", __func__); \ + edac_dbg(1, "\n"); \ if (!pvt->ce_count_available || (pvt->is_registered)) \ return sprintf(data, "data unavailable\n"); \ return sprintf(data, "%lu\n", \ @@ -1152,121 +1063,179 @@ static ssize_t i7core_show_counter_##param( \ } #define ATTR_COUNTER(param) \ - { \ - .attr = { \ - .name = __stringify(udimm##param), \ - .mode = (S_IRUGO | S_IWUSR) \ - }, \ - .show = i7core_show_counter_##param \ - } + static DEVICE_ATTR(udimm##param, S_IRUGO | S_IWUSR, \ + i7core_show_counter_##param, \ + NULL) DECLARE_COUNTER(0); DECLARE_COUNTER(1); DECLARE_COUNTER(2); +ATTR_COUNTER(0); +ATTR_COUNTER(1); +ATTR_COUNTER(2); + /* - * Sysfs struct + * inject_addrmatch device sysfs struct */ -static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = { - ATTR_ADDR_MATCH(channel), - ATTR_ADDR_MATCH(dimm), - ATTR_ADDR_MATCH(rank), - ATTR_ADDR_MATCH(bank), - ATTR_ADDR_MATCH(page), - ATTR_ADDR_MATCH(col), - { } /* End of list */ +static struct attribute *i7core_addrmatch_attrs[] = { + &dev_attr_channel.attr, + &dev_attr_dimm.attr, + &dev_attr_rank.attr, + &dev_attr_bank.attr, + &dev_attr_page.attr, + &dev_attr_col.attr, + NULL +}; + +static struct attribute_group addrmatch_grp = { + .attrs = i7core_addrmatch_attrs, +}; + +static const struct attribute_group *addrmatch_groups[] = { + &addrmatch_grp, + NULL }; -static const struct mcidev_sysfs_group i7core_inject_addrmatch = { - .name = "inject_addrmatch", - .mcidev_attr = i7core_addrmatch_attrs, +static void addrmatch_release(struct device *device) +{ + edac_dbg(1, "Releasing device %s\n", dev_name(device)); + kfree(device); +} + +static struct device_type addrmatch_type = { + .groups = addrmatch_groups, + .release = addrmatch_release, }; -static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = { - ATTR_COUNTER(0), - ATTR_COUNTER(1), - ATTR_COUNTER(2), - { .attr = { .name = NULL } } +/* + * all_channel_counts sysfs struct + */ + +static struct attribute *i7core_udimm_counters_attrs[] = { + &dev_attr_udimm0.attr, + &dev_attr_udimm1.attr, + &dev_attr_udimm2.attr, + NULL }; -static const struct mcidev_sysfs_group i7core_udimm_counters = { - .name = "all_channel_counts", - .mcidev_attr = i7core_udimm_counters_attrs, +static struct attribute_group all_channel_counts_grp = { + .attrs = i7core_udimm_counters_attrs, }; -static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = { - { - .attr = { - .name = "inject_section", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = i7core_inject_section_show, - .store = i7core_inject_section_store, - }, { - .attr = { - .name = "inject_type", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = i7core_inject_type_show, - .store = i7core_inject_type_store, - }, { - .attr = { - .name = "inject_eccmask", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = i7core_inject_eccmask_show, - .store = i7core_inject_eccmask_store, - }, { - .grp = &i7core_inject_addrmatch, - }, { - .attr = { - .name = "inject_enable", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = i7core_inject_enable_show, - .store = i7core_inject_enable_store, - }, - { } /* End of list */ +static const struct attribute_group *all_channel_counts_groups[] = { + &all_channel_counts_grp, + NULL }; -static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = { - { - .attr = { - .name = "inject_section", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = i7core_inject_section_show, - .store = i7core_inject_section_store, - }, { - .attr = { - .name = "inject_type", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = i7core_inject_type_show, - .store = i7core_inject_type_store, - }, { - .attr = { - .name = "inject_eccmask", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = i7core_inject_eccmask_show, - .store = i7core_inject_eccmask_store, - }, { - .grp = &i7core_inject_addrmatch, - }, { - .attr = { - .name = "inject_enable", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = i7core_inject_enable_show, - .store = i7core_inject_enable_store, - }, { - .grp = &i7core_udimm_counters, - }, - { } /* End of list */ +static void all_channel_counts_release(struct device *device) +{ + edac_dbg(1, "Releasing device %s\n", dev_name(device)); + kfree(device); +} + +static struct device_type all_channel_counts_type = { + .groups = all_channel_counts_groups, + .release = all_channel_counts_release, }; +/* + * inject sysfs attributes + */ + +static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR, + i7core_inject_section_show, i7core_inject_section_store); + +static DEVICE_ATTR(inject_type, S_IRUGO | S_IWUSR, + i7core_inject_type_show, i7core_inject_type_store); + + +static DEVICE_ATTR(inject_eccmask, S_IRUGO | S_IWUSR, + i7core_inject_eccmask_show, i7core_inject_eccmask_store); + +static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR, + i7core_inject_enable_show, i7core_inject_enable_store); + +static int i7core_create_sysfs_devices(struct mem_ctl_info *mci) +{ + struct i7core_pvt *pvt = mci->pvt_info; + int rc; + + rc = device_create_file(&mci->dev, &dev_attr_inject_section); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_inject_type); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_inject_eccmask); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_inject_enable); + if (rc < 0) + return rc; + + pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL); + if (!pvt->addrmatch_dev) + return rc; + + pvt->addrmatch_dev->type = &addrmatch_type; + pvt->addrmatch_dev->bus = mci->dev.bus; + device_initialize(pvt->addrmatch_dev); + pvt->addrmatch_dev->parent = &mci->dev; + dev_set_name(pvt->addrmatch_dev, "inject_addrmatch"); + dev_set_drvdata(pvt->addrmatch_dev, mci); + + edac_dbg(1, "creating %s\n", dev_name(pvt->addrmatch_dev)); + + rc = device_add(pvt->addrmatch_dev); + if (rc < 0) + return rc; + + if (!pvt->is_registered) { + pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev), + GFP_KERNEL); + if (!pvt->chancounts_dev) { + put_device(pvt->addrmatch_dev); + device_del(pvt->addrmatch_dev); + return rc; + } + + pvt->chancounts_dev->type = &all_channel_counts_type; + pvt->chancounts_dev->bus = mci->dev.bus; + device_initialize(pvt->chancounts_dev); + pvt->chancounts_dev->parent = &mci->dev; + dev_set_name(pvt->chancounts_dev, "all_channel_counts"); + dev_set_drvdata(pvt->chancounts_dev, mci); + + edac_dbg(1, "creating %s\n", dev_name(pvt->chancounts_dev)); + + rc = device_add(pvt->chancounts_dev); + if (rc < 0) + return rc; + } + return 0; +} + +static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci) +{ + struct i7core_pvt *pvt = mci->pvt_info; + + edac_dbg(1, "\n"); + + device_remove_file(&mci->dev, &dev_attr_inject_section); + device_remove_file(&mci->dev, &dev_attr_inject_type); + device_remove_file(&mci->dev, &dev_attr_inject_eccmask); + device_remove_file(&mci->dev, &dev_attr_inject_enable); + + if (!pvt->is_registered) { + put_device(pvt->chancounts_dev); + device_del(pvt->chancounts_dev); + } + put_device(pvt->addrmatch_dev); + device_del(pvt->addrmatch_dev); +} + /**************************************************************************** Device initialization routines: put/get, init/exit ****************************************************************************/ @@ -1279,14 +1248,14 @@ static void i7core_put_devices(struct i7core_dev *i7core_dev) { int i; - debugf0(__FILE__ ": %s()\n", __func__); + edac_dbg(0, "\n"); for (i = 0; i < i7core_dev->n_devs; i++) { struct pci_dev *pdev = i7core_dev->pdev[i]; if (!pdev) continue; - debugf0("Removing dev %02x:%02x.%d\n", - pdev->bus->number, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + edac_dbg(0, "Removing dev %02x:%02x.%d\n", + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); pci_dev_put(pdev); } } @@ -1329,12 +1298,12 @@ static unsigned i7core_pci_lastbus(void) while ((b = pci_find_next_bus(b)) != NULL) { bus = b->number; - debugf0("Found bus %d\n", bus); + edac_dbg(0, "Found bus %d\n", bus); if (bus > last_bus) last_bus = bus; } - debugf0("Last bus %d\n", last_bus); + edac_dbg(0, "Last bus %d\n", last_bus); return last_bus; } @@ -1361,18 +1330,23 @@ static int i7core_get_onedevice(struct pci_dev **prev, dev_descr->dev_id, *prev); /* - * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs + * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs * is at addr 8086:2c40, instead of 8086:2c41. So, we need * to probe for the alternate address in case of failure */ - if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) { + pci_dev_get(*prev); /* pci_get_device will put it */ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev); + } - if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev) + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && + !pdev) { + pci_dev_get(*prev); /* pci_get_device will put it */ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT, *prev); + } if (!pdev) { if (*prev) { @@ -1441,10 +1415,10 @@ static int i7core_get_onedevice(struct pci_dev **prev, return -ENODEV; } - debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n", - socket, bus, dev_descr->dev, - dev_descr->func, - PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + edac_dbg(0, "Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n", + socket, bus, dev_descr->dev, + dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); /* * As stated on drivers/pci/search.c, the reference count for @@ -1542,13 +1516,13 @@ static int mci_bind_devs(struct mem_ctl_info *mci, family = "unknown"; pvt->enable_scrub = false; } - debugf0("Detected a processor type %s\n", family); + edac_dbg(0, "Detected a processor type %s\n", family); } else goto error; - debugf0("Associated fn %d.%d, dev = %p, socket %d\n", - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), - pdev, i7core_dev->socket); + edac_dbg(0, "Associated fn %d.%d, dev = %p, socket %d\n", + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + pdev, i7core_dev->socket); if (PCI_SLOT(pdev->devfn) == 3 && PCI_FUNC(pdev->devfn) == 2) @@ -1567,24 +1541,6 @@ error: /**************************************************************************** Error check routines ****************************************************************************/ -static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci, - const int chan, - const int dimm, - const int add) -{ - char *msg; - struct i7core_pvt *pvt = mci->pvt_info; - int row = pvt->csrow_map[chan][dimm], i; - - for (i = 0; i < add; i++) { - msg = kasprintf(GFP_KERNEL, "Corrected error " - "(Socket=%d channel=%d dimm=%d)", - pvt->i7core_dev->socket, chan, dimm); - - edac_mc_handle_fbd_ce(mci, row, 0, msg); - kfree (msg); - } -} static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci, const int chan, @@ -1623,12 +1579,17 @@ static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci, /*updated the edac core */ if (add0 != 0) - i7core_rdimm_update_csrow(mci, chan, 0, add0); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add0, + 0, 0, 0, + chan, 0, -1, "error", ""); if (add1 != 0) - i7core_rdimm_update_csrow(mci, chan, 1, add1); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add1, + 0, 0, 0, + chan, 1, -1, "error", ""); if (add2 != 0) - i7core_rdimm_update_csrow(mci, chan, 2, add2); - + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add2, + 0, 0, 0, + chan, 2, -1, "error", ""); } static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci) @@ -1651,8 +1612,8 @@ static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5, &rcv[2][1]); for (i = 0 ; i < 3; i++) { - debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n", - (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]); + edac_dbg(3, "MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n", + (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]); /*if the channel has 3 dimms*/ if (pvt->channel[i].dimms > 2) { new0 = DIMM_BOT_COR_ERR(rcv[i][0]); @@ -1683,7 +1644,7 @@ static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci) int new0, new1, new2; if (!pvt->pci_mcr[4]) { - debugf0("%s MCR registers not found\n", __func__); + edac_dbg(0, "MCR registers not found\n"); return; } @@ -1747,20 +1708,26 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, const struct mce *m) { struct i7core_pvt *pvt = mci->pvt_info; - char *type, *optype, *err, *msg; + char *optype, *err; + enum hw_event_mc_err_type tp_event; unsigned long error = m->status & 0x1ff0000l; + bool uncorrected_error = m->mcgstatus & 1ll << 61; + bool ripv = m->mcgstatus & 1; u32 optypenum = (m->status >> 4) & 0x07; u32 core_err_cnt = (m->status >> 38) & 0x7fff; u32 dimm = (m->misc >> 16) & 0x3; u32 channel = (m->misc >> 18) & 0x3; u32 syndrome = m->misc >> 32; u32 errnum = find_first_bit(&error, 32); - int csrow; - if (m->mcgstatus & 1) - type = "FATAL"; - else - type = "NON_FATAL"; + if (uncorrected_error) { + if (ripv) + tp_event = HW_EVENT_ERR_FATAL; + else + tp_event = HW_EVENT_ERR_UNCORRECTED; + } else { + tp_event = HW_EVENT_ERR_CORRECTED; + } switch (optypenum) { case 0: @@ -1815,27 +1782,18 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, err = "unknown"; } - /* FIXME: should convert addr into bank and rank information */ - msg = kasprintf(GFP_ATOMIC, - "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, " - "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n", - type, (long long) m->addr, m->cpu, dimm, channel, - syndrome, core_err_cnt, (long long)m->status, - (long long)m->misc, optype, err); - - debugf0("%s", msg); - - csrow = pvt->csrow_map[channel][dimm]; - - /* Call the helper to output message */ - if (m->mcgstatus & 1) - edac_mc_handle_fbd_ue(mci, csrow, 0, - 0 /* FIXME: should be channel here */, msg); - else if (!pvt->is_registered) - edac_mc_handle_fbd_ce(mci, csrow, - 0 /* FIXME: should be channel here */, msg); - - kfree(msg); + /* + * Call the helper to output message + * FIXME: what to do if core_err_cnt > 1? Currently, it generates + * only one event + */ + if (uncorrected_error || !pvt->is_registered) + edac_mc_handle_error(tp_event, mci, core_err_cnt, + m->addr >> PAGE_SHIFT, + m->addr & ~PAGE_MASK, + syndrome, + channel, dimm, -1, + err, optype); } /* @@ -1932,12 +1890,6 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->bank != 8) return NOTIFY_DONE; -#ifdef CONFIG_SMP - /* Only handle if it is the right mc controller */ - if (mce->socketid != pvt->i7core_dev->socket) - return NOTIFY_DONE; -#endif - smp_rmb(); if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { smp_wmb(); @@ -2132,7 +2084,7 @@ static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw) /* * get_sdram_scrub_rate This routine convert current scrub rate value - * into byte/sec bandwidth accourding to + * into byte/sec bandwidth according to * SCRUBINTERVAL formula found in datasheet. */ static int get_sdram_scrub_rate(struct mem_ctl_info *mci) @@ -2218,8 +2170,7 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) struct i7core_pvt *pvt; if (unlikely(!mci || !mci->pvt_info)) { - debugf0("MC: " __FILE__ ": %s(): dev = %p\n", - __func__, &i7core_dev->pdev[0]->dev); + edac_dbg(0, "MC: dev = %p\n", &i7core_dev->pdev[0]->dev); i7core_printk(KERN_ERR, "Couldn't find mci handler\n"); return; @@ -2227,22 +2178,20 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) pvt = mci->pvt_info; - debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", - __func__, mci, &i7core_dev->pdev[0]->dev); + edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev); /* Disable scrubrate setting */ if (pvt->enable_scrub) disable_sdram_scrub_setting(mci); - mce_unregister_decode_chain(&i7_mce_dec); - /* Disable EDAC polling */ i7core_pci_ctl_release(pvt); /* Remove MC sysfs nodes */ - edac_mc_del_mc(mci->dev); + i7core_delete_sysfs_devices(mci); + edac_mc_del_mc(mci->pdev); - debugf1("%s: free mci struct\n", mci->ctl_name); + edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); kfree(mci->ctl_name); edac_mc_free(mci); i7core_dev->mci = NULL; @@ -2252,20 +2201,23 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) { struct mem_ctl_info *mci; struct i7core_pvt *pvt; - int rc, channels, csrows; - - /* Check the number of active and not disabled channels */ - rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows); - if (unlikely(rc < 0)) - return rc; + int rc; + struct edac_mc_layer layers[2]; /* allocate a new MC control structure */ - mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket); + + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = NUM_CHANS; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = MAX_DIMMS; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers, + sizeof(*pvt)); if (unlikely(!mci)) return -ENOMEM; - debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", - __func__, mci, &i7core_dev->pdev[0]->dev); + edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev); pvt = mci->pvt_info; memset(pvt, 0, sizeof(*pvt)); @@ -2294,15 +2246,11 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) if (unlikely(rc < 0)) goto fail0; - if (pvt->is_registered) - mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs; - else - mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs; /* Get dimm basic config */ get_dimm_config(mci); /* record ptr to the generic device */ - mci->dev = &i7core_dev->pdev[0]->dev; + mci->pdev = &i7core_dev->pdev[0]->dev; /* Set the function pointer to an actual operation function */ mci->edac_check = i7core_check_error; @@ -2312,8 +2260,7 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) /* add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { - debugf0("MC: " __FILE__ - ": %s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); /* FIXME: perhaps some code should go here that disables error * reporting if we just enabled it */ @@ -2321,6 +2268,12 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) rc = -EINVAL; goto fail0; } + if (i7core_create_sysfs_devices(mci)) { + edac_dbg(0, "MC: failed to create sysfs nodes\n"); + edac_mc_del_mc(mci->pdev); + rc = -EINVAL; + goto fail0; + } /* Default error mask is any memory */ pvt->inject.channel = 0; @@ -2336,8 +2289,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) /* DCLK for scrub rate setting */ pvt->dclk_freq = get_dclk_freq(); - mce_register_decode_chain(&i7_mce_dec); - return 0; fail0: @@ -2355,8 +2306,7 @@ fail0: * < 0 for error code */ -static int __devinit i7core_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static int i7core_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int rc, count = 0; struct i7core_dev *i7core_dev; @@ -2418,11 +2368,11 @@ fail0: * i7core_remove destructor for one instance of device * */ -static void __devexit i7core_remove(struct pci_dev *pdev) +static void i7core_remove(struct pci_dev *pdev) { struct i7core_dev *i7core_dev; - debugf0(__FILE__ ": %s()\n", __func__); + edac_dbg(0, "\n"); /* * we have a trouble here: pdev value for removal will be wrong, since @@ -2459,7 +2409,7 @@ MODULE_DEVICE_TABLE(pci, i7core_pci_tbl); static struct pci_driver i7core_driver = { .name = "i7core_edac", .probe = i7core_probe, - .remove = __devexit_p(i7core_remove), + .remove = i7core_remove, .id_table = i7core_pci_tbl, }; @@ -2471,7 +2421,7 @@ static int __init i7core_init(void) { int pci_rc; - debugf2("MC: " __FILE__ ": %s()\n", __func__); + edac_dbg(2, "\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -2481,8 +2431,10 @@ static int __init i7core_init(void) pci_rc = pci_register_driver(&i7core_driver); - if (pci_rc >= 0) + if (pci_rc >= 0) { + mce_register_decode_chain(&i7_mce_dec); return 0; + } i7core_printk(KERN_ERR, "Failed to register device with error %d.\n", pci_rc); @@ -2496,15 +2448,16 @@ static int __init i7core_init(void) */ static void __exit i7core_exit(void) { - debugf2("MC: " __FILE__ ": %s()\n", __func__); + edac_dbg(2, "\n"); pci_unregister_driver(&i7core_driver); + mce_unregister_decode_chain(&i7_mce_dec); } module_init(i7core_init); module_exit(i7core_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); +MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - " I7CORE_REVISION); diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c index 4329d39f902..d730e276d1a 100644 --- a/drivers/edac/i82443bxgx_edac.c +++ b/drivers/edac/i82443bxgx_edac.c @@ -12,7 +12,7 @@ * 440GX fix by Jason Uhlenkott <juhlenko@akamai.com>. * * Written with reference to 82443BX Host Bridge Datasheet: - * http://download.intel.com/design/chipsets/datashts/29063301.pdf + * http://download.intel.com/design/chipsets/datashts/29063301.pdf * references to this document given in []. * * This module doesn't support the 440LX, but it may be possible to @@ -124,7 +124,7 @@ static void i82443bxgx_edacmc_get_error_info(struct mem_ctl_info *mci, *info) { struct pci_dev *pdev; - pdev = to_pci_dev(mci->dev); + pdev = to_pci_dev(mci->pdev); pci_read_config_dword(pdev, I82443BXGX_EAP, &info->eap); if (info->eap & I82443BXGX_EAP_OFFSET_SBE) /* Clear error to allow next error to be reported [p.61] */ @@ -156,19 +156,19 @@ static int i82443bxgx_edacmc_process_error_info(struct mem_ctl_info *mci, if (info->eap & I82443BXGX_EAP_OFFSET_SBE) { error_found = 1; if (handle_errors) - edac_mc_handle_ce(mci, page, pageoffset, - /* 440BX/GX don't make syndrome information - * available */ - 0, edac_mc_find_csrow_by_page(mci, page), 0, - mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + page, pageoffset, 0, + edac_mc_find_csrow_by_page(mci, page), + 0, -1, mci->ctl_name, ""); } if (info->eap & I82443BXGX_EAP_OFFSET_MBE) { error_found = 1; if (handle_errors) - edac_mc_handle_ue(mci, page, pageoffset, - edac_mc_find_csrow_by_page(mci, page), - mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + page, pageoffset, 0, + edac_mc_find_csrow_by_page(mci, page), + 0, -1, mci->ctl_name, ""); } return error_found; @@ -178,7 +178,7 @@ static void i82443bxgx_edacmc_check(struct mem_ctl_info *mci) { struct i82443bxgx_edacmc_error_info info; - debugf1("MC%d: %s: %s()\n", mci->mc_idx, __FILE__, __func__); + edac_dbg(1, "MC%d\n", mci->mc_idx); i82443bxgx_edacmc_get_error_info(mci, &info); i82443bxgx_edacmc_process_error_info(mci, &info, 1); } @@ -189,6 +189,7 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci, enum mem_type mtype) { struct csrow_info *csrow; + struct dimm_info *dimm; int index; u8 drbar, dramc; u32 row_base, row_high_limit, row_high_limit_last; @@ -196,16 +197,17 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci, pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc); row_high_limit_last = 0; for (index = 0; index < mci->nr_csrows; index++) { - csrow = &mci->csrows[index]; + csrow = mci->csrows[index]; + dimm = csrow->channels[0]->dimm; + pci_read_config_byte(pdev, I82443BXGX_DRB + index, &drbar); - debugf1("MC%d: %s: %s() Row=%d DRB = %#0x\n", - mci->mc_idx, __FILE__, __func__, index, drbar); + edac_dbg(1, "MC%d: Row=%d DRB = %#0x\n", + mci->mc_idx, index, drbar); row_high_limit = ((u32) drbar << 23); /* find the DRAM Chip Select Base address and mask */ - debugf1("MC%d: %s: %s() Row=%d, " - "Boundary Address=%#0x, Last = %#0x\n", - mci->mc_idx, __FILE__, __func__, index, row_high_limit, - row_high_limit_last); + edac_dbg(1, "MC%d: Row=%d, Boundary Address=%#0x, Last = %#0x\n", + mci->mc_idx, index, row_high_limit, + row_high_limit_last); /* 440GX goes to 2GB, represented with a DRB of 0. */ if (row_high_limit_last && !row_high_limit) @@ -217,14 +219,14 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci, row_base = row_high_limit_last; csrow->first_page = row_base >> PAGE_SHIFT; csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1; - csrow->nr_pages = csrow->last_page - csrow->first_page + 1; + dimm->nr_pages = csrow->last_page - csrow->first_page + 1; /* EAP reports in 4kilobyte granularity [61] */ - csrow->grain = 1 << 12; - csrow->mtype = mtype; + dimm->grain = 1 << 12; + dimm->mtype = mtype; /* I don't think 440BX can tell you device type? FIXME? */ - csrow->dtype = DEV_UNKNOWN; + dimm->dtype = DEV_UNKNOWN; /* Mode is global to all rows on 440BX */ - csrow->edac_mode = edac_mode; + dimm->edac_mode = edac_mode; row_high_limit_last = row_high_limit; } } @@ -232,12 +234,13 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci, static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx) { struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; u8 dramc; u32 nbxcfg, ecc_mode; enum mem_type mtype; enum edac_type edac_mode; - debugf0("MC: %s: %s()\n", __FILE__, __func__); + edac_dbg(0, "MC:\n"); /* Something is really hosed if PCI config space reads from * the MC aren't working. @@ -245,13 +248,18 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx) if (pci_read_config_dword(pdev, I82443BXGX_NBXCFG, &nbxcfg)) return -EIO; - mci = edac_mc_alloc(0, I82443BXGX_NR_CSROWS, I82443BXGX_NR_CHANS, 0); - + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = I82443BXGX_NR_CSROWS; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = I82443BXGX_NR_CHANS; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0); if (mci == NULL) return -ENOMEM; - debugf0("MC: %s: %s(): mci = %p\n", __FILE__, __func__, mci); - mci->dev = &pdev->dev; + edac_dbg(0, "MC: mci = %p\n", mci); + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_EDO | MEM_FLAG_SDR | MEM_FLAG_RDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc); @@ -266,8 +274,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx) mtype = MEM_RDR; break; default: - debugf0("Unknown/reserved DRAM type value " - "in DRAMC register!\n"); + edac_dbg(0, "Unknown/reserved DRAM type value in DRAMC register!\n"); mtype = -MEM_UNKNOWN; } @@ -296,8 +303,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx) edac_mode = EDAC_SECDED; break; default: - debugf0("%s(): Unknown/reserved ECC state " - "in NBXCFG register!\n", __func__); + edac_dbg(0, "Unknown/reserved ECC state in NBXCFG register!\n"); edac_mode = EDAC_UNKNOWN; break; } @@ -321,7 +327,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx) mci->ctl_page_to_phys = NULL; if (edac_mc_add_mc(mci)) { - debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "failed edac_mc_add_mc()\n"); goto fail; } @@ -336,7 +342,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx) __func__); } - debugf3("MC: %s: %s(): success\n", __FILE__, __func__); + edac_dbg(3, "MC: success\n"); return 0; fail: @@ -347,12 +353,12 @@ fail: EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_probe1); /* returns count (>= 0), or negative on error */ -static int __devinit i82443bxgx_edacmc_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i82443bxgx_edacmc_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { int rc; - debugf0("MC: %s: %s()\n", __FILE__, __func__); + edac_dbg(0, "MC:\n"); /* don't need to call pci_enable_device() */ rc = i82443bxgx_edacmc_probe1(pdev, ent->driver_data); @@ -363,11 +369,11 @@ static int __devinit i82443bxgx_edacmc_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i82443bxgx_edacmc_remove_one(struct pci_dev *pdev) +static void i82443bxgx_edacmc_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; - debugf0("%s: %s()\n", __FILE__, __func__); + edac_dbg(0, "\n"); if (i82443bxgx_pci) edac_pci_release_generic_ctl(i82443bxgx_pci); @@ -380,7 +386,7 @@ static void __devexit i82443bxgx_edacmc_remove_one(struct pci_dev *pdev) EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one); -static const struct pci_device_id i82443bxgx_pci_tbl[] __devinitdata = { +static const struct pci_device_id i82443bxgx_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0)}, @@ -393,7 +399,7 @@ MODULE_DEVICE_TABLE(pci, i82443bxgx_pci_tbl); static struct pci_driver i82443bxgx_edacmc_driver = { .name = EDAC_MOD_STR, .probe = i82443bxgx_edacmc_init_one, - .remove = __devexit_p(i82443bxgx_edacmc_remove_one), + .remove = i82443bxgx_edacmc_remove_one, .id_table = i82443bxgx_pci_tbl, }; @@ -419,7 +425,7 @@ static int __init i82443bxgx_edacmc_init(void) id = &i82443bxgx_pci_tbl[i]; } if (!mci_pdev) { - debugf0("i82443bxgx pci_get_device fail\n"); + edac_dbg(0, "i82443bxgx pci_get_device fail\n"); pci_rc = -ENODEV; goto fail1; } @@ -427,7 +433,7 @@ static int __init i82443bxgx_edacmc_init(void) pci_rc = i82443bxgx_edacmc_init_one(mci_pdev, i82443bxgx_pci_tbl); if (pci_rc < 0) { - debugf0("i82443bxgx init fail\n"); + edac_dbg(0, "i82443bxgx init fail\n"); pci_rc = -ENODEV; goto fail1; } diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index 931a0577504..3382f6344e4 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -67,7 +67,7 @@ static void i82860_get_error_info(struct mem_ctl_info *mci, { struct pci_dev *pdev; - pdev = to_pci_dev(mci->dev); + pdev = to_pci_dev(mci->pdev); /* * This is a mess because there is no atomic way to read all the @@ -99,6 +99,7 @@ static int i82860_process_error_info(struct mem_ctl_info *mci, struct i82860_error_info *info, int handle_errors) { + struct dimm_info *dimm; int row; if (!(info->errsts2 & 0x0003)) @@ -108,18 +109,25 @@ static int i82860_process_error_info(struct mem_ctl_info *mci, return 1; if ((info->errsts ^ info->errsts2) & 0x0003) { - edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, "UE overwrote CE", ""); info->errsts = info->errsts2; } info->eap >>= PAGE_SHIFT; row = edac_mc_find_csrow_by_page(mci, info->eap); + dimm = mci->csrows[row]->channels[0]->dimm; if (info->errsts & 0x0002) - edac_mc_handle_ue(mci, info->eap, 0, row, "i82860 UE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + info->eap, 0, 0, + dimm->location[0], dimm->location[1], -1, + "i82860 UE", ""); else - edac_mc_handle_ce(mci, info->eap, 0, info->derrsyn, row, 0, - "i82860 UE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + info->eap, 0, info->derrsyn, + dimm->location[0], dimm->location[1], -1, + "i82860 CE", ""); return 1; } @@ -128,7 +136,7 @@ static void i82860_check(struct mem_ctl_info *mci) { struct i82860_error_info info; - debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + edac_dbg(1, "MC%d\n", mci->mc_idx); i82860_get_error_info(mci, &info); i82860_process_error_info(mci, &info, 1); } @@ -140,6 +148,7 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev) u16 value; u32 cumul_size; struct csrow_info *csrow; + struct dimm_info *dimm; int index; pci_read_config_word(pdev, I82860_MCHCFG, &mchcfg_ddim); @@ -152,47 +161,56 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev) * in all eight rows. */ for (index = 0; index < mci->nr_csrows; index++) { - csrow = &mci->csrows[index]; + csrow = mci->csrows[index]; + dimm = csrow->channels[0]->dimm; + pci_read_config_word(pdev, I82860_GBA + index * 2, &value); cumul_size = (value & I82860_GBA_MASK) << (I82860_GBA_SHIFT - PAGE_SHIFT); - debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index, - cumul_size); + edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size); if (cumul_size == last_cumul_size) continue; /* not populated */ csrow->first_page = last_cumul_size; csrow->last_page = cumul_size - 1; - csrow->nr_pages = cumul_size - last_cumul_size; + dimm->nr_pages = cumul_size - last_cumul_size; last_cumul_size = cumul_size; - csrow->grain = 1 << 12; /* I82860_EAP has 4KiB reolution */ - csrow->mtype = MEM_RMBS; - csrow->dtype = DEV_UNKNOWN; - csrow->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE; + dimm->grain = 1 << 12; /* I82860_EAP has 4KiB reolution */ + dimm->mtype = MEM_RMBS; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE; } } static int i82860_probe1(struct pci_dev *pdev, int dev_idx) { struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct i82860_error_info discard; - /* RDRAM has channels but these don't map onto the abstractions that - edac uses. - The device groups from the GRA registers seem to map reasonably - well onto the notion of a chip select row. - There are 16 GRA registers and since the name is associated with - the channel and the GRA registers map to physical devices so we are - going to make 1 channel for group. + /* + * RDRAM has channels but these don't map onto the csrow abstraction. + * According with the datasheet, there are 2 Rambus channels, supporting + * up to 16 direct RDRAM devices. + * The device groups from the GRA registers seem to map reasonably + * well onto the notion of a chip select row. + * There are 16 GRA registers and since the name is associated with + * the channel and the GRA registers map to physical devices so we are + * going to make 1 channel for group. */ - mci = edac_mc_alloc(0, 16, 1, 0); - + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = 2; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = 8; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0); if (!mci) return -ENOMEM; - debugf3("%s(): init mci\n", __func__); - mci->dev = &pdev->dev; + edac_dbg(3, "init mci\n"); + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_DDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; /* I"m not sure about this but I think that all RDRAM is SECDED */ @@ -210,7 +228,7 @@ static int i82860_probe1(struct pci_dev *pdev, int dev_idx) * type of memory controller. The ID is therefore hardcoded to 0. */ if (edac_mc_add_mc(mci)) { - debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "failed edac_mc_add_mc()\n"); goto fail; } @@ -226,7 +244,7 @@ static int i82860_probe1(struct pci_dev *pdev, int dev_idx) } /* get this far and it's successful */ - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); return 0; @@ -236,12 +254,12 @@ fail: } /* returns count (>= 0), or negative on error */ -static int __devinit i82860_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i82860_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { int rc; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); i82860_printk(KERN_INFO, "i82860 init one\n"); if (pci_enable_device(pdev) < 0) @@ -255,11 +273,11 @@ static int __devinit i82860_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i82860_remove_one(struct pci_dev *pdev) +static void i82860_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); if (i82860_pci) edac_pci_release_generic_ctl(i82860_pci); @@ -270,7 +288,7 @@ static void __devexit i82860_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i82860_pci_tbl[] __devinitdata = { +static const struct pci_device_id i82860_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82860}, @@ -284,7 +302,7 @@ MODULE_DEVICE_TABLE(pci, i82860_pci_tbl); static struct pci_driver i82860_driver = { .name = EDAC_MOD_STR, .probe = i82860_init_one, - .remove = __devexit_p(i82860_remove_one), + .remove = i82860_remove_one, .id_table = i82860_pci_tbl, }; @@ -292,7 +310,7 @@ static int __init i82860_init(void) { int pci_rc; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -305,7 +323,7 @@ static int __init i82860_init(void) PCI_DEVICE_ID_INTEL_82860_0, NULL); if (mci_pdev == NULL) { - debugf0("860 pci_get_device fail\n"); + edac_dbg(0, "860 pci_get_device fail\n"); pci_rc = -ENODEV; goto fail1; } @@ -313,7 +331,7 @@ static int __init i82860_init(void) pci_rc = i82860_init_one(mci_pdev, i82860_pci_tbl); if (pci_rc < 0) { - debugf0("860 init fail\n"); + edac_dbg(0, "860 init fail\n"); pci_rc = -ENODEV; goto fail1; } @@ -333,7 +351,7 @@ fail0: static void __exit i82860_exit(void) { - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); pci_unregister_driver(&i82860_driver); diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index 33864c63c68..64b68320249 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -38,7 +38,8 @@ #endif /* PCI_DEVICE_ID_INTEL_82875_6 */ /* four csrows in dual channel, eight in single channel */ -#define I82875P_NR_CSROWS(nr_chans) (8/(nr_chans)) +#define I82875P_NR_DIMMS 8 +#define I82875P_NR_CSROWS(nr_chans) (I82875P_NR_DIMMS / (nr_chans)) /* Intel 82875p register addresses - device 0 function 0 - DRAM Controller */ #define I82875P_EAP 0x58 /* Error Address Pointer (32b) @@ -188,7 +189,7 @@ static void i82875p_get_error_info(struct mem_ctl_info *mci, { struct pci_dev *pdev; - pdev = to_pci_dev(mci->dev); + pdev = to_pci_dev(mci->pdev); /* * This is a mess because there is no atomic way to read all the @@ -226,7 +227,7 @@ static int i82875p_process_error_info(struct mem_ctl_info *mci, { int row, multi_chan; - multi_chan = mci->csrows[0].nr_channels - 1; + multi_chan = mci->csrows[0]->nr_channels - 1; if (!(info->errsts & 0x0081)) return 0; @@ -235,7 +236,9 @@ static int i82875p_process_error_info(struct mem_ctl_info *mci, return 1; if ((info->errsts ^ info->errsts2) & 0x0081) { - edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, + "UE overwrote CE", ""); info->errsts = info->errsts2; } @@ -243,11 +246,15 @@ static int i82875p_process_error_info(struct mem_ctl_info *mci, row = edac_mc_find_csrow_by_page(mci, info->eap); if (info->errsts & 0x0080) - edac_mc_handle_ue(mci, info->eap, 0, row, "i82875p UE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + info->eap, 0, 0, + row, -1, -1, + "i82875p UE", ""); else - edac_mc_handle_ce(mci, info->eap, 0, info->derrsyn, row, - multi_chan ? (info->des & 0x1) : 0, - "i82875p CE"); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + info->eap, 0, info->derrsyn, + row, multi_chan ? (info->des & 0x1) : 0, + -1, "i82875p CE", ""); return 1; } @@ -256,7 +263,7 @@ static void i82875p_check(struct mem_ctl_info *mci) { struct i82875p_error_info info; - debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + edac_dbg(1, "MC%d\n", mci->mc_idx); i82875p_get_error_info(mci, &info); i82875p_process_error_info(mci, &info, 1); } @@ -268,7 +275,6 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, { struct pci_dev *dev; void __iomem *window; - int err; *ovrfl_pdev = NULL; *ovrfl_window = NULL; @@ -286,13 +292,8 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, if (dev == NULL) return 1; - err = pci_bus_add_device(dev); - if (err) { - i82875p_printk(KERN_ERR, - "%s(): pci_bus_add_device() Failed\n", - __func__); - } pci_bus_assign_resources(dev->bus); + pci_bus_add_device(dev); } *ovrfl_pdev = dev; @@ -342,11 +343,13 @@ static void i82875p_init_csrows(struct mem_ctl_info *mci, void __iomem * ovrfl_window, u32 drc) { struct csrow_info *csrow; + struct dimm_info *dimm; + unsigned nr_chans = dual_channel_active(drc) + 1; unsigned long last_cumul_size; u8 value; u32 drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */ - u32 cumul_size; - int index; + u32 cumul_size, nr_pages; + int index, j; drc_ddim = (drc >> 18) & 0x1; last_cumul_size = 0; @@ -358,23 +361,28 @@ static void i82875p_init_csrows(struct mem_ctl_info *mci, */ for (index = 0; index < mci->nr_csrows; index++) { - csrow = &mci->csrows[index]; + csrow = mci->csrows[index]; value = readb(ovrfl_window + I82875P_DRB + index); cumul_size = value << (I82875P_DRB_SHIFT - PAGE_SHIFT); - debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index, - cumul_size); + edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size); if (cumul_size == last_cumul_size) continue; /* not populated */ csrow->first_page = last_cumul_size; csrow->last_page = cumul_size - 1; - csrow->nr_pages = cumul_size - last_cumul_size; + nr_pages = cumul_size - last_cumul_size; last_cumul_size = cumul_size; - csrow->grain = 1 << 12; /* I82875P_EAP has 4KiB reolution */ - csrow->mtype = MEM_DDR; - csrow->dtype = DEV_UNKNOWN; - csrow->edac_mode = drc_ddim ? EDAC_SECDED : EDAC_NONE; + + for (j = 0; j < nr_chans; j++) { + dimm = csrow->channels[j]->dimm; + + dimm->nr_pages = nr_pages / nr_chans; + dimm->grain = 1 << 12; /* I82875P_EAP has 4KiB reolution */ + dimm->mtype = MEM_DDR; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = drc_ddim ? EDAC_SECDED : EDAC_NONE; + } } } @@ -382,6 +390,7 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) { int rc = -ENODEV; struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct i82875p_pvt *pvt; struct pci_dev *ovrfl_pdev; void __iomem *ovrfl_window; @@ -389,27 +398,27 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) u32 nr_chans; struct i82875p_error_info discard; - debugf0("%s()\n", __func__); - - ovrfl_pdev = pci_get_device(PCI_VEND_DEV(INTEL, 82875_6), NULL); + edac_dbg(0, "\n"); if (i82875p_setup_overfl_dev(pdev, &ovrfl_pdev, &ovrfl_window)) return -ENODEV; drc = readl(ovrfl_window + I82875P_DRC); nr_chans = dual_channel_active(drc) + 1; - mci = edac_mc_alloc(sizeof(*pvt), I82875P_NR_CSROWS(nr_chans), - nr_chans, 0); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = I82875P_NR_CSROWS(nr_chans); + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = nr_chans; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); if (!mci) { rc = -ENOMEM; goto fail0; } - /* Keeps mci available after edac_mc_del_mc() till edac_mc_free() */ - kobject_get(&mci->edac_mci_kobj); - - debugf3("%s(): init mci\n", __func__); - mci->dev = &pdev->dev; + edac_dbg(3, "init mci\n"); + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_DDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_UNKNOWN; @@ -419,7 +428,7 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) mci->dev_name = pci_name(pdev); mci->edac_check = i82875p_check; mci->ctl_page_to_phys = NULL; - debugf3("%s(): init pvt\n", __func__); + edac_dbg(3, "init pvt\n"); pvt = (struct i82875p_pvt *)mci->pvt_info; pvt->ovrfl_pdev = ovrfl_pdev; pvt->ovrfl_window = ovrfl_window; @@ -430,7 +439,7 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) * type of memory controller. The ID is therefore hardcoded to 0. */ if (edac_mc_add_mc(mci)) { - debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "failed edac_mc_add_mc()\n"); goto fail1; } @@ -446,11 +455,10 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) } /* get this far and it's successful */ - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); return 0; fail1: - kobject_put(&mci->edac_mci_kobj); edac_mc_free(mci); fail0: @@ -463,12 +471,12 @@ fail0: } /* returns count (>= 0), or negative on error */ -static int __devinit i82875p_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i82875p_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { int rc; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); i82875p_printk(KERN_INFO, "i82875p init one\n"); if (pci_enable_device(pdev) < 0) @@ -482,12 +490,12 @@ static int __devinit i82875p_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i82875p_remove_one(struct pci_dev *pdev) +static void i82875p_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct i82875p_pvt *pvt = NULL; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); if (i82875p_pci) edac_pci_release_generic_ctl(i82875p_pci); @@ -511,7 +519,7 @@ static void __devexit i82875p_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i82875p_pci_tbl[] __devinitdata = { +static const struct pci_device_id i82875p_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82875P}, @@ -525,7 +533,7 @@ MODULE_DEVICE_TABLE(pci, i82875p_pci_tbl); static struct pci_driver i82875p_driver = { .name = EDAC_MOD_STR, .probe = i82875p_init_one, - .remove = __devexit_p(i82875p_remove_one), + .remove = i82875p_remove_one, .id_table = i82875p_pci_tbl, }; @@ -533,7 +541,7 @@ static int __init i82875p_init(void) { int pci_rc; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -548,7 +556,7 @@ static int __init i82875p_init(void) PCI_DEVICE_ID_INTEL_82875_0, NULL); if (!mci_pdev) { - debugf0("875p pci_get_device fail\n"); + edac_dbg(0, "875p pci_get_device fail\n"); pci_rc = -ENODEV; goto fail1; } @@ -556,7 +564,7 @@ static int __init i82875p_init(void) pci_rc = i82875p_init_one(mci_pdev, i82875p_pci_tbl); if (pci_rc < 0) { - debugf0("875p init fail\n"); + edac_dbg(0, "875p init fail\n"); pci_rc = -ENODEV; goto fail1; } @@ -576,7 +584,7 @@ fail0: static void __exit i82875p_exit(void) { - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); i82875p_remove_one(mci_pdev); pci_dev_put(mci_pdev); diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c index 4184e0171f0..10b10521f62 100644 --- a/drivers/edac/i82975x_edac.c +++ b/drivers/edac/i82975x_edac.c @@ -29,7 +29,8 @@ #define PCI_DEVICE_ID_INTEL_82975_0 0x277c #endif /* PCI_DEVICE_ID_INTEL_82975_0 */ -#define I82975X_NR_CSROWS(nr_chans) (8/(nr_chans)) +#define I82975X_NR_DIMMS 8 +#define I82975X_NR_CSROWS(nr_chans) (I82975X_NR_DIMMS / (nr_chans)) /* Intel 82975X register addresses - device 0 function 0 - DRAM Controller */ #define I82975X_EAP 0x58 /* Dram Error Address Pointer (32b) @@ -240,7 +241,7 @@ static void i82975x_get_error_info(struct mem_ctl_info *mci, { struct pci_dev *pdev; - pdev = to_pci_dev(mci->dev); + pdev = to_pci_dev(mci->pdev); /* * This is a mess because there is no atomic way to read all the @@ -287,7 +288,8 @@ static int i82975x_process_error_info(struct mem_ctl_info *mci, return 1; if ((info->errsts ^ info->errsts2) & 0x0003) { - edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, "UE overwrote CE", ""); info->errsts = info->errsts2; } @@ -306,16 +308,21 @@ static int i82975x_process_error_info(struct mem_ctl_info *mci, (info->xeap & 1) ? 1 : 0, info->eap, (unsigned int) page); return 0; } - chan = (mci->csrows[row].nr_channels == 1) ? 0 : info->eap & 1; + chan = (mci->csrows[row]->nr_channels == 1) ? 0 : info->eap & 1; offst = info->eap & ((1 << PAGE_SHIFT) - - (1 << mci->csrows[row].grain)); + (1 << mci->csrows[row]->channels[chan]->dimm->grain)); if (info->errsts & 0x0002) - edac_mc_handle_ue(mci, page, offst , row, "i82975x UE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + page, offst, 0, + row, -1, -1, + "i82975x UE", ""); else - edac_mc_handle_ce(mci, page, offst, info->derrsyn, row, - chan, "i82975x CE"); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + page, offst, info->derrsyn, + row, chan ? chan : 0, -1, + "i82975x CE", ""); return 1; } @@ -324,7 +331,7 @@ static void i82975x_check(struct mem_ctl_info *mci) { struct i82975x_error_info info; - debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + edac_dbg(1, "MC%d\n", mci->mc_idx); i82975x_get_error_info(mci, &info); i82975x_process_error_info(mci, &info, 1); } @@ -363,15 +370,13 @@ static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank) static void i82975x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, void __iomem *mch_window) { - static const char *labels[4] = { - "DIMM A1", "DIMM A2", - "DIMM B1", "DIMM B2" - }; struct csrow_info *csrow; unsigned long last_cumul_size; u8 value; - u32 cumul_size; + u32 cumul_size, nr_pages; int index, chan; + struct dimm_info *dimm; + enum dev_type dtype; last_cumul_size = 0; @@ -385,7 +390,7 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci, */ for (index = 0; index < mci->nr_csrows; index++) { - csrow = &mci->csrows[index]; + csrow = mci->csrows[index]; value = readb(mch_window + I82975X_DRB + index + ((index >= 4) ? 0x80 : 0)); @@ -397,8 +402,11 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci, */ if (csrow->nr_channels > 1) cumul_size <<= 1; - debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index, - cumul_size); + edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size); + + nr_pages = cumul_size - last_cumul_size; + if (!nr_pages) + continue; /* * Initialise dram labels @@ -406,22 +414,24 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci, * [0-7] for single-channel; i.e. csrow->nr_channels = 1 * [0-3] for dual-channel; i.e. csrow->nr_channels = 2 */ - for (chan = 0; chan < csrow->nr_channels; chan++) - strncpy(csrow->channels[chan].label, - labels[(index >> 1) + (chan * 2)], - EDAC_MC_LABEL_LEN); - - if (cumul_size == last_cumul_size) - continue; /* not populated */ + dtype = i82975x_dram_type(mch_window, index); + for (chan = 0; chan < csrow->nr_channels; chan++) { + dimm = mci->csrows[index]->channels[chan]->dimm; + + dimm->nr_pages = nr_pages / csrow->nr_channels; + + snprintf(csrow->channels[chan]->dimm->label, EDAC_MC_LABEL_LEN, "DIMM %c%d", + (chan == 0) ? 'A' : 'B', + index); + dimm->grain = 1 << 7; /* 128Byte cache-line resolution */ + dimm->dtype = i82975x_dram_type(mch_window, index); + dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */ + dimm->edac_mode = EDAC_SECDED; /* only supported */ + } csrow->first_page = last_cumul_size; csrow->last_page = cumul_size - 1; - csrow->nr_pages = cumul_size - last_cumul_size; last_cumul_size = cumul_size; - csrow->grain = 1 << 7; /* 128Byte cache-line resolution */ - csrow->mtype = MEM_DDR2; /* I82975x supports only DDR2 */ - csrow->dtype = i82975x_dram_type(mch_window, index); - csrow->edac_mode = EDAC_SECDED; /* only supported */ } } @@ -463,6 +473,7 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) { int rc = -ENODEV; struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct i82975x_pvt *pvt; void __iomem *mch_window; u32 mchbar; @@ -474,11 +485,11 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) u8 c1drb[4]; #endif - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); pci_read_config_dword(pdev, I82975X_MCHBAR, &mchbar); if (!(mchbar & 1)) { - debugf3("%s(): failed, MCHBAR disabled!\n", __func__); + edac_dbg(3, "failed, MCHBAR disabled!\n"); goto fail0; } mchbar &= 0xffffc000; /* bits 31:14 used for 16K window */ @@ -531,15 +542,20 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) chans = dual_channel_active(mch_window) + 1; /* assuming only one controller, index thus is 0 */ - mci = edac_mc_alloc(sizeof(*pvt), I82975X_NR_CSROWS(chans), - chans, 0); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = I82975X_NR_DIMMS; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = I82975X_NR_CSROWS(chans); + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); if (!mci) { rc = -ENOMEM; goto fail1; } - debugf3("%s(): init mci\n", __func__); - mci->dev = &pdev->dev; + edac_dbg(3, "init mci\n"); + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_DDR2; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; @@ -549,7 +565,7 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) mci->dev_name = pci_name(pdev); mci->edac_check = i82975x_check; mci->ctl_page_to_phys = NULL; - debugf3("%s(): init pvt\n", __func__); + edac_dbg(3, "init pvt\n"); pvt = (struct i82975x_pvt *) mci->pvt_info; pvt->mch_window = mch_window; i82975x_init_csrows(mci, pdev, mch_window); @@ -558,12 +574,12 @@ static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) /* finalize this instance of memory controller with edac core */ if (edac_mc_add_mc(mci)) { - debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "failed edac_mc_add_mc()\n"); goto fail2; } /* get this far and it's successful */ - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); return 0; fail2: @@ -576,12 +592,12 @@ fail0: } /* returns count (>= 0), or negative on error */ -static int __devinit i82975x_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i82975x_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { int rc; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); if (pci_enable_device(pdev) < 0) return -EIO; @@ -594,12 +610,12 @@ static int __devinit i82975x_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i82975x_remove_one(struct pci_dev *pdev) +static void i82975x_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct i82975x_pvt *pvt; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); mci = edac_mc_del_mc(&pdev->dev); if (mci == NULL) @@ -612,7 +628,7 @@ static void __devexit i82975x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i82975x_pci_tbl[] __devinitdata = { +static const struct pci_device_id i82975x_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 82975_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82975X @@ -627,7 +643,7 @@ MODULE_DEVICE_TABLE(pci, i82975x_pci_tbl); static struct pci_driver i82975x_driver = { .name = EDAC_MOD_STR, .probe = i82975x_init_one, - .remove = __devexit_p(i82975x_remove_one), + .remove = i82975x_remove_one, .id_table = i82975x_pci_tbl, }; @@ -635,7 +651,7 @@ static int __init i82975x_init(void) { int pci_rc; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -649,7 +665,7 @@ static int __init i82975x_init(void) PCI_DEVICE_ID_INTEL_82975_0, NULL); if (!mci_pdev) { - debugf0("i82975x pci_get_device fail\n"); + edac_dbg(0, "i82975x pci_get_device fail\n"); pci_rc = -ENODEV; goto fail1; } @@ -657,7 +673,7 @@ static int __init i82975x_init(void) pci_rc = i82975x_init_one(mci_pdev, i82975x_pci_tbl); if (pci_rc < 0) { - debugf0("i82975x init fail\n"); + edac_dbg(0, "i82975x init fail\n"); pci_rc = -ENODEV; goto fail1; } @@ -677,7 +693,7 @@ fail0: static void __exit i82975x_exit(void) { - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); pci_unregister_driver(&i82975x_driver); diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index bd926ea2e00..5f43620d580 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -6,7 +6,6 @@ static struct amd_decoder_ops *fam_ops; static u8 xec_mask = 0xf; -static u8 nb_err_cpumask = 0xf; static bool report_gart_errors; static void (*nb_bus_decoder)(int node_id, struct mce *m); @@ -39,43 +38,30 @@ EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); */ /* transaction type */ -const char *tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; -EXPORT_SYMBOL_GPL(tt_msgs); +static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; /* cache level */ -const char *ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; -EXPORT_SYMBOL_GPL(ll_msgs); +static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; /* memory transaction type */ -const char *rrrr_msgs[] = { +static const char * const rrrr_msgs[] = { "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP" }; -EXPORT_SYMBOL_GPL(rrrr_msgs); /* participating processor */ -const char *pp_msgs[] = { "SRC", "RES", "OBS", "GEN" }; +const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" }; EXPORT_SYMBOL_GPL(pp_msgs); /* request timeout */ -const char *to_msgs[] = { "no timeout", "timed out" }; -EXPORT_SYMBOL_GPL(to_msgs); +static const char * const to_msgs[] = { "no timeout", "timed out" }; /* memory or i/o */ -const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; -EXPORT_SYMBOL_GPL(ii_msgs); - -static const char *f10h_nb_mce_desc[] = { - "HT link data error", - "Protocol error (link, L3, probe filter, etc.)", - "Parity error in NB-internal arrays", - "Link Retry due to IO link transmission error", - "L3 ECC data cache error", - "ECC error in L3 cache tag", - "L3 LRU parity bits error", - "ECC Error in the Probe Filter directory" -}; +static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; -static const char * const f15h_ic_mce_desc[] = { +/* internal error type */ +static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" }; + +static const char * const f15h_mc1_mce_desc[] = { "UC during a demand linefill from L2", "Parity error during data load from IC", "Parity error for IC valid bit", @@ -88,14 +74,14 @@ static const char * const f15h_ic_mce_desc[] = { "Parity error for IC probe tag valid bit", "PFB non-cacheable bit parity error", "PFB valid bit parity error", /* xec = 0xd */ - "patch RAM", /* xec = 010 */ + "Microcode Patch Buffer", /* xec = 010 */ "uop queue", "insn buffer", "predecode buffer", "fetch address FIFO" }; -static const char * const f15h_cu_mce_desc[] = { +static const char * const f15h_mc2_mce_desc[] = { "Fill ECC error on data fills", /* xec = 0x4 */ "Fill parity error on insn fills", "Prefetcher request FIFO parity error", @@ -104,7 +90,7 @@ static const char * const f15h_cu_mce_desc[] = { "WCC Tag ECC error", "WCC Data ECC error", "WCB Data parity error", - "VB Data/ECC error", + "VB Data ECC or parity error", "L2 Tag ECC error", /* xec = 0x10 */ "Hard L2 Tag ECC error", "Multiple hits on L2 tag", @@ -112,7 +98,29 @@ static const char * const f15h_cu_mce_desc[] = { "PRB address parity error" }; -static const char * const fr_ex_mce_desc[] = { +static const char * const mc4_mce_desc[] = { + "DRAM ECC error detected on the NB", + "CRC error detected on HT link", + "Link-defined sync error packets detected on HT link", + "HT Master abort", + "HT Target abort", + "Invalid GART PTE entry during GART table walk", + "Unsupported atomic RMW received from an IO link", + "Watchdog timeout due to lack of progress", + "DRAM ECC error detected on the NB", + "SVM DMA Exclusion Vector error", + "HT data error detected on link", + "Protocol error (link, L3, probe filter)", + "NB internal arrays parity error", + "DRAM addr/ctl signals parity error", + "IO link transmission error", + "L3 data cache ECC error", /* xec = 0x1c */ + "L3 cache tag error", + "L3 LRU parity bits error", + "ECC Error in the Probe Filter directory" +}; + +static const char * const mc5_mce_desc[] = { "CPU Watchdog timer expire", "Wakeup array dest tag", "AG payload array", @@ -125,10 +133,11 @@ static const char * const fr_ex_mce_desc[] = { "Physical register file AG0 port", "Physical register file AG1 port", "Flag register file", - "DE correctable error could not be corrected" + "DE error occurred", + "Retire status queue" }; -static bool f12h_dc_mce(u16 ec, u8 xec) +static bool f12h_mc0_mce(u16 ec, u8 xec) { bool ret = false; @@ -146,26 +155,26 @@ static bool f12h_dc_mce(u16 ec, u8 xec) return ret; } -static bool f10h_dc_mce(u16 ec, u8 xec) +static bool f10h_mc0_mce(u16 ec, u8 xec) { if (R4(ec) == R4_GEN && LL(ec) == LL_L1) { pr_cont("during data scrub.\n"); return true; } - return f12h_dc_mce(ec, xec); + return f12h_mc0_mce(ec, xec); } -static bool k8_dc_mce(u16 ec, u8 xec) +static bool k8_mc0_mce(u16 ec, u8 xec) { if (BUS_ERROR(ec)) { pr_cont("during system linefill.\n"); return true; } - return f10h_dc_mce(ec, xec); + return f10h_mc0_mce(ec, xec); } -static bool f14h_dc_mce(u16 ec, u8 xec) +static bool cat_mc0_mce(u16 ec, u8 xec) { u8 r4 = R4(ec); bool ret = true; @@ -217,7 +226,7 @@ static bool f14h_dc_mce(u16 ec, u8 xec) return ret; } -static bool f15h_dc_mce(u16 ec, u8 xec) +static bool f15h_mc0_mce(u16 ec, u8 xec) { bool ret = true; @@ -255,22 +264,21 @@ static bool f15h_dc_mce(u16 ec, u8 xec) } else if (BUS_ERROR(ec)) { if (!xec) - pr_cont("during system linefill.\n"); + pr_cont("System Read Data Error.\n"); else - pr_cont(" Internal %s condition.\n", - ((xec == 1) ? "livelock" : "deadlock")); + pr_cont(" Internal error condition type %d.\n", xec); } else ret = false; return ret; } -static void amd_decode_dc_mce(struct mce *m) +static void decode_mc0_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Data Cache Error: "); + pr_emerg(HW_ERR "MC0 Error: "); /* TLB error signatures are the same across families */ if (TLB_ERROR(ec)) { @@ -280,13 +288,13 @@ static void amd_decode_dc_mce(struct mce *m) : (xec ? "multimatch" : "parity"))); return; } - } else if (fam_ops->dc_mce(ec, xec)) + } else if (fam_ops->mc0_mce(ec, xec)) ; else - pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); + pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n"); } -static bool k8_ic_mce(u16 ec, u8 xec) +static bool k8_mc1_mce(u16 ec, u8 xec) { u8 ll = LL(ec); bool ret = true; @@ -320,26 +328,32 @@ static bool k8_ic_mce(u16 ec, u8 xec) return ret; } -static bool f14h_ic_mce(u16 ec, u8 xec) +static bool cat_mc1_mce(u16 ec, u8 xec) { u8 r4 = R4(ec); bool ret = true; - if (MEM_ERROR(ec)) { - if (TT(ec) != 0 || LL(ec) != 1) - ret = false; + if (!MEM_ERROR(ec)) + return false; + + if (TT(ec) != TT_INSTR) + return false; + + if (r4 == R4_IRD) + pr_cont("Data/tag array parity error for a tag hit.\n"); + else if (r4 == R4_SNOOP) + pr_cont("Tag error during snoop/victimization.\n"); + else if (xec == 0x0) + pr_cont("Tag parity error from victim castout.\n"); + else if (xec == 0x2) + pr_cont("Microcode patch RAM parity error.\n"); + else + ret = false; - if (r4 == R4_IRD) - pr_cont("Data/tag array parity error for a tag hit.\n"); - else if (r4 == R4_SNOOP) - pr_cont("Tag error during snoop/victimization.\n"); - else - ret = false; - } return ret; } -static bool f15h_ic_mce(u16 ec, u8 xec) +static bool f15h_mc1_mce(u16 ec, u8 xec) { bool ret = true; @@ -348,15 +362,19 @@ static bool f15h_ic_mce(u16 ec, u8 xec) switch (xec) { case 0x0 ... 0xa: - pr_cont("%s.\n", f15h_ic_mce_desc[xec]); + pr_cont("%s.\n", f15h_mc1_mce_desc[xec]); break; case 0xd: - pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]); + pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]); + break; + + case 0x10: + pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]); break; - case 0x10 ... 0x14: - pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]); + case 0x11 ... 0x14: + pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]); break; default: @@ -365,12 +383,12 @@ static bool f15h_ic_mce(u16 ec, u8 xec) return ret; } -static void amd_decode_ic_mce(struct mce *m) +static void decode_mc1_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Instruction Cache Error: "); + pr_emerg(HW_ERR "MC1 Error: "); if (TLB_ERROR(ec)) pr_cont("%s TLB %s.\n", LL_MSG(ec), @@ -379,18 +397,15 @@ static void amd_decode_ic_mce(struct mce *m) bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); - } else if (fam_ops->ic_mce(ec, xec)) + } else if (fam_ops->mc1_mce(ec, xec)) ; else - pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); + pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n"); } -static void amd_decode_bu_mce(struct mce *m) +static bool k8_mc2_mce(u16 ec, u8 xec) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, xec_mask); - - pr_emerg(HW_ERR "Bus Unit Error"); + bool ret = true; if (xec == 0x1) pr_cont(" in the write data buffers.\n"); @@ -415,24 +430,18 @@ static void amd_decode_bu_mce(struct mce *m) pr_cont(": %s parity/ECC error during data " "access from L2.\n", R4_MSG(ec)); else - goto wrong_bu_mce; + ret = false; } else - goto wrong_bu_mce; + ret = false; } else - goto wrong_bu_mce; - - return; + ret = false; -wrong_bu_mce: - pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); + return ret; } -static void amd_decode_cu_mce(struct mce *m) +static bool f15h_mc2_mce(u16 ec, u8 xec) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, xec_mask); - - pr_emerg(HW_ERR "Combined Unit Error: "); + bool ret = true; if (TLB_ERROR(ec)) { if (xec == 0x0) @@ -440,238 +449,197 @@ static void amd_decode_cu_mce(struct mce *m) else if (xec == 0x1) pr_cont("Poison data provided for TLB fill.\n"); else - goto wrong_cu_mce; + ret = false; } else if (BUS_ERROR(ec)) { if (xec > 2) - goto wrong_cu_mce; + ret = false; pr_cont("Error during attempted NB data read.\n"); } else if (MEM_ERROR(ec)) { switch (xec) { case 0x4 ... 0xc: - pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]); + pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]); break; case 0x10 ... 0x14: - pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]); + pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]); break; default: - goto wrong_cu_mce; + ret = false; } } - return; - -wrong_cu_mce: - pr_emerg(HW_ERR "Corrupted CU MCE info?\n"); + return ret; } -static void amd_decode_ls_mce(struct mce *m) +static bool f16h_mc2_mce(u16 ec, u8 xec) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, xec_mask); - - if (boot_cpu_data.x86 >= 0x14) { - pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," - " please report on LKML.\n"); - return; - } - - pr_emerg(HW_ERR "Load Store Error"); - - if (xec == 0x0) { - u8 r4 = R4(ec); + u8 r4 = R4(ec); - if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) - goto wrong_ls_mce; - - pr_cont(" during %s.\n", R4_MSG(ec)); - } else - goto wrong_ls_mce; - - return; - -wrong_ls_mce: - pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); -} - -static bool k8_nb_mce(u16 ec, u8 xec) -{ - bool ret = true; + if (!MEM_ERROR(ec)) + return false; switch (xec) { - case 0x1: - pr_cont("CRC error detected on HT link.\n"); + case 0x04 ... 0x05: + pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O'); break; - case 0x5: - pr_cont("Invalid GART PTE entry during GART table walk.\n"); + case 0x09 ... 0x0b: + case 0x0d ... 0x0f: + pr_cont("ECC error in L2 tag (%s).\n", + ((r4 == R4_GEN) ? "BankReq" : + ((r4 == R4_SNOOP) ? "Prb" : "Fill"))); break; - case 0x6: - pr_cont("Unsupported atomic RMW received from an IO link.\n"); + case 0x10 ... 0x19: + case 0x1b: + pr_cont("ECC error in L2 data array (%s).\n", + (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" : + ((r4 == R4_GEN) ? "Attr" : + ((r4 == R4_EVICT) ? "Vict" : "Fill")))); break; - case 0x0: - case 0x8: - if (boot_cpu_data.x86 == 0x11) - return false; - - pr_cont("DRAM ECC error detected on the NB.\n"); - break; - - case 0xd: - pr_cont("Parity error on the DRAM addr/ctl signals.\n"); + case 0x1c ... 0x1d: + case 0x1f: + pr_cont("Parity error in L2 attribute bits (%s).\n", + ((r4 == R4_RD) ? "Hit" : + ((r4 == R4_GEN) ? "Attr" : "Fill"))); break; default: - ret = false; - break; + return false; } - return ret; + return true; } -static bool f10h_nb_mce(u16 ec, u8 xec) +static void decode_mc2_mce(struct mce *m) { - bool ret = true; - u8 offset = 0; - - if (k8_nb_mce(ec, xec)) - return true; - - switch(xec) { - case 0xa ... 0xc: - offset = 10; - break; - - case 0xe: - offset = 11; - break; + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); - case 0xf: - if (TLB_ERROR(ec)) - pr_cont("GART Table Walk data error.\n"); - else if (BUS_ERROR(ec)) - pr_cont("DMA Exclusion Vector Table Walk error.\n"); - else - ret = false; + pr_emerg(HW_ERR "MC2 Error: "); - goto out; - break; + if (!fam_ops->mc2_mce(ec, xec)) + pr_cont(HW_ERR "Corrupted MC2 MCE info?\n"); +} - case 0x19: - if (boot_cpu_data.x86 == 0x15) - pr_cont("Compute Unit Data Error.\n"); - else - ret = false; +static void decode_mc3_mce(struct mce *m) +{ + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); - goto out; - break; + if (boot_cpu_data.x86 >= 0x14) { + pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family," + " please report on LKML.\n"); + return; + } - case 0x1c ... 0x1f: - offset = 24; - break; + pr_emerg(HW_ERR "MC3 Error"); - default: - ret = false; + if (xec == 0x0) { + u8 r4 = R4(ec); - goto out; - break; - } + if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) + goto wrong_mc3_mce; - pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]); + pr_cont(" during %s.\n", R4_MSG(ec)); + } else + goto wrong_mc3_mce; -out: - return ret; -} + return; -static bool nb_noop_mce(u16 ec, u8 xec) -{ - return false; + wrong_mc3_mce: + pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n"); } -void amd_decode_nb_mce(struct mce *m) +static void decode_mc4_mce(struct mce *m) { struct cpuinfo_x86 *c = &boot_cpu_data; int node_id = amd_get_nb_id(m->extcpu); u16 ec = EC(m->status); u8 xec = XEC(m->status, 0x1f); + u8 offset = 0; - pr_emerg(HW_ERR "Northbridge Error (node %d): ", node_id); + pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id); switch (xec) { - case 0x2: - pr_cont("Sync error (sync packets on HT link detected).\n"); - return; + case 0x0 ... 0xe: - case 0x3: - pr_cont("HT Master abort.\n"); - return; + /* special handling for DRAM ECCs */ + if (xec == 0x0 || xec == 0x8) { + /* no ECCs on F11h */ + if (c->x86 == 0x11) + goto wrong_mc4_mce; - case 0x4: - pr_cont("HT Target abort.\n"); - return; + pr_cont("%s.\n", mc4_mce_desc[xec]); - case 0x7: - pr_cont("NB Watchdog timeout.\n"); + if (nb_bus_decoder) + nb_bus_decoder(node_id, m); + return; + } + break; + + case 0xf: + if (TLB_ERROR(ec)) + pr_cont("GART Table Walk data error.\n"); + else if (BUS_ERROR(ec)) + pr_cont("DMA Exclusion Vector Table Walk error.\n"); + else + goto wrong_mc4_mce; return; - case 0x9: - pr_cont("SVM DMA Exclusion Vector error.\n"); + case 0x19: + if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16) + pr_cont("Compute Unit Data Error.\n"); + else + goto wrong_mc4_mce; return; - default: + case 0x1c ... 0x1f: + offset = 13; break; - } - if (!fam_ops->nb_mce(ec, xec)) - goto wrong_nb_mce; - - if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x15) - if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder) - nb_bus_decoder(node_id, m); + default: + goto wrong_mc4_mce; + } + pr_cont("%s.\n", mc4_mce_desc[xec - offset]); return; -wrong_nb_mce: - pr_emerg(HW_ERR "Corrupted NB MCE info?\n"); + wrong_mc4_mce: + pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n"); } -EXPORT_SYMBOL_GPL(amd_decode_nb_mce); -static void amd_decode_fr_mce(struct mce *m) +static void decode_mc5_mce(struct mce *m) { struct cpuinfo_x86 *c = &boot_cpu_data; u8 xec = XEC(m->status, xec_mask); if (c->x86 == 0xf || c->x86 == 0x11) - goto wrong_fr_mce; + goto wrong_mc5_mce; - if (c->x86 != 0x15 && xec != 0x0) - goto wrong_fr_mce; - - pr_emerg(HW_ERR "%s Error: ", - (c->x86 == 0x15 ? "Execution Unit" : "FIROB")); + pr_emerg(HW_ERR "MC5 Error: "); if (xec == 0x0 || xec == 0xc) - pr_cont("%s.\n", fr_ex_mce_desc[xec]); - else if (xec < 0xd) - pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]); + pr_cont("%s.\n", mc5_mce_desc[xec]); + else if (xec <= 0xd) + pr_cont("%s parity error.\n", mc5_mce_desc[xec]); else - goto wrong_fr_mce; + goto wrong_mc5_mce; return; -wrong_fr_mce: - pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); + wrong_mc5_mce: + pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n"); } -static void amd_decode_fp_mce(struct mce *m) +static void decode_mc6_mce(struct mce *m) { u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Floating Point Unit Error: "); + pr_emerg(HW_ERR "MC6 Error: "); switch (xec) { case 0x1: @@ -695,7 +663,7 @@ static void amd_decode_fp_mce(struct mce *m) break; default: - goto wrong_fp_mce; + goto wrong_mc6_mce; break; } @@ -703,12 +671,16 @@ static void amd_decode_fp_mce(struct mce *m) return; -wrong_fp_mce: - pr_emerg(HW_ERR "Corrupted FP MCE info?\n"); + wrong_mc6_mce: + pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n"); } static inline void amd_decode_err_code(u16 ec) { + if (INT_ERROR(ec)) { + pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec)); + return; + } pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec)); @@ -743,27 +715,47 @@ static bool amd_filter_mce(struct mce *m) return false; } +static const char *decode_error_status(struct mce *m) +{ + if (m->status & MCI_STATUS_UC) { + if (m->status & MCI_STATUS_PCC) + return "System Fatal error."; + if (m->mcgstatus & MCG_STATUS_RIPV) + return "Uncorrected, software restartable error."; + return "Uncorrected, software containable error."; + } + + if (m->status & MCI_STATUS_DEFERRED) + return "Deferred error."; + + return "Corrected error, no action required."; +} + int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) { struct mce *m = (struct mce *)data; - struct cpuinfo_x86 *c = &boot_cpu_data; + struct cpuinfo_x86 *c = &cpu_data(m->extcpu); int ecc; if (amd_filter_mce(m)) return NOTIFY_STOP; - pr_emerg(HW_ERR "CPU:%d\tMC%d_STATUS[%s|%s|%s|%s|%s", - m->extcpu, m->bank, + pr_emerg(HW_ERR "%s\n", decode_error_status(m)); + + pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", + m->extcpu, + c->x86, c->x86_model, c->x86_mask, + m->bank, ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); - if (c->x86 == 0x15) + if (c->x86 == 0x15 || c->x86 == 0x16) pr_cont("|%s|%s", - ((m->status & BIT_64(44)) ? "Deferred" : "-"), - ((m->status & BIT_64(43)) ? "Poison" : "-")); + ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"), + ((m->status & MCI_STATUS_POISON) ? "Poison" : "-")); /* do the two bits[14:13] together */ ecc = (m->status >> 45) & 0x3; @@ -773,44 +765,45 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) pr_cont("]: 0x%016llx\n", m->status); if (m->status & MCI_STATUS_ADDRV) - pr_emerg(HW_ERR "\tMC%d_ADDR: 0x%016llx\n", m->bank, m->addr); + pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr); + + if (!fam_ops) + goto err_code; switch (m->bank) { case 0: - amd_decode_dc_mce(m); + decode_mc0_mce(m); break; case 1: - amd_decode_ic_mce(m); + decode_mc1_mce(m); break; case 2: - if (c->x86 == 0x15) - amd_decode_cu_mce(m); - else - amd_decode_bu_mce(m); + decode_mc2_mce(m); break; case 3: - amd_decode_ls_mce(m); + decode_mc3_mce(m); break; case 4: - amd_decode_nb_mce(m); + decode_mc4_mce(m); break; case 5: - amd_decode_fr_mce(m); + decode_mc5_mce(m); break; case 6: - amd_decode_fp_mce(m); + decode_mc6_mce(m); break; default: break; } + err_code: amd_decode_err_code(m->status & 0xffff); return NOTIFY_STOP; @@ -826,12 +819,7 @@ static int __init mce_amd_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor != X86_VENDOR_AMD) - return 0; - - if ((c->x86 < 0xf || c->x86 > 0x12) && - (c->x86 != 0x14 || c->x86_model > 0xf) && - (c->x86 != 0x15 || c->x86_model > 0xf)) - return 0; + return -ENODEV; fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); if (!fam_ops) @@ -839,47 +827,53 @@ static int __init mce_amd_init(void) switch (c->x86) { case 0xf: - fam_ops->dc_mce = k8_dc_mce; - fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = k8_nb_mce; + fam_ops->mc0_mce = k8_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x10: - fam_ops->dc_mce = f10h_dc_mce; - fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = f10h_nb_mce; + fam_ops->mc0_mce = f10h_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x11: - fam_ops->dc_mce = k8_dc_mce; - fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = f10h_nb_mce; + fam_ops->mc0_mce = k8_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x12: - fam_ops->dc_mce = f12h_dc_mce; - fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = nb_noop_mce; + fam_ops->mc0_mce = f12h_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x14: - nb_err_cpumask = 0x3; - fam_ops->dc_mce = f14h_dc_mce; - fam_ops->ic_mce = f14h_ic_mce; - fam_ops->nb_mce = nb_noop_mce; + fam_ops->mc0_mce = cat_mc0_mce; + fam_ops->mc1_mce = cat_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x15: xec_mask = 0x1f; - fam_ops->dc_mce = f15h_dc_mce; - fam_ops->ic_mce = f15h_ic_mce; - fam_ops->nb_mce = f10h_nb_mce; + fam_ops->mc0_mce = f15h_mc0_mce; + fam_ops->mc1_mce = f15h_mc1_mce; + fam_ops->mc2_mce = f15h_mc2_mce; + break; + + case 0x16: + xec_mask = 0x1f; + fam_ops->mc0_mce = cat_mc0_mce; + fam_ops->mc1_mce = cat_mc1_mce; + fam_ops->mc2_mce = f16h_mc2_mce; break; default: - printk(KERN_WARNING "Huh? What family is that: %d?!\n", c->x86); + printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86); kfree(fam_ops); - return -EINVAL; + fam_ops = NULL; } pr_info("MCE: In-kernel MCE decoding enabled.\n"); diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 0106747e240..51b7e3a36e3 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -5,8 +5,6 @@ #include <asm/mce.h> -#define BIT_64(n) (U64_C(1) << (n)) - #define EC(x) ((x) & 0xffff) #define XEC(x, mask) (((x) >> 16) & mask) @@ -16,6 +14,7 @@ #define TLB_ERROR(x) (((x) & 0xFFF0) == 0x0010) #define MEM_ERROR(x) (((x) & 0xFF00) == 0x0100) #define BUS_ERROR(x) (((x) & 0xF800) == 0x0800) +#define INT_ERROR(x) (((x) & 0xF4FF) == 0x0400) #define TT(x) (((x) >> 2) & 0x3) #define TT_MSG(x) tt_msgs[TT(x)] @@ -27,14 +26,16 @@ #define TO_MSG(x) to_msgs[TO(x)] #define PP(x) (((x) >> 9) & 0x3) #define PP_MSG(x) pp_msgs[PP(x)] +#define UU(x) (((x) >> 8) & 0x3) +#define UU_MSG(x) uu_msgs[UU(x)] #define R4(x) (((x) >> 4) & 0xf) #define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") -/* - * F3x4C bits (MCi_STATUS' high half) - */ -#define NBSH_ERR_CPU_VAL BIT(24) +#define MCI_STATUS_DEFERRED BIT_64(44) +#define MCI_STATUS_POISON BIT_64(43) + +extern const char * const pp_msgs[]; enum tt_ids { TT_INSTR = 0, @@ -69,26 +70,18 @@ enum rrrr_ids { R4_SNOOP, }; -extern const char *tt_msgs[]; -extern const char *ll_msgs[]; -extern const char *rrrr_msgs[]; -extern const char *pp_msgs[]; -extern const char *to_msgs[]; -extern const char *ii_msgs[]; - /* * per-family decoder ops */ struct amd_decoder_ops { - bool (*dc_mce)(u16, u8); - bool (*ic_mce)(u16, u8); - bool (*nb_mce)(u16, u8); + bool (*mc0_mce)(u16, u8); + bool (*mc1_mce)(u16, u8); + bool (*mc2_mce)(u16, u8); }; void amd_report_gart_errors(bool); void amd_register_ecc_decoder(void (*f)(int, struct mce *)); void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)); -void amd_decode_nb_mce(struct mce *); int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data); #endif /* _EDAC_MCE_AMD_H */ diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 885e8ad8fdc..5e46a9fea31 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -6,11 +6,12 @@ * This file may be distributed under the terms of the GNU General Public * License version 2. * - * Copyright (c) 2010: Borislav Petkov <borislav.petkov@amd.com> + * Copyright (c) 2010: Borislav Petkov <bp@alien8.de> * Advanced Micro Devices Inc. */ #include <linux/kobject.h> +#include <linux/device.h> #include <linux/edac.h> #include <linux/module.h> #include <asm/mce.h> @@ -42,7 +43,7 @@ static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \ int ret = 0; \ unsigned long value; \ \ - ret = strict_strtoul(data, 16, &value); \ + ret = kstrtoul(data, 16, &value); \ if (ret < 0) \ printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \ \ @@ -82,7 +83,7 @@ static ssize_t edac_inject_bank_store(struct kobject *kobj, int ret = 0; unsigned long value; - ret = strict_strtoul(data, 10, &value); + ret = kstrtoul(data, 10, &value); if (ret < 0) { printk(KERN_ERR "Invalid bank value!\n"); return -EINVAL; @@ -167,6 +168,6 @@ module_init(edac_init_mce_inject); module_exit(edac_exit_mce_inject); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Borislav Petkov <borislav.petkov@amd.com>"); +MODULE_AUTHOR("Borislav Petkov <bp@alien8.de>"); MODULE_AUTHOR("AMD Inc."); MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding"); diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 73464a62adf..f4aec2e6ef5 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -1,6 +1,8 @@ /* * Freescale MPC85xx Memory Controller kenel module * + * Parts Copyrighted (c) 2013 by Freescale Semiconductor, Inc. + * * Author: Dave Jiang <djiang@mvista.com> * * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under @@ -49,34 +51,45 @@ static u32 orig_hid1[2]; /************************ MC SYSFS parts ***********************************/ -static ssize_t mpc85xx_mc_inject_data_hi_show(struct mem_ctl_info *mci, +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +static ssize_t mpc85xx_mc_inject_data_hi_show(struct device *dev, + struct device_attribute *mattr, char *data) { + struct mem_ctl_info *mci = to_mci(dev); struct mpc85xx_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", in_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_HI)); } -static ssize_t mpc85xx_mc_inject_data_lo_show(struct mem_ctl_info *mci, +static ssize_t mpc85xx_mc_inject_data_lo_show(struct device *dev, + struct device_attribute *mattr, char *data) { + struct mem_ctl_info *mci = to_mci(dev); struct mpc85xx_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", in_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_LO)); } -static ssize_t mpc85xx_mc_inject_ctrl_show(struct mem_ctl_info *mci, char *data) +static ssize_t mpc85xx_mc_inject_ctrl_show(struct device *dev, + struct device_attribute *mattr, + char *data) { + struct mem_ctl_info *mci = to_mci(dev); struct mpc85xx_mc_pdata *pdata = mci->pvt_info; return sprintf(data, "0x%08x", in_be32(pdata->mc_vbase + MPC85XX_MC_ECC_ERR_INJECT)); } -static ssize_t mpc85xx_mc_inject_data_hi_store(struct mem_ctl_info *mci, +static ssize_t mpc85xx_mc_inject_data_hi_store(struct device *dev, + struct device_attribute *mattr, const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); struct mpc85xx_mc_pdata *pdata = mci->pvt_info; if (isdigit(*data)) { out_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_HI, @@ -86,9 +99,11 @@ static ssize_t mpc85xx_mc_inject_data_hi_store(struct mem_ctl_info *mci, return 0; } -static ssize_t mpc85xx_mc_inject_data_lo_store(struct mem_ctl_info *mci, +static ssize_t mpc85xx_mc_inject_data_lo_store(struct device *dev, + struct device_attribute *mattr, const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); struct mpc85xx_mc_pdata *pdata = mci->pvt_info; if (isdigit(*data)) { out_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_LO, @@ -98,9 +113,11 @@ static ssize_t mpc85xx_mc_inject_data_lo_store(struct mem_ctl_info *mci, return 0; } -static ssize_t mpc85xx_mc_inject_ctrl_store(struct mem_ctl_info *mci, - const char *data, size_t count) +static ssize_t mpc85xx_mc_inject_ctrl_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) { + struct mem_ctl_info *mci = to_mci(dev); struct mpc85xx_mc_pdata *pdata = mci->pvt_info; if (isdigit(*data)) { out_be32(pdata->mc_vbase + MPC85XX_MC_ECC_ERR_INJECT, @@ -110,38 +127,35 @@ static ssize_t mpc85xx_mc_inject_ctrl_store(struct mem_ctl_info *mci, return 0; } -static struct mcidev_sysfs_attribute mpc85xx_mc_sysfs_attributes[] = { - { - .attr = { - .name = "inject_data_hi", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = mpc85xx_mc_inject_data_hi_show, - .store = mpc85xx_mc_inject_data_hi_store}, - { - .attr = { - .name = "inject_data_lo", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = mpc85xx_mc_inject_data_lo_show, - .store = mpc85xx_mc_inject_data_lo_store}, - { - .attr = { - .name = "inject_ctrl", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = mpc85xx_mc_inject_ctrl_show, - .store = mpc85xx_mc_inject_ctrl_store}, +DEVICE_ATTR(inject_data_hi, S_IRUGO | S_IWUSR, + mpc85xx_mc_inject_data_hi_show, mpc85xx_mc_inject_data_hi_store); +DEVICE_ATTR(inject_data_lo, S_IRUGO | S_IWUSR, + mpc85xx_mc_inject_data_lo_show, mpc85xx_mc_inject_data_lo_store); +DEVICE_ATTR(inject_ctrl, S_IRUGO | S_IWUSR, + mpc85xx_mc_inject_ctrl_show, mpc85xx_mc_inject_ctrl_store); - /* End of list */ - { - .attr = {.name = NULL} - } -}; +static int mpc85xx_create_sysfs_attributes(struct mem_ctl_info *mci) +{ + int rc; + + rc = device_create_file(&mci->dev, &dev_attr_inject_data_hi); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_inject_data_lo); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_inject_ctrl); + if (rc < 0) + return rc; -static void mpc85xx_set_mc_sysfs_attributes(struct mem_ctl_info *mci) + return 0; +} + +static void mpc85xx_remove_sysfs_attributes(struct mem_ctl_info *mci) { - mci->mc_driver_sysfs_attributes = mpc85xx_mc_sysfs_attributes; + device_remove_file(&mci->dev, &dev_attr_inject_data_hi); + device_remove_file(&mci->dev, &dev_attr_inject_data_lo); + device_remove_file(&mci->dev, &dev_attr_inject_ctrl); } /**************************** PCI Err device ***************************/ @@ -184,6 +198,42 @@ static void mpc85xx_pci_check(struct edac_pci_ctl_info *pci) edac_pci_handle_npe(pci, pci->ctl_name); } +static void mpc85xx_pcie_check(struct edac_pci_ctl_info *pci) +{ + struct mpc85xx_pci_pdata *pdata = pci->pvt_info; + u32 err_detect; + + err_detect = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR); + + pr_err("PCIe error(s) detected\n"); + pr_err("PCIe ERR_DR register: 0x%08x\n", err_detect); + pr_err("PCIe ERR_CAP_STAT register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR)); + pr_err("PCIe ERR_CAP_R0 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R0)); + pr_err("PCIe ERR_CAP_R1 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R1)); + pr_err("PCIe ERR_CAP_R2 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R2)); + pr_err("PCIe ERR_CAP_R3 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R3)); + + /* clear error bits */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, err_detect); +} + +static int mpc85xx_pcie_find_capability(struct device_node *np) +{ + struct pci_controller *hose; + + if (!np) + return -EINVAL; + + hose = pci_find_hose_for_OF_device(np); + + return early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP); +} + static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) { struct edac_pci_ctl_info *pci = dev_id; @@ -195,12 +245,15 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) if (!err_detect) return IRQ_NONE; - mpc85xx_pci_check(pci); + if (pdata->is_pcie) + mpc85xx_pcie_check(pci); + else + mpc85xx_pci_check(pci); return IRQ_HANDLED; } -static int __devinit mpc85xx_pci_err_probe(struct platform_device *op) +int mpc85xx_pci_err_probe(struct platform_device *op) { struct edac_pci_ctl_info *pci; struct mpc85xx_pci_pdata *pdata; @@ -214,17 +267,35 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *op) if (!pci) return -ENOMEM; + /* make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_INT: + break; + default: + edac_op_state = EDAC_OPSTATE_INT; + break; + } + pdata = pci->pvt_info; pdata->name = "mpc85xx_pci_err"; pdata->irq = NO_IRQ; + + if (mpc85xx_pcie_find_capability(op->dev.of_node) > 0) + pdata->is_pcie = true; + dev_set_drvdata(&op->dev, pci); pci->dev = &op->dev; pci->mod_name = EDAC_MOD_STR; pci->ctl_name = pdata->name; pci->dev_name = dev_name(&op->dev); - if (edac_op_state == EDAC_OPSTATE_POLL) - pci->edac_check = mpc85xx_pci_check; + if (edac_op_state == EDAC_OPSTATE_POLL) { + if (pdata->is_pcie) + pci->edac_check = mpc85xx_pcie_check; + else + pci->edac_check = mpc85xx_pci_check; + } pdata->edac_idx = edac_pci_idx++; @@ -253,33 +324,44 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *op) goto err; } - orig_pci_err_cap_dr = - in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR); + if (pdata->is_pcie) { + orig_pci_err_cap_dr = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, ~0); + orig_pci_err_en = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, 0); + } else { + orig_pci_err_cap_dr = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR); - /* PCI master abort is expected during config cycles */ - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, 0x40); + /* PCI master abort is expected during config cycles */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, 0x40); - orig_pci_err_en = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); + orig_pci_err_en = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); - /* disable master abort reporting */ - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0x40); + /* disable master abort reporting */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0x40); + } /* clear error bits */ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, ~0); if (edac_pci_add_device(pci, pdata->edac_idx) > 0) { - debugf3("%s(): failed edac_pci_add_device()\n", __func__); + edac_dbg(3, "failed edac_pci_add_device()\n"); goto err; } if (edac_op_state == EDAC_OPSTATE_INT) { pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); res = devm_request_irq(&op->dev, pdata->irq, - mpc85xx_pci_isr, IRQF_DISABLED, + mpc85xx_pci_isr, + IRQF_SHARED, "[EDAC] PCI err", pci); if (res < 0) { printk(KERN_ERR - "%s: Unable to requiest irq %d for " + "%s: Unable to request irq %d for " "MPC85xx PCI err\n", __func__, pdata->irq); irq_dispose_mapping(pdata->irq); res = -ENODEV; @@ -290,8 +372,24 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *op) pdata->irq); } + if (pdata->is_pcie) { + /* + * Enable all PCIe error interrupt & error detect except invalid + * PEX_CONFIG_ADDR/PEX_CONFIG_DATA access interrupt generation + * enable bit and invalid PEX_CONFIG_ADDR/PEX_CONFIG_DATA access + * detection enable bit. Because PCIe bus code to initialize and + * configure these PCIe devices on booting will use some invalid + * PEX_CONFIG_ADDR/PEX_CONFIG_DATA, edac driver prints the much + * notice information. So disable this detect to fix ugly print. + */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0 + & ~PEX_ERR_ICCAIE_EN_BIT); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, 0 + | PEX_ERR_ICCAD_DISR_BIT); + } + devres_remove_group(&op->dev, mpc85xx_pci_err_probe); - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n"); return 0; @@ -303,49 +401,7 @@ err: devres_release_group(&op->dev, mpc85xx_pci_err_probe); return res; } - -static int mpc85xx_pci_err_remove(struct platform_device *op) -{ - struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev); - struct mpc85xx_pci_pdata *pdata = pci->pvt_info; - - debugf0("%s()\n", __func__); - - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, - orig_pci_err_cap_dr); - - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, orig_pci_err_en); - - edac_pci_del_device(pci->dev); - - if (edac_op_state == EDAC_OPSTATE_INT) - irq_dispose_mapping(pdata->irq); - - edac_pci_free_ctl_info(pci); - - return 0; -} - -static struct of_device_id mpc85xx_pci_err_of_match[] = { - { - .compatible = "fsl,mpc8540-pcix", - }, - { - .compatible = "fsl,mpc8540-pci", - }, - {}, -}; -MODULE_DEVICE_TABLE(of, mpc85xx_pci_err_of_match); - -static struct platform_driver mpc85xx_pci_err_driver = { - .probe = mpc85xx_pci_err_probe, - .remove = __devexit_p(mpc85xx_pci_err_remove), - .driver = { - .name = "mpc85xx_pci_err", - .owner = THIS_MODULE, - .of_match_table = mpc85xx_pci_err_of_match, - }, -}; +EXPORT_SYMBOL(mpc85xx_pci_err_probe); #endif /* CONFIG_PCI */ @@ -502,7 +558,7 @@ static irqreturn_t mpc85xx_l2_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int __devinit mpc85xx_l2_err_probe(struct platform_device *op) +static int mpc85xx_l2_err_probe(struct platform_device *op) { struct edac_device_ctl_info *edac_dev; struct mpc85xx_l2_pdata *pdata; @@ -570,18 +626,18 @@ static int __devinit mpc85xx_l2_err_probe(struct platform_device *op) pdata->edac_idx = edac_dev_idx++; if (edac_device_add_device(edac_dev) > 0) { - debugf3("%s(): failed edac_device_add_device()\n", __func__); + edac_dbg(3, "failed edac_device_add_device()\n"); goto err; } if (edac_op_state == EDAC_OPSTATE_INT) { pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); res = devm_request_irq(&op->dev, pdata->irq, - mpc85xx_l2_isr, IRQF_DISABLED, + mpc85xx_l2_isr, 0, "[EDAC] L2 err", edac_dev); if (res < 0) { printk(KERN_ERR - "%s: Unable to requiest irq %d for " + "%s: Unable to request irq %d for " "MPC85xx L2 err\n", __func__, pdata->irq); irq_dispose_mapping(pdata->irq); res = -ENODEV; @@ -598,7 +654,7 @@ static int __devinit mpc85xx_l2_err_probe(struct platform_device *op) devres_remove_group(&op->dev, mpc85xx_l2_err_probe); - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); printk(KERN_INFO EDAC_MOD_STR " L2 err registered\n"); return 0; @@ -616,7 +672,7 @@ static int mpc85xx_l2_err_remove(struct platform_device *op) struct edac_device_ctl_info *edac_dev = dev_get_drvdata(&op->dev); struct mpc85xx_l2_pdata *pdata = edac_dev->pvt_info; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); if (edac_op_state == EDAC_OPSTATE_INT) { out_be32(pdata->l2_vbase + MPC85XX_L2_ERRINTEN, 0); @@ -813,7 +869,7 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci) pfn = err_addr >> PAGE_SHIFT; for (row_index = 0; row_index < mci->nr_csrows; row_index++) { - csrow = &mci->csrows[row_index]; + csrow = mci->csrows[row_index]; if ((pfn >= csrow->first_page) && (pfn <= csrow->last_page)) break; } @@ -854,12 +910,16 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci) mpc85xx_mc_printk(mci, KERN_ERR, "PFN out of range!\n"); if (err_detect & DDR_EDE_SBE) - edac_mc_handle_ce(mci, pfn, err_addr & ~PAGE_MASK, - syndrome, row_index, 0, mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + pfn, err_addr & ~PAGE_MASK, syndrome, + row_index, 0, -1, + mci->ctl_name, ""); if (err_detect & DDR_EDE_MBE) - edac_mc_handle_ue(mci, pfn, err_addr & ~PAGE_MASK, - row_index, mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + pfn, err_addr & ~PAGE_MASK, syndrome, + row_index, 0, -1, + mci->ctl_name, ""); out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, err_detect); } @@ -879,10 +939,11 @@ static irqreturn_t mpc85xx_mc_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci) +static void mpc85xx_init_csrows(struct mem_ctl_info *mci) { struct mpc85xx_mc_pdata *pdata = mci->pvt_info; struct csrow_info *csrow; + struct dimm_info *dimm; u32 sdram_ctl; u32 sdtype; enum mem_type mtype; @@ -928,7 +989,9 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci) u32 start; u32 end; - csrow = &mci->csrows[index]; + csrow = mci->csrows[index]; + dimm = csrow->channels[0]->dimm; + cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 + (index * MPC85XX_MC_CS_BNDS_OFS)); @@ -944,19 +1007,21 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci) csrow->first_page = start; csrow->last_page = end; - csrow->nr_pages = end + 1 - start; - csrow->grain = 8; - csrow->mtype = mtype; - csrow->dtype = DEV_UNKNOWN; + + dimm->nr_pages = end + 1 - start; + dimm->grain = 8; + dimm->mtype = mtype; + dimm->dtype = DEV_UNKNOWN; if (sdram_ctl & DSC_X32_EN) - csrow->dtype = DEV_X32; - csrow->edac_mode = EDAC_SECDED; + dimm->dtype = DEV_X32; + dimm->edac_mode = EDAC_SECDED; } } -static int __devinit mpc85xx_mc_err_probe(struct platform_device *op) +static int mpc85xx_mc_err_probe(struct platform_device *op) { struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct mpc85xx_mc_pdata *pdata; struct resource r; u32 sdram_ctl; @@ -965,7 +1030,14 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op) if (!devres_open_group(&op->dev, mpc85xx_mc_err_probe, GFP_KERNEL)) return -ENOMEM; - mci = edac_mc_alloc(sizeof(*pdata), 4, 1, edac_mc_idx); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 4; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 1; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers, + sizeof(*pdata)); if (!mci) { devres_release_group(&op->dev, mpc85xx_mc_err_probe); return -ENOMEM; @@ -974,9 +1046,9 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op) pdata = mci->pvt_info; pdata->name = "mpc85xx_mc_err"; pdata->irq = NO_IRQ; - mci->dev = &op->dev; + mci->pdev = &op->dev; pdata->edac_idx = edac_mc_idx++; - dev_set_drvdata(mci->dev, mci); + dev_set_drvdata(mci->pdev, mci); mci->ctl_name = pdata->name; mci->dev_name = pdata->name; @@ -1010,7 +1082,7 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op) goto err; } - debugf3("%s(): init mci\n", __func__); + edac_dbg(3, "init mci\n"); mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_DDR | MEM_FLAG_DDR2; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; @@ -1025,8 +1097,6 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op) mci->scrub_mode = SCRUB_SW_SRC; - mpc85xx_set_mc_sysfs_attributes(mci); - mpc85xx_init_csrows(mci); /* store the original error disable bits */ @@ -1038,7 +1108,13 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op) out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, ~0); if (edac_mc_add_mc(mci)) { - debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "failed edac_mc_add_mc()\n"); + goto err; + } + + if (mpc85xx_create_sysfs_attributes(mci)) { + edac_mc_del_mc(mci->pdev); + edac_dbg(3, "failed edac_mc_add_mc()\n"); goto err; } @@ -1057,7 +1133,7 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op) pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); res = devm_request_irq(&op->dev, pdata->irq, mpc85xx_mc_isr, - IRQF_DISABLED | IRQF_SHARED, + IRQF_SHARED, "[EDAC] MC err", mci); if (res < 0) { printk(KERN_ERR "%s: Unable to request irq %d for " @@ -1072,7 +1148,7 @@ static int __devinit mpc85xx_mc_err_probe(struct platform_device *op) } devres_remove_group(&op->dev, mpc85xx_mc_err_probe); - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); printk(KERN_INFO EDAC_MOD_STR " MC err registered\n"); return 0; @@ -1090,7 +1166,7 @@ static int mpc85xx_mc_err_remove(struct platform_device *op) struct mem_ctl_info *mci = dev_get_drvdata(&op->dev); struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); if (edac_op_state == EDAC_OPSTATE_INT) { out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_INT_EN, 0); @@ -1101,6 +1177,7 @@ static int mpc85xx_mc_err_remove(struct platform_device *op) orig_ddr_err_disable); out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_SBE, orig_ddr_err_sbe); + mpc85xx_remove_sysfs_attributes(mci); edac_mc_del_mc(&op->dev); edac_mc_free(mci); return 0; @@ -1177,12 +1254,6 @@ static int __init mpc85xx_mc_init(void) if (res) printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n"); -#ifdef CONFIG_PCI - res = platform_driver_register(&mpc85xx_pci_err_driver); - if (res) - printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n"); -#endif - #ifdef CONFIG_FSL_SOC_BOOKE pvr = mfspr(SPRN_PVR); @@ -1219,9 +1290,6 @@ static void __exit mpc85xx_mc_exit(void) on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0); } #endif -#ifdef CONFIG_PCI - platform_driver_unregister(&mpc85xx_pci_err_driver); -#endif platform_driver_unregister(&mpc85xx_l2_err_driver); platform_driver_unregister(&mpc85xx_mc_err_driver); } diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h index 932016f2cf0..8c625643622 100644 --- a/drivers/edac/mpc85xx_edac.h +++ b/drivers/edac/mpc85xx_edac.h @@ -134,13 +134,19 @@ #define MPC85XX_PCI_ERR_DR 0x0000 #define MPC85XX_PCI_ERR_CAP_DR 0x0004 #define MPC85XX_PCI_ERR_EN 0x0008 +#define PEX_ERR_ICCAIE_EN_BIT 0x00020000 #define MPC85XX_PCI_ERR_ATTRIB 0x000c #define MPC85XX_PCI_ERR_ADDR 0x0010 +#define PEX_ERR_ICCAD_DISR_BIT 0x00020000 #define MPC85XX_PCI_ERR_EXT_ADDR 0x0014 #define MPC85XX_PCI_ERR_DL 0x0018 #define MPC85XX_PCI_ERR_DH 0x001c #define MPC85XX_PCI_GAS_TIMR 0x0020 #define MPC85XX_PCI_PCIX_TIMR 0x0024 +#define MPC85XX_PCIE_ERR_CAP_R0 0x0028 +#define MPC85XX_PCIE_ERR_CAP_R1 0x002c +#define MPC85XX_PCIE_ERR_CAP_R2 0x0030 +#define MPC85XX_PCIE_ERR_CAP_R3 0x0034 struct mpc85xx_mc_pdata { char *name; @@ -158,6 +164,7 @@ struct mpc85xx_l2_pdata { struct mpc85xx_pci_pdata { char *name; + bool is_pcie; int edac_idx; void __iomem *pci_vbase; int irq; diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index 7e5ff367705..542fad70e36 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -100,7 +100,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev) return 0; } -static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) +static int mv64x60_pci_err_probe(struct platform_device *pdev) { struct edac_pci_ctl_info *pci; struct mv64x60_pci_pdata *pdata; @@ -169,7 +169,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) MV64X60_PCIx_ERR_MASK_VAL); if (edac_pci_add_device(pci, pdata->edac_idx) > 0) { - debugf3("%s(): failed edac_pci_add_device()\n", __func__); + edac_dbg(3, "failed edac_pci_add_device()\n"); goto err; } @@ -194,7 +194,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) devres_remove_group(&pdev->dev, mv64x60_pci_err_probe); /* get this far and it's successful */ - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); return 0; @@ -210,7 +210,7 @@ static int mv64x60_pci_err_remove(struct platform_device *pdev) { struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev); - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); edac_pci_del_device(&pdev->dev); @@ -221,7 +221,7 @@ static int mv64x60_pci_err_remove(struct platform_device *pdev) static struct platform_driver mv64x60_pci_err_driver = { .probe = mv64x60_pci_err_probe, - .remove = __devexit_p(mv64x60_pci_err_remove), + .remove = mv64x60_pci_err_remove, .driver = { .name = "mv64x60_pci_err", } @@ -271,7 +271,7 @@ static irqreturn_t mv64x60_sram_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev) +static int mv64x60_sram_err_probe(struct platform_device *pdev) { struct edac_device_ctl_info *edac_dev; struct mv64x60_sram_pdata *pdata; @@ -336,7 +336,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev) pdata->edac_idx = edac_dev_idx++; if (edac_device_add_device(edac_dev) > 0) { - debugf3("%s(): failed edac_device_add_device()\n", __func__); + edac_dbg(3, "failed edac_device_add_device()\n"); goto err; } @@ -363,7 +363,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev) devres_remove_group(&pdev->dev, mv64x60_sram_err_probe); /* get this far and it's successful */ - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); return 0; @@ -379,7 +379,7 @@ static int mv64x60_sram_err_remove(struct platform_device *pdev) { struct edac_device_ctl_info *edac_dev = platform_get_drvdata(pdev); - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); edac_device_del_device(&pdev->dev); edac_device_free_ctl_info(edac_dev); @@ -439,7 +439,7 @@ static irqreturn_t mv64x60_cpu_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) +static int mv64x60_cpu_err_probe(struct platform_device *pdev) { struct edac_device_ctl_info *edac_dev; struct resource *r; @@ -531,7 +531,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) pdata->edac_idx = edac_dev_idx++; if (edac_device_add_device(edac_dev) > 0) { - debugf3("%s(): failed edac_device_add_device()\n", __func__); + edac_dbg(3, "failed edac_device_add_device()\n"); goto err; } @@ -558,7 +558,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) devres_remove_group(&pdev->dev, mv64x60_cpu_err_probe); /* get this far and it's successful */ - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); return 0; @@ -574,7 +574,7 @@ static int mv64x60_cpu_err_remove(struct platform_device *pdev) { struct edac_device_ctl_info *edac_dev = platform_get_drvdata(pdev); - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); edac_device_del_device(&pdev->dev); edac_device_free_ctl_info(edac_dev); @@ -611,12 +611,17 @@ static void mv64x60_mc_check(struct mem_ctl_info *mci) /* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */ if (!(reg & 0x1)) - edac_mc_handle_ce(mci, err_addr >> PAGE_SHIFT, - err_addr & PAGE_MASK, syndrome, 0, 0, - mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + err_addr >> PAGE_SHIFT, + err_addr & PAGE_MASK, syndrome, + 0, 0, -1, + mci->ctl_name, ""); else /* 2 bit error, UE */ - edac_mc_handle_ue(mci, err_addr >> PAGE_SHIFT, - err_addr & PAGE_MASK, 0, mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + err_addr >> PAGE_SHIFT, + err_addr & PAGE_MASK, 0, + 0, 0, -1, + mci->ctl_name, ""); /* clear the error */ out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0); @@ -656,6 +661,8 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci, struct mv64x60_mc_pdata *pdata) { struct csrow_info *csrow; + struct dimm_info *dimm; + u32 devtype; u32 ctl; @@ -663,36 +670,37 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci, ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG); - csrow = &mci->csrows[0]; - csrow->first_page = 0; - csrow->nr_pages = pdata->total_mem >> PAGE_SHIFT; - csrow->last_page = csrow->first_page + csrow->nr_pages - 1; - csrow->grain = 8; + csrow = mci->csrows[0]; + dimm = csrow->channels[0]->dimm; + + dimm->nr_pages = pdata->total_mem >> PAGE_SHIFT; + dimm->grain = 8; - csrow->mtype = (ctl & MV64X60_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR; + dimm->mtype = (ctl & MV64X60_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR; devtype = (ctl >> 20) & 0x3; switch (devtype) { case 0x0: - csrow->dtype = DEV_X32; + dimm->dtype = DEV_X32; break; case 0x2: /* could be X8 too, but no way to tell */ - csrow->dtype = DEV_X16; + dimm->dtype = DEV_X16; break; case 0x3: - csrow->dtype = DEV_X4; + dimm->dtype = DEV_X4; break; default: - csrow->dtype = DEV_UNKNOWN; + dimm->dtype = DEV_UNKNOWN; break; } - csrow->edac_mode = EDAC_SECDED; + dimm->edac_mode = EDAC_SECDED; } -static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) +static int mv64x60_mc_err_probe(struct platform_device *pdev) { struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct mv64x60_mc_pdata *pdata; struct resource *r; u32 ctl; @@ -701,7 +709,14 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) if (!devres_open_group(&pdev->dev, mv64x60_mc_err_probe, GFP_KERNEL)) return -ENOMEM; - mci = edac_mc_alloc(sizeof(struct mv64x60_mc_pdata), 1, 1, edac_mc_idx); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 1; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 1; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers, + sizeof(struct mv64x60_mc_pdata)); if (!mci) { printk(KERN_ERR "%s: No memory for CPU err\n", __func__); devres_release_group(&pdev->dev, mv64x60_mc_err_probe); @@ -709,7 +724,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) } pdata = mci->pvt_info; - mci->dev = &pdev->dev; + mci->pdev = &pdev->dev; platform_set_drvdata(pdev, mci); pdata->name = "mv64x60_mc_err"; pdata->irq = NO_IRQ; @@ -751,7 +766,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) goto err2; } - debugf3("%s(): init mci\n", __func__); + edac_dbg(3, "init mci\n"); mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; @@ -775,7 +790,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL, ctl); if (edac_mc_add_mc(mci)) { - debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "failed edac_mc_add_mc()\n"); goto err; } @@ -800,7 +815,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) } /* get this far and it's successful */ - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); return 0; @@ -816,7 +831,7 @@ static int mv64x60_mc_err_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c new file mode 100644 index 00000000000..7e98084d364 --- /dev/null +++ b/drivers/edac/octeon_edac-l2c.c @@ -0,0 +1,208 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 Cavium, Inc. + * + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/edac.h> + +#include <asm/octeon/cvmx.h> + +#include "edac_core.h" +#include "edac_module.h" + +#define EDAC_MOD_STR "octeon-l2c" + +static void octeon_l2c_poll_oct1(struct edac_device_ctl_info *l2c) +{ + union cvmx_l2t_err l2t_err, l2t_err_reset; + union cvmx_l2d_err l2d_err, l2d_err_reset; + + l2t_err_reset.u64 = 0; + l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR); + if (l2t_err.s.sec_err) { + edac_device_handle_ce(l2c, 0, 0, + "Tag Single bit error (corrected)"); + l2t_err_reset.s.sec_err = 1; + } + if (l2t_err.s.ded_err) { + edac_device_handle_ue(l2c, 0, 0, + "Tag Double bit error (detected)"); + l2t_err_reset.s.ded_err = 1; + } + if (l2t_err_reset.u64) + cvmx_write_csr(CVMX_L2T_ERR, l2t_err_reset.u64); + + l2d_err_reset.u64 = 0; + l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR); + if (l2d_err.s.sec_err) { + edac_device_handle_ce(l2c, 0, 1, + "Data Single bit error (corrected)"); + l2d_err_reset.s.sec_err = 1; + } + if (l2d_err.s.ded_err) { + edac_device_handle_ue(l2c, 0, 1, + "Data Double bit error (detected)"); + l2d_err_reset.s.ded_err = 1; + } + if (l2d_err_reset.u64) + cvmx_write_csr(CVMX_L2D_ERR, l2d_err_reset.u64); + +} + +static void _octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c, int tad) +{ + union cvmx_l2c_err_tdtx err_tdtx, err_tdtx_reset; + union cvmx_l2c_err_ttgx err_ttgx, err_ttgx_reset; + char buf1[64]; + char buf2[80]; + + err_tdtx_reset.u64 = 0; + err_tdtx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TDTX(tad)); + if (err_tdtx.s.dbe || err_tdtx.s.sbe || + err_tdtx.s.vdbe || err_tdtx.s.vsbe) + snprintf(buf1, sizeof(buf1), + "type:%d, syn:0x%x, way:%d", + err_tdtx.s.type, err_tdtx.s.syn, err_tdtx.s.wayidx); + + if (err_tdtx.s.dbe) { + snprintf(buf2, sizeof(buf2), + "L2D Double bit error (detected):%s", buf1); + err_tdtx_reset.s.dbe = 1; + edac_device_handle_ue(l2c, tad, 1, buf2); + } + if (err_tdtx.s.sbe) { + snprintf(buf2, sizeof(buf2), + "L2D Single bit error (corrected):%s", buf1); + err_tdtx_reset.s.sbe = 1; + edac_device_handle_ce(l2c, tad, 1, buf2); + } + if (err_tdtx.s.vdbe) { + snprintf(buf2, sizeof(buf2), + "VBF Double bit error (detected):%s", buf1); + err_tdtx_reset.s.vdbe = 1; + edac_device_handle_ue(l2c, tad, 1, buf2); + } + if (err_tdtx.s.vsbe) { + snprintf(buf2, sizeof(buf2), + "VBF Single bit error (corrected):%s", buf1); + err_tdtx_reset.s.vsbe = 1; + edac_device_handle_ce(l2c, tad, 1, buf2); + } + if (err_tdtx_reset.u64) + cvmx_write_csr(CVMX_L2C_ERR_TDTX(tad), err_tdtx_reset.u64); + + err_ttgx_reset.u64 = 0; + err_ttgx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TTGX(tad)); + + if (err_ttgx.s.dbe || err_ttgx.s.sbe) + snprintf(buf1, sizeof(buf1), + "type:%d, syn:0x%x, way:%d", + err_ttgx.s.type, err_ttgx.s.syn, err_ttgx.s.wayidx); + + if (err_ttgx.s.dbe) { + snprintf(buf2, sizeof(buf2), + "Tag Double bit error (detected):%s", buf1); + err_ttgx_reset.s.dbe = 1; + edac_device_handle_ue(l2c, tad, 0, buf2); + } + if (err_ttgx.s.sbe) { + snprintf(buf2, sizeof(buf2), + "Tag Single bit error (corrected):%s", buf1); + err_ttgx_reset.s.sbe = 1; + edac_device_handle_ce(l2c, tad, 0, buf2); + } + if (err_ttgx_reset.u64) + cvmx_write_csr(CVMX_L2C_ERR_TTGX(tad), err_ttgx_reset.u64); +} + +static void octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c) +{ + int i; + for (i = 0; i < l2c->nr_instances; i++) + _octeon_l2c_poll_oct2(l2c, i); +} + +static int octeon_l2c_probe(struct platform_device *pdev) +{ + struct edac_device_ctl_info *l2c; + + int num_tads = OCTEON_IS_MODEL(OCTEON_CN68XX) ? 4 : 1; + + /* 'Tags' are block 0, 'Data' is block 1*/ + l2c = edac_device_alloc_ctl_info(0, "l2c", num_tads, "l2c", 2, 0, + NULL, 0, edac_device_alloc_index()); + if (!l2c) + return -ENOMEM; + + l2c->dev = &pdev->dev; + platform_set_drvdata(pdev, l2c); + l2c->dev_name = dev_name(&pdev->dev); + + l2c->mod_name = "octeon-l2c"; + l2c->ctl_name = "octeon_l2c_err"; + + + if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + union cvmx_l2t_err l2t_err; + union cvmx_l2d_err l2d_err; + + l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR); + l2t_err.s.sec_intena = 0; /* We poll */ + l2t_err.s.ded_intena = 0; + cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64); + + l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR); + l2d_err.s.sec_intena = 0; /* We poll */ + l2d_err.s.ded_intena = 0; + cvmx_write_csr(CVMX_L2T_ERR, l2d_err.u64); + + l2c->edac_check = octeon_l2c_poll_oct1; + } else { + /* OCTEON II */ + l2c->edac_check = octeon_l2c_poll_oct2; + } + + if (edac_device_add_device(l2c) > 0) { + pr_err("%s: edac_device_add_device() failed\n", __func__); + goto err; + } + + + return 0; + +err: + edac_device_free_ctl_info(l2c); + + return -ENXIO; +} + +static int octeon_l2c_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *l2c = platform_get_drvdata(pdev); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(l2c); + + return 0; +} + +static struct platform_driver octeon_l2c_driver = { + .probe = octeon_l2c_probe, + .remove = octeon_l2c_remove, + .driver = { + .name = "octeon_l2c_edac", + } +}; +module_platform_driver(octeon_l2c_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c new file mode 100644 index 00000000000..4bd10f94f06 --- /dev/null +++ b/drivers/edac/octeon_edac-lmc.c @@ -0,0 +1,353 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle <ralf@linux-mips.org> + * + * Copyright (c) 2013 by Cisco Systems, Inc. + * All rights reserved. + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/edac.h> +#include <linux/ctype.h> + +#include <asm/octeon/octeon.h> +#include <asm/octeon/cvmx-lmcx-defs.h> + +#include "edac_core.h" +#include "edac_module.h" + +#define OCTEON_MAX_MC 4 + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +struct octeon_lmc_pvt { + unsigned long inject; + unsigned long error_type; + unsigned long dimm; + unsigned long rank; + unsigned long bank; + unsigned long row; + unsigned long col; +}; + +static void octeon_lmc_edac_poll(struct mem_ctl_info *mci) +{ + union cvmx_lmcx_mem_cfg0 cfg0; + bool do_clear = false; + char msg[64]; + + cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx)); + if (cfg0.s.sec_err || cfg0.s.ded_err) { + union cvmx_lmcx_fadr fadr; + fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx)); + snprintf(msg, sizeof(msg), + "DIMM %d rank %d bank %d row %d col %d", + fadr.cn30xx.fdimm, fadr.cn30xx.fbunk, + fadr.cn30xx.fbank, fadr.cn30xx.frow, fadr.cn30xx.fcol); + } + + if (cfg0.s.sec_err) { + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + cfg0.s.sec_err = -1; /* Done, re-arm */ + do_clear = true; + } + + if (cfg0.s.ded_err) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + cfg0.s.ded_err = -1; /* Done, re-arm */ + do_clear = true; + } + if (do_clear) + cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx), cfg0.u64); +} + +static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) +{ + struct octeon_lmc_pvt *pvt = mci->pvt_info; + union cvmx_lmcx_int int_reg; + bool do_clear = false; + char msg[64]; + + if (!pvt->inject) + int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); + else { + if (pvt->error_type == 1) + int_reg.s.sec_err = 1; + if (pvt->error_type == 2) + int_reg.s.ded_err = 1; + } + + if (int_reg.s.sec_err || int_reg.s.ded_err) { + union cvmx_lmcx_fadr fadr; + if (likely(!pvt->inject)) + fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx)); + else { + fadr.cn61xx.fdimm = pvt->dimm; + fadr.cn61xx.fbunk = pvt->rank; + fadr.cn61xx.fbank = pvt->bank; + fadr.cn61xx.frow = pvt->row; + fadr.cn61xx.fcol = pvt->col; + } + snprintf(msg, sizeof(msg), + "DIMM %d rank %d bank %d row %d col %d", + fadr.cn61xx.fdimm, fadr.cn61xx.fbunk, + fadr.cn61xx.fbank, fadr.cn61xx.frow, fadr.cn61xx.fcol); + } + + if (int_reg.s.sec_err) { + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + int_reg.s.sec_err = -1; /* Done, re-arm */ + do_clear = true; + } + + if (int_reg.s.ded_err) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + int_reg.s.ded_err = -1; /* Done, re-arm */ + do_clear = true; + } + + if (do_clear) { + if (likely(!pvt->inject)) + cvmx_write_csr(CVMX_LMCX_INT(mci->mc_idx), int_reg.u64); + else + pvt->inject = 0; + } +} + +/************************ MC SYSFS parts ***********************************/ + +/* Only a couple naming differences per template, so very similar */ +#define TEMPLATE_SHOW(reg) \ +static ssize_t octeon_mc_inject_##reg##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *data) \ +{ \ + struct mem_ctl_info *mci = to_mci(dev); \ + struct octeon_lmc_pvt *pvt = mci->pvt_info; \ + return sprintf(data, "%016llu\n", (u64)pvt->reg); \ +} + +#define TEMPLATE_STORE(reg) \ +static ssize_t octeon_mc_inject_##reg##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *data, size_t count) \ +{ \ + struct mem_ctl_info *mci = to_mci(dev); \ + struct octeon_lmc_pvt *pvt = mci->pvt_info; \ + if (isdigit(*data)) { \ + if (!kstrtoul(data, 0, &pvt->reg)) \ + return count; \ + } \ + return 0; \ +} + +TEMPLATE_SHOW(inject); +TEMPLATE_STORE(inject); +TEMPLATE_SHOW(dimm); +TEMPLATE_STORE(dimm); +TEMPLATE_SHOW(bank); +TEMPLATE_STORE(bank); +TEMPLATE_SHOW(rank); +TEMPLATE_STORE(rank); +TEMPLATE_SHOW(row); +TEMPLATE_STORE(row); +TEMPLATE_SHOW(col); +TEMPLATE_STORE(col); + +static ssize_t octeon_mc_inject_error_type_store(struct device *dev, + struct device_attribute *attr, + const char *data, + size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct octeon_lmc_pvt *pvt = mci->pvt_info; + + if (!strncmp(data, "single", 6)) + pvt->error_type = 1; + else if (!strncmp(data, "double", 6)) + pvt->error_type = 2; + + return count; +} + +static ssize_t octeon_mc_inject_error_type_show(struct device *dev, + struct device_attribute *attr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct octeon_lmc_pvt *pvt = mci->pvt_info; + if (pvt->error_type == 1) + return sprintf(data, "single"); + else if (pvt->error_type == 2) + return sprintf(data, "double"); + + return 0; +} + +static DEVICE_ATTR(inject, S_IRUGO | S_IWUSR, + octeon_mc_inject_inject_show, octeon_mc_inject_inject_store); +static DEVICE_ATTR(error_type, S_IRUGO | S_IWUSR, + octeon_mc_inject_error_type_show, octeon_mc_inject_error_type_store); +static DEVICE_ATTR(dimm, S_IRUGO | S_IWUSR, + octeon_mc_inject_dimm_show, octeon_mc_inject_dimm_store); +static DEVICE_ATTR(rank, S_IRUGO | S_IWUSR, + octeon_mc_inject_rank_show, octeon_mc_inject_rank_store); +static DEVICE_ATTR(bank, S_IRUGO | S_IWUSR, + octeon_mc_inject_bank_show, octeon_mc_inject_bank_store); +static DEVICE_ATTR(row, S_IRUGO | S_IWUSR, + octeon_mc_inject_row_show, octeon_mc_inject_row_store); +static DEVICE_ATTR(col, S_IRUGO | S_IWUSR, + octeon_mc_inject_col_show, octeon_mc_inject_col_store); + + +static int octeon_set_mc_sysfs_attributes(struct mem_ctl_info *mci) +{ + int rc; + + rc = device_create_file(&mci->dev, &dev_attr_inject); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_error_type); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_dimm); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_rank); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_bank); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_row); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_col); + if (rc < 0) + return rc; + + return 0; +} + +static int octeon_lmc_edac_probe(struct platform_device *pdev) +{ + struct mem_ctl_info *mci; + struct edac_mc_layer layers[1]; + int mc = pdev->id; + + opstate_init(); + + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = 1; + layers[0].is_virt_csrow = false; + + if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + union cvmx_lmcx_mem_cfg0 cfg0; + + cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0)); + if (!cfg0.s.ecc_ena) { + dev_info(&pdev->dev, "Disabled (ECC not enabled)\n"); + return 0; + } + + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct octeon_lmc_pvt)); + if (!mci) + return -ENXIO; + + mci->pdev = &pdev->dev; + mci->dev_name = dev_name(&pdev->dev); + + mci->mod_name = "octeon-lmc"; + mci->ctl_name = "octeon-lmc-err"; + mci->edac_check = octeon_lmc_edac_poll; + + if (edac_mc_add_mc(mci)) { + dev_err(&pdev->dev, "edac_mc_add_mc() failed\n"); + edac_mc_free(mci); + return -ENXIO; + } + + if (octeon_set_mc_sysfs_attributes(mci)) { + dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n"); + return -ENXIO; + } + + + cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc)); + cfg0.s.intr_ded_ena = 0; /* We poll */ + cfg0.s.intr_sec_ena = 0; + cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mc), cfg0.u64); + } else { + /* OCTEON II */ + union cvmx_lmcx_int_en en; + union cvmx_lmcx_config config; + + config.u64 = cvmx_read_csr(CVMX_LMCX_CONFIG(0)); + if (!config.s.ecc_ena) { + dev_info(&pdev->dev, "Disabled (ECC not enabled)\n"); + return 0; + } + + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct octeon_lmc_pvt)); + if (!mci) + return -ENXIO; + + mci->pdev = &pdev->dev; + mci->dev_name = dev_name(&pdev->dev); + + mci->mod_name = "octeon-lmc"; + mci->ctl_name = "co_lmc_err"; + mci->edac_check = octeon_lmc_edac_poll_o2; + + if (edac_mc_add_mc(mci)) { + dev_err(&pdev->dev, "edac_mc_add_mc() failed\n"); + edac_mc_free(mci); + return -ENXIO; + } + + if (octeon_set_mc_sysfs_attributes(mci)) { + dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n"); + return -ENXIO; + } + + + en.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc)); + en.s.intr_ded_ena = 0; /* We poll */ + en.s.intr_sec_ena = 0; + cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mc), en.u64); + } + platform_set_drvdata(pdev, mci); + + return 0; +} + +static int octeon_lmc_edac_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + return 0; +} + +static struct platform_driver octeon_lmc_edac_driver = { + .probe = octeon_lmc_edac_probe, + .remove = octeon_lmc_edac_remove, + .driver = { + .name = "octeon_lmc_edac", + } +}; +module_platform_driver(octeon_lmc_edac_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c new file mode 100644 index 00000000000..0f83c33a7d1 --- /dev/null +++ b/drivers/edac/octeon_edac-pc.c @@ -0,0 +1,143 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 Cavium, Inc. + * + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/edac.h> + +#include "edac_core.h" +#include "edac_module.h" + +#include <asm/octeon/cvmx.h> +#include <asm/mipsregs.h> + +extern int register_co_cache_error_notifier(struct notifier_block *nb); +extern int unregister_co_cache_error_notifier(struct notifier_block *nb); + +extern unsigned long long cache_err_dcache[NR_CPUS]; + +struct co_cache_error { + struct notifier_block notifier; + struct edac_device_ctl_info *ed; +}; + +/** + * EDAC CPU cache error callback + * + * @event: non-zero if unrecoverable. + */ +static int co_cache_error_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct co_cache_error *p = container_of(this, struct co_cache_error, + notifier); + + unsigned int core = cvmx_get_core_num(); + unsigned int cpu = smp_processor_id(); + u64 icache_err = read_octeon_c0_icacheerr(); + u64 dcache_err; + + if (event) { + dcache_err = cache_err_dcache[core]; + cache_err_dcache[core] = 0; + } else { + dcache_err = read_octeon_c0_dcacheerr(); + } + + if (icache_err & 1) { + edac_device_printk(p->ed, KERN_ERR, + "CacheErr (Icache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n", + (unsigned long long)icache_err, core, cpu, + read_c0_errorepc()); + write_octeon_c0_icacheerr(0); + edac_device_handle_ce(p->ed, cpu, 1, "icache"); + } + if (dcache_err & 1) { + edac_device_printk(p->ed, KERN_ERR, + "CacheErr (Dcache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n", + (unsigned long long)dcache_err, core, cpu, + read_c0_errorepc()); + if (event) + edac_device_handle_ue(p->ed, cpu, 0, "dcache"); + else + edac_device_handle_ce(p->ed, cpu, 0, "dcache"); + + /* Clear the error indication */ + if (OCTEON_IS_MODEL(OCTEON_FAM_2)) + write_octeon_c0_dcacheerr(1); + else + write_octeon_c0_dcacheerr(0); + } + + return NOTIFY_STOP; +} + +static int co_cache_error_probe(struct platform_device *pdev) +{ + struct co_cache_error *p = devm_kzalloc(&pdev->dev, sizeof(*p), + GFP_KERNEL); + if (!p) + return -ENOMEM; + + p->notifier.notifier_call = co_cache_error_event; + platform_set_drvdata(pdev, p); + + p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(), + "cache", 2, 0, NULL, 0, + edac_device_alloc_index()); + if (!p->ed) + goto err; + + p->ed->dev = &pdev->dev; + + p->ed->dev_name = dev_name(&pdev->dev); + + p->ed->mod_name = "octeon-cpu"; + p->ed->ctl_name = "cache"; + + if (edac_device_add_device(p->ed)) { + pr_err("%s: edac_device_add_device() failed\n", __func__); + goto err1; + } + + register_co_cache_error_notifier(&p->notifier); + + return 0; + +err1: + edac_device_free_ctl_info(p->ed); +err: + return -ENXIO; +} + +static int co_cache_error_remove(struct platform_device *pdev) +{ + struct co_cache_error *p = platform_get_drvdata(pdev); + + unregister_co_cache_error_notifier(&p->notifier); + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(p->ed); + return 0; +} + +static struct platform_driver co_cache_error_driver = { + .probe = co_cache_error_probe, + .remove = co_cache_error_remove, + .driver = { + .name = "octeon_pc_edac", + } +}; +module_platform_driver(co_cache_error_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-pci.c b/drivers/edac/octeon_edac-pci.c new file mode 100644 index 00000000000..9ca73cec74e --- /dev/null +++ b/drivers/edac/octeon_edac-pci.c @@ -0,0 +1,111 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 Cavium, Inc. + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/edac.h> + +#include <asm/octeon/cvmx.h> +#include <asm/octeon/cvmx-npi-defs.h> +#include <asm/octeon/cvmx-pci-defs.h> +#include <asm/octeon/octeon.h> + +#include "edac_core.h" +#include "edac_module.h" + +static void octeon_pci_poll(struct edac_pci_ctl_info *pci) +{ + union cvmx_pci_cfg01 cfg01; + + cfg01.u32 = octeon_npi_read32(CVMX_NPI_PCI_CFG01); + if (cfg01.s.dpe) { /* Detected parity error */ + edac_pci_handle_pe(pci, pci->ctl_name); + cfg01.s.dpe = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.sse) { + edac_pci_handle_npe(pci, "Signaled System Error"); + cfg01.s.sse = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.rma) { + edac_pci_handle_npe(pci, "Received Master Abort"); + cfg01.s.rma = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.rta) { + edac_pci_handle_npe(pci, "Received Target Abort"); + cfg01.s.rta = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.sta) { + edac_pci_handle_npe(pci, "Signaled Target Abort"); + cfg01.s.sta = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.mdpe) { + edac_pci_handle_npe(pci, "Master Data Parity Error"); + cfg01.s.mdpe = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } +} + +static int octeon_pci_probe(struct platform_device *pdev) +{ + struct edac_pci_ctl_info *pci; + int res = 0; + + pci = edac_pci_alloc_ctl_info(0, "octeon_pci_err"); + if (!pci) + return -ENOMEM; + + pci->dev = &pdev->dev; + platform_set_drvdata(pdev, pci); + pci->dev_name = dev_name(&pdev->dev); + + pci->mod_name = "octeon-pci"; + pci->ctl_name = "octeon_pci_err"; + pci->edac_check = octeon_pci_poll; + + if (edac_pci_add_device(pci, 0) > 0) { + pr_err("%s: edac_pci_add_device() failed\n", __func__); + goto err; + } + + return 0; + +err: + edac_pci_free_ctl_info(pci); + + return res; +} + +static int octeon_pci_remove(struct platform_device *pdev) +{ + struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev); + + edac_pci_del_device(&pdev->dev); + edac_pci_free_ctl_info(pci); + + return 0; +} + +static struct platform_driver octeon_pci_driver = { + .probe = octeon_pci_probe, + .remove = octeon_pci_remove, + .driver = { + .name = "octeon_pci_edac", + } +}; +module_platform_driver(octeon_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c index 7f71ee43674..9c971b57553 100644 --- a/drivers/edac/pasemi_edac.c +++ b/drivers/edac/pasemi_edac.c @@ -74,7 +74,7 @@ static int system_mmc_id; static u32 pasemi_edac_get_error_info(struct mem_ctl_info *mci) { - struct pci_dev *pdev = to_pci_dev(mci->dev); + struct pci_dev *pdev = to_pci_dev(mci->pdev); u32 tmp; pci_read_config_dword(pdev, MCDEBUG_ERRSTA, @@ -95,7 +95,7 @@ static u32 pasemi_edac_get_error_info(struct mem_ctl_info *mci) static void pasemi_edac_process_error_info(struct mem_ctl_info *mci, u32 errsta) { - struct pci_dev *pdev = to_pci_dev(mci->dev); + struct pci_dev *pdev = to_pci_dev(mci->pdev); u32 errlog1a; u32 cs; @@ -110,15 +110,16 @@ static void pasemi_edac_process_error_info(struct mem_ctl_info *mci, u32 errsta) /* uncorrectable/multi-bit errors */ if (errsta & (MCDEBUG_ERRSTA_MBE_STATUS | MCDEBUG_ERRSTA_RFL_STATUS)) { - edac_mc_handle_ue(mci, mci->csrows[cs].first_page, 0, - cs, mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + mci->csrows[cs]->first_page, 0, 0, + cs, 0, -1, mci->ctl_name, ""); } /* correctable/single-bit errors */ - if (errsta & MCDEBUG_ERRSTA_SBE_STATUS) { - edac_mc_handle_ce(mci, mci->csrows[cs].first_page, 0, - 0, cs, 0, mci->ctl_name); - } + if (errsta & MCDEBUG_ERRSTA_SBE_STATUS) + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + mci->csrows[cs]->first_page, 0, 0, + cs, 0, -1, mci->ctl_name, ""); } static void pasemi_edac_check(struct mem_ctl_info *mci) @@ -135,11 +136,13 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci, enum edac_type edac_mode) { struct csrow_info *csrow; + struct dimm_info *dimm; u32 rankcfg; int index; for (index = 0; index < mci->nr_csrows; index++) { - csrow = &mci->csrows[index]; + csrow = mci->csrows[index]; + dimm = csrow->channels[0]->dimm; pci_read_config_dword(pdev, MCDRAM_RANKCFG + (index * 12), @@ -151,20 +154,20 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci, switch ((rankcfg & MCDRAM_RANKCFG_TYPE_SIZE_M) >> MCDRAM_RANKCFG_TYPE_SIZE_S) { case 0: - csrow->nr_pages = 128 << (20 - PAGE_SHIFT); + dimm->nr_pages = 128 << (20 - PAGE_SHIFT); break; case 1: - csrow->nr_pages = 256 << (20 - PAGE_SHIFT); + dimm->nr_pages = 256 << (20 - PAGE_SHIFT); break; case 2: case 3: - csrow->nr_pages = 512 << (20 - PAGE_SHIFT); + dimm->nr_pages = 512 << (20 - PAGE_SHIFT); break; case 4: - csrow->nr_pages = 1024 << (20 - PAGE_SHIFT); + dimm->nr_pages = 1024 << (20 - PAGE_SHIFT); break; case 5: - csrow->nr_pages = 2048 << (20 - PAGE_SHIFT); + dimm->nr_pages = 2048 << (20 - PAGE_SHIFT); break; default: edac_mc_printk(mci, KERN_ERR, @@ -174,21 +177,22 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci, } csrow->first_page = last_page_in_mmc; - csrow->last_page = csrow->first_page + csrow->nr_pages - 1; - last_page_in_mmc += csrow->nr_pages; + csrow->last_page = csrow->first_page + dimm->nr_pages - 1; + last_page_in_mmc += dimm->nr_pages; csrow->page_mask = 0; - csrow->grain = PASEMI_EDAC_ERROR_GRAIN; - csrow->mtype = MEM_DDR; - csrow->dtype = DEV_UNKNOWN; - csrow->edac_mode = edac_mode; + dimm->grain = PASEMI_EDAC_ERROR_GRAIN; + dimm->mtype = MEM_DDR; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = edac_mode; } return 0; } -static int __devinit pasemi_edac_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int pasemi_edac_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) { struct mem_ctl_info *mci = NULL; + struct edac_mc_layer layers[2]; u32 errctl1, errcor, scrub, mcen; pci_read_config_dword(pdev, MCCFG_MCEN, &mcen); @@ -205,9 +209,14 @@ static int __devinit pasemi_edac_probe(struct pci_dev *pdev, MCDEBUG_ERRCTL1_RFL_LOG_EN; pci_write_config_dword(pdev, MCDEBUG_ERRCTL1, errctl1); - mci = edac_mc_alloc(0, PASEMI_EDAC_NR_CSROWS, PASEMI_EDAC_NR_CHANS, - system_mmc_id++); - + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = PASEMI_EDAC_NR_CSROWS; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = PASEMI_EDAC_NR_CHANS; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(system_mmc_id++, ARRAY_SIZE(layers), layers, + 0); if (mci == NULL) return -ENOMEM; @@ -216,7 +225,7 @@ static int __devinit pasemi_edac_probe(struct pci_dev *pdev, MCCFG_ERRCOR_ECC_GEN_EN | MCCFG_ERRCOR_ECC_CRR_EN; - mci->dev = &pdev->dev; + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; mci->edac_cap = (errcor & MCCFG_ERRCOR_ECC_GEN_EN) ? @@ -257,7 +266,7 @@ fail: return -ENODEV; } -static void __devexit pasemi_edac_remove(struct pci_dev *pdev) +static void pasemi_edac_remove(struct pci_dev *pdev) { struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); @@ -278,7 +287,7 @@ MODULE_DEVICE_TABLE(pci, pasemi_edac_pci_tbl); static struct pci_driver pasemi_edac_driver = { .name = MODULE_NAME, .probe = pasemi_edac_probe, - .remove = __devexit_p(pasemi_edac_remove), + .remove = pasemi_edac_remove, .id_table = pasemi_edac_pci_tbl, }; diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index fc757069c6a..ef6b7e08f48 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -184,7 +184,7 @@ struct ppc4xx_ecc_status { /* Function Prototypes */ -static int ppc4xx_edac_probe(struct platform_device *device) +static int ppc4xx_edac_probe(struct platform_device *device); static int ppc4xx_edac_remove(struct platform_device *device); /* Global Variables */ @@ -727,7 +727,10 @@ ppc4xx_edac_handle_ce(struct mem_ctl_info *mci, for (row = 0; row < mci->nr_csrows; row++) if (ppc4xx_edac_check_bank_error(status, row)) - edac_mc_handle_ce_no_info(mci, message); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + 0, 0, 0, + row, 0, -1, + message, ""); } /** @@ -755,7 +758,10 @@ ppc4xx_edac_handle_ue(struct mem_ctl_info *mci, for (row = 0; row < mci->nr_csrows; row++) if (ppc4xx_edac_check_bank_error(status, row)) - edac_mc_handle_ue(mci, page, offset, row, message); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + page, offset, 0, + row, 0, -1, + message, ""); } /** @@ -832,8 +838,7 @@ ppc4xx_edac_isr(int irq, void *dev_id) * * Returns a device type width enumeration. */ -static enum dev_type __devinit -ppc4xx_edac_get_dtype(u32 mcopt1) +static enum dev_type ppc4xx_edac_get_dtype(u32 mcopt1) { switch (mcopt1 & SDRAM_MCOPT1_WDTH_MASK) { case SDRAM_MCOPT1_WDTH_16: @@ -856,8 +861,7 @@ ppc4xx_edac_get_dtype(u32 mcopt1) * * Returns a memory type enumeration. */ -static enum mem_type __devinit -ppc4xx_edac_get_mtype(u32 mcopt1) +static enum mem_type ppc4xx_edac_get_mtype(u32 mcopt1) { bool rden = ((mcopt1 & SDRAM_MCOPT1_RDEN_MASK) == SDRAM_MCOPT1_RDEN); @@ -887,17 +891,15 @@ ppc4xx_edac_get_mtype(u32 mcopt1) * Returns 0 if OK; otherwise, -EINVAL if the memory bank size * configuration cannot be determined. */ -static int __devinit -ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) +static int ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) { const struct ppc4xx_edac_pdata *pdata = mci->pvt_info; int status = 0; enum mem_type mtype; enum dev_type dtype; enum edac_type edac_mode; - int row; - u32 mbxcf, size; - static u32 ppc4xx_last_page; + int row, j; + u32 mbxcf, size, nr_pages; /* Establish the memory type and width */ @@ -948,7 +950,7 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) case SDRAM_MBCF_SZ_2GB: case SDRAM_MBCF_SZ_4GB: case SDRAM_MBCF_SZ_8GB: - csi->nr_pages = SDRAM_MBCF_SZ_TO_PAGES(size); + nr_pages = SDRAM_MBCF_SZ_TO_PAGES(size); break; default: ppc4xx_edac_mc_printk(KERN_ERR, mci, @@ -959,10 +961,6 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) goto done; } - csi->first_page = ppc4xx_last_page; - csi->last_page = csi->first_page + csi->nr_pages - 1; - csi->page_mask = 0; - /* * It's unclear exactly what grain should be set to * here. The SDRAM_ECCES register allows resolution of @@ -975,15 +973,17 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) * possible values would be the PLB width (16), the * page size (PAGE_SIZE) or the memory width (2 or 4). */ + for (j = 0; j < csi->nr_channels; j++) { + struct dimm_info *dimm = csi->channels[j].dimm; - csi->grain = 1; - - csi->mtype = mtype; - csi->dtype = dtype; + dimm->nr_pages = nr_pages / csi->nr_channels; + dimm->grain = 1; - csi->edac_mode = edac_mode; + dimm->mtype = mtype; + dimm->dtype = dtype; - ppc4xx_last_page += csi->nr_pages; + dimm->edac_mode = edac_mode; + } } done: @@ -1008,11 +1008,9 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) * * Returns 0 if OK; otherwise, < 0 on error. */ -static int __devinit -ppc4xx_edac_mc_init(struct mem_ctl_info *mci, - struct platform_device *op, - const dcr_host_t *dcr_host, - u32 mcopt1) +static int ppc4xx_edac_mc_init(struct mem_ctl_info *mci, + struct platform_device *op, + const dcr_host_t *dcr_host, u32 mcopt1) { int status = 0; const u32 memcheck = (mcopt1 & SDRAM_MCOPT1_MCHK_MASK); @@ -1024,9 +1022,9 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci, /* Initial driver pointers and private data */ - mci->dev = &op->dev; + mci->pdev = &op->dev; - dev_set_drvdata(mci->dev, mci); + dev_set_drvdata(mci->pdev, mci); pdata = mci->pvt_info; @@ -1068,7 +1066,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci, mci->mod_name = PPC4XX_EDAC_MODULE_NAME; mci->mod_ver = PPC4XX_EDAC_MODULE_REVISION; - mci->ctl_name = match->compatible, + mci->ctl_name = ppc4xx_edac_match->compatible, mci->dev_name = np->full_name; /* Initialize callbacks */ @@ -1102,8 +1100,8 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci, * Returns 0 if OK; otherwise, -ENODEV if the interrupts could not be * mapped and assigned. */ -static int __devinit -ppc4xx_edac_register_irq(struct platform_device *op, struct mem_ctl_info *mci) +static int ppc4xx_edac_register_irq(struct platform_device *op, + struct mem_ctl_info *mci) { int status = 0; int ded_irq, sec_irq; @@ -1180,8 +1178,8 @@ ppc4xx_edac_register_irq(struct platform_device *op, struct mem_ctl_info *mci) * Returns 0 if the DCRs were successfully mapped; otherwise, < 0 on * error. */ -static int __devinit -ppc4xx_edac_map_dcrs(const struct device_node *np, dcr_host_t *dcr_host) +static int ppc4xx_edac_map_dcrs(const struct device_node *np, + dcr_host_t *dcr_host) { unsigned int dcr_base, dcr_len; @@ -1229,13 +1227,14 @@ ppc4xx_edac_map_dcrs(const struct device_node *np, dcr_host_t *dcr_host) * Returns 0 if the controller instance was successfully bound to the * driver; otherwise, < 0 on error. */ -static int __devinit ppc4xx_edac_probe(struct platform_device *op) +static int ppc4xx_edac_probe(struct platform_device *op) { int status = 0; u32 mcopt1, memcheck; dcr_host_t dcr_host; const struct device_node *np = op->dev.of_node; struct mem_ctl_info *mci = NULL; + struct edac_mc_layer layers[2]; static int ppc4xx_edac_instance; /* @@ -1281,12 +1280,14 @@ static int __devinit ppc4xx_edac_probe(struct platform_device *op) * controller instance and perform the appropriate * initialization. */ - - mci = edac_mc_alloc(sizeof(struct ppc4xx_edac_pdata), - ppc4xx_edac_nr_csrows, - ppc4xx_edac_nr_chans, - ppc4xx_edac_instance); - + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = ppc4xx_edac_nr_csrows; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = ppc4xx_edac_nr_chans; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(ppc4xx_edac_instance, ARRAY_SIZE(layers), layers, + sizeof(struct ppc4xx_edac_pdata)); if (mci == NULL) { ppc4xx_edac_printk(KERN_ERR, "%s: " "Failed to allocate EDAC MC instance!\n", @@ -1328,7 +1329,7 @@ static int __devinit ppc4xx_edac_probe(struct platform_device *op) return 0; fail1: - edac_mc_del_mc(mci->dev); + edac_mc_del_mc(mci->pdev); fail: edac_mc_free(mci); @@ -1362,7 +1363,7 @@ ppc4xx_edac_remove(struct platform_device *op) dcr_unmap(pdata->dcr_host, SDRAM_DCR_RESOURCE_LEN); - edac_mc_del_mc(mci->dev); + edac_mc_del_mc(mci->pdev); edac_mc_free(mci); return 0; diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index e294e1b3616..8f936bc7a01 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -140,7 +140,7 @@ static void r82600_get_error_info(struct mem_ctl_info *mci, { struct pci_dev *pdev; - pdev = to_pci_dev(mci->dev); + pdev = to_pci_dev(mci->pdev); pci_read_config_dword(pdev, R82600_EAP, &info->eapr); if (info->eapr & BIT(0)) @@ -179,10 +179,11 @@ static int r82600_process_error_info(struct mem_ctl_info *mci, error_found = 1; if (handle_errors) - edac_mc_handle_ce(mci, page, 0, /* not avail */ - syndrome, - edac_mc_find_csrow_by_page(mci, page), - 0, mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + page, 0, syndrome, + edac_mc_find_csrow_by_page(mci, page), + 0, -1, + mci->ctl_name, ""); } if (info->eapr & BIT(1)) { /* UE? */ @@ -190,9 +191,11 @@ static int r82600_process_error_info(struct mem_ctl_info *mci, if (handle_errors) /* 82600 doesn't give enough info */ - edac_mc_handle_ue(mci, page, 0, - edac_mc_find_csrow_by_page(mci, page), - mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + page, 0, 0, + edac_mc_find_csrow_by_page(mci, page), + 0, -1, + mci->ctl_name, ""); } return error_found; @@ -202,7 +205,7 @@ static void r82600_check(struct mem_ctl_info *mci) { struct r82600_error_info info; - debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + edac_dbg(1, "MC%d\n", mci->mc_idx); r82600_get_error_info(mci, &info); r82600_process_error_info(mci, &info, 1); } @@ -216,6 +219,7 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, u8 dramcr) { struct csrow_info *csrow; + struct dimm_info *dimm; int index; u8 drbar; /* SDRAM Row Boundary Address Register */ u32 row_high_limit, row_high_limit_last; @@ -226,18 +230,19 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, row_high_limit_last = 0; for (index = 0; index < mci->nr_csrows; index++) { - csrow = &mci->csrows[index]; + csrow = mci->csrows[index]; + dimm = csrow->channels[0]->dimm; /* find the DRAM Chip Select Base address and mask */ pci_read_config_byte(pdev, R82600_DRBA + index, &drbar); - debugf1("%s() Row=%d DRBA = %#0x\n", __func__, index, drbar); + edac_dbg(1, "Row=%d DRBA = %#0x\n", index, drbar); row_high_limit = ((u32) drbar << 24); /* row_high_limit = ((u32)drbar << 24) | 0xffffffUL; */ - debugf1("%s() Row=%d, Boundary Address=%#0x, Last = %#0x\n", - __func__, index, row_high_limit, row_high_limit_last); + edac_dbg(1, "Row=%d, Boundary Address=%#0x, Last = %#0x\n", + index, row_high_limit, row_high_limit_last); /* Empty row [p.57] */ if (row_high_limit == row_high_limit_last) @@ -247,16 +252,17 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, csrow->first_page = row_base >> PAGE_SHIFT; csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1; - csrow->nr_pages = csrow->last_page - csrow->first_page + 1; + + dimm->nr_pages = csrow->last_page - csrow->first_page + 1; /* Error address is top 19 bits - so granularity is * * 14 bits */ - csrow->grain = 1 << 14; - csrow->mtype = reg_sdram ? MEM_RDDR : MEM_DDR; + dimm->grain = 1 << 14; + dimm->mtype = reg_sdram ? MEM_RDDR : MEM_DDR; /* FIXME - check that this is unknowable with this chipset */ - csrow->dtype = DEV_UNKNOWN; + dimm->dtype = DEV_UNKNOWN; /* Mode is global on 82600 */ - csrow->edac_mode = ecc_on ? EDAC_SECDED : EDAC_NONE; + dimm->edac_mode = ecc_on ? EDAC_SECDED : EDAC_NONE; row_high_limit_last = row_high_limit; } } @@ -264,27 +270,32 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, static int r82600_probe1(struct pci_dev *pdev, int dev_idx) { struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; u8 dramcr; u32 eapr; u32 scrub_disabled; u32 sdram_refresh_rate; struct r82600_error_info discard; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); pci_read_config_byte(pdev, R82600_DRAMC, &dramcr); pci_read_config_dword(pdev, R82600_EAP, &eapr); scrub_disabled = eapr & BIT(31); sdram_refresh_rate = dramcr & (BIT(0) | BIT(1)); - debugf2("%s(): sdram refresh rate = %#0x\n", __func__, - sdram_refresh_rate); - debugf2("%s(): DRAMC register = %#0x\n", __func__, dramcr); - mci = edac_mc_alloc(0, R82600_NR_CSROWS, R82600_NR_CHANS, 0); - + edac_dbg(2, "sdram refresh rate = %#0x\n", sdram_refresh_rate); + edac_dbg(2, "DRAMC register = %#0x\n", dramcr); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = R82600_NR_CSROWS; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = R82600_NR_CHANS; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0); if (mci == NULL) return -ENOMEM; - debugf0("%s(): mci = %p\n", __func__, mci); - mci->dev = &pdev->dev; + edac_dbg(0, "mci = %p\n", mci); + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; /* FIXME try to work out if the chip leads have been used for COM2 @@ -299,8 +310,8 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx) if (ecc_enabled(dramcr)) { if (scrub_disabled) - debugf3("%s(): mci = %p - Scrubbing disabled! EAP: " - "%#0x\n", __func__, mci, eapr); + edac_dbg(3, "mci = %p - Scrubbing disabled! EAP: %#0x\n", + mci, eapr); } else mci->edac_cap = EDAC_FLAG_NONE; @@ -317,15 +328,14 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx) * type of memory controller. The ID is therefore hardcoded to 0. */ if (edac_mc_add_mc(mci)) { - debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "failed edac_mc_add_mc()\n"); goto fail; } /* get this far and it's successful */ if (disable_hardware_scrub) { - debugf3("%s(): Disabling Hardware Scrub (scrub on error)\n", - __func__); + edac_dbg(3, "Disabling Hardware Scrub (scrub on error)\n"); pci_write_bits32(pdev, R82600_EAP, BIT(31), BIT(31)); } @@ -340,7 +350,7 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx) __func__); } - debugf3("%s(): success\n", __func__); + edac_dbg(3, "success\n"); return 0; fail: @@ -349,20 +359,20 @@ fail: } /* returns count (>= 0), or negative on error */ -static int __devinit r82600_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int r82600_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* don't need to call pci_enable_device() */ return r82600_probe1(pdev, ent->driver_data); } -static void __devexit r82600_remove_one(struct pci_dev *pdev) +static void r82600_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); if (r82600_pci) edac_pci_release_generic_ctl(r82600_pci); @@ -373,7 +383,7 @@ static void __devexit r82600_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id r82600_pci_tbl[] __devinitdata = { +static const struct pci_device_id r82600_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID) }, @@ -387,7 +397,7 @@ MODULE_DEVICE_TABLE(pci, r82600_pci_tbl); static struct pci_driver r82600_driver = { .name = EDAC_MOD_STR, .probe = r82600_init_one, - .remove = __devexit_p(r82600_remove_one), + .remove = r82600_remove_one, .id_table = r82600_pci_tbl, }; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 1dc118d83cc..deea0dc9999 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -7,7 +7,7 @@ * GNU General Public License version 2 only. * * Copyright (c) 2011 by: - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab */ #include <linux/module.h> @@ -20,6 +20,7 @@ #include <linux/mmzone.h> #include <linux/smp.h> #include <linux/bitmap.h> +#include <linux/math64.h> #include <asm/processor.h> #include <asm/mce.h> @@ -33,7 +34,7 @@ static int probed; /* * Alter this version for the module when modifications are made */ -#define SBRIDGE_REVISION " Ver: 1.0.0 " +#define SBRIDGE_REVISION " Ver: 1.1.0 " #define EDAC_MOD_STR "sbridge_edac" /* @@ -49,7 +50,7 @@ static int probed; * Get a bit field at register value <v>, from bit <lo> to bit <hi> */ #define GET_BITFIELD(v, lo, hi) \ - (((v) & ((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) >> (lo)) + (((v) & GENMASK_ULL(hi, lo)) >> (lo)) /* * sbridge Memory Controller Registers @@ -57,7 +58,7 @@ static int probed; /* * FIXME: For now, let's order by device function, as it makes - * easier for driver's development proccess. This table should be + * easier for driver's development process. This table should be * moved to pci_id.h when submitted upstream */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */ @@ -82,11 +83,17 @@ static int probed; #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3 0x3c77 /* 16.7 */ /* Devices 12 Function 6, Offsets 0x80 to 0xcc */ -static const u32 dram_rule[] = { +static const u32 sbridge_dram_rule[] = { 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8, 0xc0, 0xc8, }; -#define MAX_SAD ARRAY_SIZE(dram_rule) + +static const u32 ibridge_dram_rule[] = { + 0x60, 0x68, 0x70, 0x78, 0x80, + 0x88, 0x90, 0x98, 0xa0, 0xa8, + 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, + 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, +}; #define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff) #define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3) @@ -107,43 +114,50 @@ static char *get_dram_attr(u32 reg) } } -static const u32 interleave_list[] = { +static const u32 sbridge_interleave_list[] = { 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc, 0xc4, 0xcc, }; -#define MAX_INTERLEAVE ARRAY_SIZE(interleave_list) - -#define SAD_PKG0(reg) GET_BITFIELD(reg, 0, 2) -#define SAD_PKG1(reg) GET_BITFIELD(reg, 3, 5) -#define SAD_PKG2(reg) GET_BITFIELD(reg, 8, 10) -#define SAD_PKG3(reg) GET_BITFIELD(reg, 11, 13) -#define SAD_PKG4(reg) GET_BITFIELD(reg, 16, 18) -#define SAD_PKG5(reg) GET_BITFIELD(reg, 19, 21) -#define SAD_PKG6(reg) GET_BITFIELD(reg, 24, 26) -#define SAD_PKG7(reg) GET_BITFIELD(reg, 27, 29) - -static inline int sad_pkg(u32 reg, int interleave) + +static const u32 ibridge_interleave_list[] = { + 0x64, 0x6c, 0x74, 0x7c, 0x84, + 0x8c, 0x94, 0x9c, 0xa4, 0xac, + 0xb4, 0xbc, 0xc4, 0xcc, 0xd4, + 0xdc, 0xe4, 0xec, 0xf4, 0xfc, +}; + +struct interleave_pkg { + unsigned char start; + unsigned char end; +}; + +static const struct interleave_pkg sbridge_interleave_pkg[] = { + { 0, 2 }, + { 3, 5 }, + { 8, 10 }, + { 11, 13 }, + { 16, 18 }, + { 19, 21 }, + { 24, 26 }, + { 27, 29 }, +}; + +static const struct interleave_pkg ibridge_interleave_pkg[] = { + { 0, 3 }, + { 4, 7 }, + { 8, 11 }, + { 12, 15 }, + { 16, 19 }, + { 20, 23 }, + { 24, 27 }, + { 28, 31 }, +}; + +static inline int sad_pkg(const struct interleave_pkg *table, u32 reg, + int interleave) { - switch (interleave) { - case 0: - return SAD_PKG0(reg); - case 1: - return SAD_PKG1(reg); - case 2: - return SAD_PKG2(reg); - case 3: - return SAD_PKG3(reg); - case 4: - return SAD_PKG4(reg); - case 5: - return SAD_PKG5(reg); - case 6: - return SAD_PKG6(reg); - case 7: - return SAD_PKG7(reg); - default: - return -EINVAL; - } + return GET_BITFIELD(reg, table[interleave].start, + table[interleave].end); } /* Devices 12 Function 7 */ @@ -261,7 +275,9 @@ static const u32 correrrthrsld[] = { /* Device 17, function 0 */ -#define RANK_CFG_A 0x0328 +#define SB_RANK_CFG_A 0x0328 + +#define IB_RANK_CFG_A 0x0320 #define IS_RDIMM_ENABLED(reg) GET_BITFIELD(reg, 11, 11) @@ -272,8 +288,23 @@ static const u32 correrrthrsld[] = { #define NUM_CHANNELS 4 #define MAX_DIMMS 3 /* Max DIMMS per channel */ +enum type { + SANDY_BRIDGE, + IVY_BRIDGE, +}; + +struct sbridge_pvt; struct sbridge_info { - u32 mcmtr; + enum type type; + u32 mcmtr; + u32 rankcfgr; + u64 (*get_tolm)(struct sbridge_pvt *pvt); + u64 (*get_tohm)(struct sbridge_pvt *pvt); + const u32 *dram_rule; + const u32 *interleave_list; + const struct interleave_pkg *interleave_pkg; + u8 max_sad; + u8 max_interleave; }; struct sbridge_channel { @@ -304,8 +335,9 @@ struct sbridge_dev { struct sbridge_pvt { struct pci_dev *pci_ta, *pci_ddrio, *pci_ras; - struct pci_dev *pci_sad0, *pci_sad1, *pci_ha0; - struct pci_dev *pci_br; + struct pci_dev *pci_sad0, *pci_sad1; + struct pci_dev *pci_ha0, *pci_ha1; + struct pci_dev *pci_br0, *pci_br1; struct pci_dev *pci_tad[NUM_CHANNELS]; struct sbridge_dev *sbridge_dev; @@ -313,8 +345,6 @@ struct sbridge_pvt { struct sbridge_info info; struct sbridge_channel channel[NUM_CHANNELS]; - int csrow_map[NUM_CHANNELS][MAX_DIMMS]; - /* Memory type detection */ bool is_mirrored, is_lockstep, is_close_pg; @@ -332,30 +362,31 @@ struct sbridge_pvt { u64 tolm, tohm; }; -#define PCI_DESCR(device, function, device_id) \ - .dev = (device), \ - .func = (function), \ - .dev_id = (device_id) +#define PCI_DESCR(device, function, device_id, opt) \ + .dev = (device), \ + .func = (function), \ + .dev_id = (device_id), \ + .optional = opt static const struct pci_id_descr pci_dev_descr_sbridge[] = { /* Processor Home Agent */ - { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0) }, + { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0) }, /* Memory controller */ - { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA) }, - { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS) }, - { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0) }, - { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1) }, - { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2) }, - { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3) }, - { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO) }, + { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0) }, + { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0) }, + { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0) }, + { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0) }, + { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0) }, + { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0) }, + { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1) }, /* System Address Decoder */ - { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0) }, - { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1) }, + { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0) }, + { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0) }, /* Broadcast Registers */ - { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR) }, + { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) }, }; #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } @@ -364,17 +395,81 @@ static const struct pci_id_table pci_dev_descr_sbridge_table[] = { {0,} /* 0 terminated list. */ }; +/* This changes depending if 1HA or 2HA: + * 1HA: + * 0x0eb8 (17.0) is DDRIO0 + * 2HA: + * 0x0ebc (17.4) is DDRIO0 + */ +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0 0x0eb8 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0 0x0ebc + +/* pci ids */ +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0 0x0ea0 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA 0x0ea8 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS 0x0e71 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0 0x0eaa +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1 0x0eab +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2 0x0eac +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3 0x0ead +#define PCI_DEVICE_ID_INTEL_IBRIDGE_SAD 0x0ec8 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_BR0 0x0ec9 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_BR1 0x0eca +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1 0x0e60 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA 0x0e68 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS 0x0e79 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 0x0e6a +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1 0x0e6b + +static const struct pci_id_descr pci_dev_descr_ibridge[] = { + /* Processor Home Agent */ + { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0) }, + + /* Memory controller */ + { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0) }, + { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0) }, + { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0) }, + { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0) }, + { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0) }, + { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0) }, + + /* System Address Decoder */ + { PCI_DESCR(22, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0) }, + + /* Broadcast Registers */ + { PCI_DESCR(22, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1) }, + { PCI_DESCR(22, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0) }, + + /* Optional, mode 2HA */ + { PCI_DESCR(28, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1) }, +#if 0 + { PCI_DESCR(29, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1) }, + { PCI_DESCR(29, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1) }, +#endif + { PCI_DESCR(29, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1) }, + { PCI_DESCR(29, 3, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1) }, + + { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1) }, + { PCI_DESCR(17, 4, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1) }, +}; + +static const struct pci_id_table pci_dev_descr_ibridge_table[] = { + PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge), + {0,} /* 0 terminated list. */ +}; + /* * pci_device_id table for which devices we are looking for */ -static const struct pci_device_id sbridge_pci_tbl[] __devinitdata = { +static const struct pci_device_id sbridge_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)}, {0,} /* 0 terminated list. */ }; /**************************************************************************** - Anciliary status routines + Ancillary status routines ****************************************************************************/ static inline int numrank(u32 mtr) @@ -382,8 +477,8 @@ static inline int numrank(u32 mtr) int ranks = (1 << RANK_CNT_BITS(mtr)); if (ranks > 4) { - debugf0("Invalid number of ranks: %d (max = 4) raw value = %x (%04x)", - ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr); + edac_dbg(0, "Invalid number of ranks: %d (max = 4) raw value = %x (%04x)\n", + ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr); return -EINVAL; } @@ -395,8 +490,8 @@ static inline int numrow(u32 mtr) int rows = (RANK_WIDTH_BITS(mtr) + 12); if (rows < 13 || rows > 18) { - debugf0("Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)", - rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr); + edac_dbg(0, "Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)\n", + rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr); return -EINVAL; } @@ -408,8 +503,8 @@ static inline int numcol(u32 mtr) int cols = (COL_WIDTH_BITS(mtr) + 10); if (cols > 12) { - debugf0("Invalid number of cols: %d (max = 4) raw value = %x (%04x)", - cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr); + edac_dbg(0, "Invalid number of cols: %d (max = 4) raw value = %x (%04x)\n", + cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr); return -EINVAL; } @@ -458,6 +553,52 @@ static void free_sbridge_dev(struct sbridge_dev *sbridge_dev) kfree(sbridge_dev); } +static u64 sbridge_get_tolm(struct sbridge_pvt *pvt) +{ + u32 reg; + + /* Address range is 32:28 */ + pci_read_config_dword(pvt->pci_sad1, TOLM, ®); + return GET_TOLM(reg); +} + +static u64 sbridge_get_tohm(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->pci_sad1, TOHM, ®); + return GET_TOHM(reg); +} + +static u64 ibridge_get_tolm(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->pci_br1, TOLM, ®); + + return GET_TOLM(reg); +} + +static u64 ibridge_get_tohm(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->pci_br1, TOHM, ®); + + return GET_TOHM(reg); +} + +static inline u8 sad_pkg_socket(u8 pkg) +{ + /* on Ivy Bridge, nodeID is SASS, where A is HA and S is node id */ + return (pkg >> 3) | (pkg & 0x3); +} + +static inline u8 sad_pkg_ha(u8 pkg) +{ + return (pkg >> 2) & 0x1; +} + /**************************************************************************** Memory check routines ****************************************************************************/ @@ -476,8 +617,8 @@ static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot, if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot && PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) { - debugf1("Associated %02x.%02x.%d with %p\n", - bus, slot, func, sbridge_dev->pdev[i]); + edac_dbg(1, "Associated %02x.%02x.%d with %p\n", + bus, slot, func, sbridge_dev->pdev[i]); return sbridge_dev->pdev[i]; } } @@ -486,29 +627,14 @@ static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot, } /** - * sbridge_get_active_channels() - gets the number of channels and csrows + * check_if_ecc_is_active() - Checks if ECC is active * bus: Device bus - * @channels: Number of channels that will be returned - * @csrows: Number of csrows found - * - * Since EDAC core needs to know in advance the number of available channels - * and csrows, in order to allocate memory for csrows/channels, it is needed - * to run two similar steps. At the first step, implemented on this function, - * it checks the number of csrows/channels present at one socket, identified - * by the associated PCI bus. - * this is used in order to properly allocate the size of mci components. - * Note: one csrow is one dimm. */ -static int sbridge_get_active_channels(const u8 bus, unsigned *channels, - unsigned *csrows) +static int check_if_ecc_is_active(const u8 bus) { struct pci_dev *pdev = NULL; - int i, j; u32 mcmtr; - *channels = 0; - *csrows = 0; - pdev = get_pdev_slot_func(bus, 15, 0); if (!pdev) { sbridge_printk(KERN_ERR, "Couldn't find PCI device " @@ -522,90 +648,70 @@ static int sbridge_get_active_channels(const u8 bus, unsigned *channels, sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n"); return -ENODEV; } - - for (i = 0; i < NUM_CHANNELS; i++) { - u32 mtr; - - /* Device 15 functions 2 - 5 */ - pdev = get_pdev_slot_func(bus, 15, 2 + i); - if (!pdev) { - sbridge_printk(KERN_ERR, "Couldn't find PCI device " - "%2x.%02d.%d!!!\n", - bus, 15, 2 + i); - return -ENODEV; - } - (*channels)++; - - for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { - pci_read_config_dword(pdev, mtr_regs[j], &mtr); - debugf1("Bus#%02x channel #%d MTR%d = %x\n", bus, i, j, mtr); - if (IS_DIMM_PRESENT(mtr)) - (*csrows)++; - } - } - - debugf0("Number of active channels: %d, number of active dimms: %d\n", - *channels, *csrows); - return 0; } -static int get_dimm_config(const struct mem_ctl_info *mci) +static int get_dimm_config(struct mem_ctl_info *mci) { struct sbridge_pvt *pvt = mci->pvt_info; - struct csrow_info *csr; - int i, j, banks, ranks, rows, cols, size, npages; - int csrow = 0; - unsigned long last_page = 0; + struct dimm_info *dimm; + unsigned i, j, banks, ranks, rows, cols, npages; + u64 size; u32 reg; enum edac_type mode; enum mem_type mtype; - pci_read_config_dword(pvt->pci_br, SAD_TARGET, ®); + pci_read_config_dword(pvt->pci_br0, SAD_TARGET, ®); pvt->sbridge_dev->source_id = SOURCE_ID(reg); - pci_read_config_dword(pvt->pci_br, SAD_CONTROL, ®); + pci_read_config_dword(pvt->pci_br0, SAD_CONTROL, ®); pvt->sbridge_dev->node_id = NODE_ID(reg); - debugf0("mc#%d: Node ID: %d, source ID: %d\n", - pvt->sbridge_dev->mc, - pvt->sbridge_dev->node_id, - pvt->sbridge_dev->source_id); + edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n", + pvt->sbridge_dev->mc, + pvt->sbridge_dev->node_id, + pvt->sbridge_dev->source_id); pci_read_config_dword(pvt->pci_ras, RASENABLES, ®); if (IS_MIRROR_ENABLED(reg)) { - debugf0("Memory mirror is enabled\n"); + edac_dbg(0, "Memory mirror is enabled\n"); pvt->is_mirrored = true; } else { - debugf0("Memory mirror is disabled\n"); + edac_dbg(0, "Memory mirror is disabled\n"); pvt->is_mirrored = false; } pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr); if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) { - debugf0("Lockstep is enabled\n"); + edac_dbg(0, "Lockstep is enabled\n"); mode = EDAC_S8ECD8ED; pvt->is_lockstep = true; } else { - debugf0("Lockstep is disabled\n"); + edac_dbg(0, "Lockstep is disabled\n"); mode = EDAC_S4ECD4ED; pvt->is_lockstep = false; } if (IS_CLOSE_PG(pvt->info.mcmtr)) { - debugf0("address map is on closed page mode\n"); + edac_dbg(0, "address map is on closed page mode\n"); pvt->is_close_pg = true; } else { - debugf0("address map is on open page mode\n"); + edac_dbg(0, "address map is on open page mode\n"); pvt->is_close_pg = false; } - pci_read_config_dword(pvt->pci_ta, RANK_CFG_A, ®); - if (IS_RDIMM_ENABLED(reg)) { - /* FIXME: Can also be LRDIMM */ - debugf0("Memory is registered\n"); - mtype = MEM_RDDR3; + if (pvt->pci_ddrio) { + pci_read_config_dword(pvt->pci_ddrio, pvt->info.rankcfgr, + ®); + if (IS_RDIMM_ENABLED(reg)) { + /* FIXME: Can also be LRDIMM */ + edac_dbg(0, "Memory is registered\n"); + mtype = MEM_RDDR3; + } else { + edac_dbg(0, "Memory is unregistered\n"); + mtype = MEM_DDR3; + } } else { - debugf0("Memory is unregistered\n"); - mtype = MEM_DDR3; + edac_dbg(0, "Cannot determine memory type\n"); + mtype = MEM_UNKNOWN; } /* On all supported DDR3 DIMM types, there are 8 banks available */ @@ -615,9 +721,11 @@ static int get_dimm_config(const struct mem_ctl_info *mci) u32 mtr; for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, + i, j, 0); pci_read_config_dword(pvt->pci_tad[i], mtr_regs[j], &mtr); - debugf4("Channel #%d MTR%d = %x\n", i, j, mtr); + edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr); if (IS_DIMM_PRESENT(mtr)) { pvt->channel[i].dimms++; @@ -626,36 +734,22 @@ static int get_dimm_config(const struct mem_ctl_info *mci) cols = numcol(mtr); /* DDR3 has 8 I/O banks */ - size = (rows * cols * banks * ranks) >> (20 - 3); + size = ((u64)rows * cols * banks * ranks) >> (20 - 3); npages = MiB_TO_PAGES(size); - debugf0("mc#%d: channel %d, dimm %d, %d Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", - pvt->sbridge_dev->mc, i, j, - size, npages, - banks, ranks, rows, cols); - csr = &mci->csrows[csrow]; - - csr->first_page = last_page; - csr->last_page = last_page + npages - 1; - csr->page_mask = 0UL; /* Unused */ - csr->nr_pages = npages; - csr->grain = 32; - csr->csrow_idx = csrow; - csr->dtype = (banks == 8) ? DEV_X8 : DEV_X4; - csr->ce_count = 0; - csr->ue_count = 0; - csr->mtype = mtype; - csr->edac_mode = mode; - csr->nr_channels = 1; - csr->channels[0].chan_idx = i; - csr->channels[0].ce_count = 0; - pvt->csrow_map[i][j] = csrow; - snprintf(csr->channels[0].label, - sizeof(csr->channels[0].label), + edac_dbg(0, "mc#%d: channel %d, dimm %d, %Ld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + pvt->sbridge_dev->mc, i, j, + size, npages, + banks, ranks, rows, cols); + + dimm->nr_pages = npages; + dimm->grain = 32; + dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4; + dimm->mtype = mtype; + dimm->edac_mode = mode; + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_Channel#%u_DIMM#%u", pvt->sbridge_dev->source_id, i, j); - last_page += npages; - csrow++; } } } @@ -670,29 +764,25 @@ static void get_memory_layout(const struct mem_ctl_info *mci) u32 reg; u64 limit, prv = 0; u64 tmp_mb; + u32 mb, kb; u32 rir_way; /* * Step 1) Get TOLM/TOHM ranges */ - /* Address range is 32:28 */ - pci_read_config_dword(pvt->pci_sad1, TOLM, - ®); - pvt->tolm = GET_TOLM(reg); + pvt->tolm = pvt->info.get_tolm(pvt); tmp_mb = (1 + pvt->tolm) >> 20; - debugf0("TOLM: %Lu.%03Lu GB (0x%016Lx)\n", - tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tolm); + mb = div_u64_rem(tmp_mb, 1000, &kb); + edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm); /* Address range is already 45:25 */ - pci_read_config_dword(pvt->pci_sad1, TOHM, - ®); - pvt->tohm = GET_TOHM(reg); + pvt->tohm = pvt->info.get_tohm(pvt); tmp_mb = (1 + pvt->tohm) >> 20; - debugf0("TOHM: %Lu.%03Lu GB (0x%016Lx)", - tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tohm); + mb = div_u64_rem(tmp_mb, 1000, &kb); + edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); /* * Step 2) Get SAD range and SAD Interleave list @@ -701,9 +791,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci) * algorithm bellow. */ prv = 0; - for (n_sads = 0; n_sads < MAX_SAD; n_sads++) { + for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) { /* SAD_LIMIT Address range is 45:26 */ - pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads], + pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads], ®); limit = SAD_LIMIT(reg); @@ -714,24 +804,26 @@ static void get_memory_layout(const struct mem_ctl_info *mci) break; tmp_mb = (limit + 1) >> 20; - debugf0("SAD#%d %s up to %Lu.%03Lu GB (0x%016Lx) %s reg=0x%08x\n", - n_sads, - get_dram_attr(reg), - tmp_mb / 1000, tmp_mb % 1000, - ((u64)tmp_mb) << 20L, - INTERLEAVE_MODE(reg) ? "Interleave: 8:6" : "Interleave: [8:6]XOR[18:16]", - reg); + mb = div_u64_rem(tmp_mb, 1000, &kb); + edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n", + n_sads, + get_dram_attr(reg), + mb, kb, + ((u64)tmp_mb) << 20L, + INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]", + reg); prv = limit; - pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads], + pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads], ®); - sad_interl = sad_pkg(reg, 0); + sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0); for (j = 0; j < 8; j++) { - if (j > 0 && sad_interl == sad_pkg(reg, j)) + u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, j); + if (j > 0 && sad_interl == pkg) break; - debugf0("SAD#%d, interleave #%d: %d\n", - n_sads, j, sad_pkg(reg, j)); + edac_dbg(0, "SAD#%d, interleave #%d: %d\n", + n_sads, j, pkg); } } @@ -747,17 +839,18 @@ static void get_memory_layout(const struct mem_ctl_info *mci) break; tmp_mb = (limit + 1) >> 20; - debugf0("TAD#%d: up to %Lu.%03Lu GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", - n_tads, tmp_mb / 1000, tmp_mb % 1000, - ((u64)tmp_mb) << 20L, - (u32)TAD_SOCK(reg), - (u32)TAD_CH(reg), - (u32)TAD_TGT0(reg), - (u32)TAD_TGT1(reg), - (u32)TAD_TGT2(reg), - (u32)TAD_TGT3(reg), - reg); - prv = tmp_mb; + mb = div_u64_rem(tmp_mb, 1000, &kb); + edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", + n_tads, mb, kb, + ((u64)tmp_mb) << 20L, + (u32)TAD_SOCK(reg), + (u32)TAD_CH(reg), + (u32)TAD_TGT0(reg), + (u32)TAD_TGT1(reg), + (u32)TAD_TGT2(reg), + (u32)TAD_TGT3(reg), + reg); + prv = limit; } /* @@ -771,11 +864,12 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tad_ch_nilv_offset[j], ®); tmp_mb = TAD_OFFSET(reg) >> 20; - debugf0("TAD CH#%d, offset #%d: %Lu.%03Lu GB (0x%016Lx), reg=0x%08x\n", - i, j, - tmp_mb / 1000, tmp_mb % 1000, - ((u64)tmp_mb) << 20L, - reg); + mb = div_u64_rem(tmp_mb, 1000, &kb); + edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n", + i, j, + mb, kb, + ((u64)tmp_mb) << 20L, + reg); } } @@ -795,12 +889,13 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = RIR_LIMIT(reg) >> 20; rir_way = 1 << RIR_WAY(reg); - debugf0("CH#%d RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d, reg=0x%08x\n", - i, j, - tmp_mb / 1000, tmp_mb % 1000, - ((u64)tmp_mb) << 20L, - rir_way, - reg); + mb = div_u64_rem(tmp_mb, 1000, &kb); + edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n", + i, j, + mb, kb, + ((u64)tmp_mb) << 20L, + rir_way, + reg); for (k = 0; k < rir_way; k++) { pci_read_config_dword(pvt->pci_tad[i], @@ -808,18 +903,19 @@ static void get_memory_layout(const struct mem_ctl_info *mci) ®); tmp_mb = RIR_OFFSET(reg) << 6; - debugf0("CH#%d RIR#%d INTL#%d, offset %Lu.%03Lu GB (0x%016Lx), tgt: %d, reg=0x%08x\n", - i, j, k, - tmp_mb / 1000, tmp_mb % 1000, - ((u64)tmp_mb) << 20L, - (u32)RIR_RNK_TGT(reg), - reg); + mb = div_u64_rem(tmp_mb, 1000, &kb); + edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", + i, j, k, + mb, kb, + ((u64)tmp_mb) << 20L, + (u32)RIR_RNK_TGT(reg), + reg); } } } } -struct mem_ctl_info *get_mci_for_node_id(u8 node_id) +static struct mem_ctl_info *get_mci_for_node_id(u8 node_id) { struct sbridge_dev *sbridge_dev; @@ -835,20 +931,21 @@ static int get_memory_error_data(struct mem_ctl_info *mci, u8 *socket, long *channel_mask, u8 *rank, - char *area_type) + char **area_type, char *msg) { struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; - char msg[256]; + struct pci_dev *pci_ha; int n_rir, n_sads, n_tads, sad_way, sck_xch; int sad_interl, idx, base_ch; int interleave_mode; - unsigned sad_interleave[MAX_INTERLEAVE]; + unsigned sad_interleave[pvt->info.max_interleave]; u32 reg; - u8 ch_way,sck_way; + u8 ch_way, sck_way, pkg, sad_ha = 0; u32 tad_offset; u32 rir_way; - u64 ch_addr, offset, limit, prv = 0; + u32 mb, kb; + u64 ch_addr, offset, limit = 0, prv = 0; /* @@ -858,22 +955,20 @@ static int get_memory_error_data(struct mem_ctl_info *mci, * range (e. g. VGA addresses). It is unlikely, however, that the * memory controller would generate an error on that range. */ - if ((addr > (u64) pvt->tolm) && (addr < (1L << 32))) { + if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) { sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } if (addr >= (u64)pvt->tohm) { sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } /* * Step 1) Get socket */ - for (n_sads = 0; n_sads < MAX_SAD; n_sads++) { - pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads], + for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) { + pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads], ®); if (!DRAM_RULE_ENABLE(reg)) @@ -882,62 +977,71 @@ static int get_memory_error_data(struct mem_ctl_info *mci, limit = SAD_LIMIT(reg); if (limit <= prv) { sprintf(msg, "Can't discover the memory socket"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } if (addr <= limit) break; prv = limit; } - if (n_sads == MAX_SAD) { + if (n_sads == pvt->info.max_sad) { sprintf(msg, "Can't discover the memory socket"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } - area_type = get_dram_attr(reg); + *area_type = get_dram_attr(reg); interleave_mode = INTERLEAVE_MODE(reg); - pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads], + pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads], ®); - sad_interl = sad_pkg(reg, 0); - for (sad_way = 0; sad_way < 8; sad_way++) { - if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way)) + + if (pvt->info.type == SANDY_BRIDGE) { + sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0); + for (sad_way = 0; sad_way < 8; sad_way++) { + u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, sad_way); + if (sad_way > 0 && sad_interl == pkg) + break; + sad_interleave[sad_way] = pkg; + edac_dbg(0, "SAD interleave #%d: %d\n", + sad_way, sad_interleave[sad_way]); + } + edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n", + pvt->sbridge_dev->mc, + n_sads, + addr, + limit, + sad_way + 7, + !interleave_mode ? "" : "XOR[18:16]"); + if (interleave_mode) + idx = ((addr >> 6) ^ (addr >> 16)) & 7; + else + idx = (addr >> 6) & 7; + switch (sad_way) { + case 1: + idx = 0; break; - sad_interleave[sad_way] = sad_pkg(reg, sad_way); - debugf0("SAD interleave #%d: %d\n", - sad_way, sad_interleave[sad_way]); - } - debugf0("mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n", - pvt->sbridge_dev->mc, - n_sads, - addr, - limit, - sad_way + 7, - INTERLEAVE_MODE(reg) ? "" : "XOR[18:16]"); - if (interleave_mode) - idx = ((addr >> 6) ^ (addr >> 16)) & 7; - else + case 2: + idx = idx & 1; + break; + case 4: + idx = idx & 3; + break; + case 8: + break; + default: + sprintf(msg, "Can't discover socket interleave"); + return -EINVAL; + } + *socket = sad_interleave[idx]; + edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n", + idx, sad_way, *socket); + } else { + /* Ivy Bridge's SAD mode doesn't support XOR interleave mode */ idx = (addr >> 6) & 7; - switch (sad_way) { - case 1: - idx = 0; - break; - case 2: - idx = idx & 1; - break; - case 4: - idx = idx & 3; - break; - case 8: - break; - default: - sprintf(msg, "Can't discover socket interleave"); - edac_mc_handle_ce_no_info(mci, msg); - return -EINVAL; + pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx); + *socket = sad_pkg_socket(pkg); + sad_ha = sad_pkg_ha(pkg); + edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n", + idx, *socket, sad_ha); } - *socket = sad_interleave[idx]; - debugf0("SAD interleave index: %d (wayness %d) = CPU socket %d\n", - idx, sad_way, *socket); /* * Move to the proper node structure, in order to access the @@ -947,7 +1051,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, if (!new_mci) { sprintf(msg, "Struct for socket #%u wasn't initialized", *socket); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } mci = new_mci; @@ -957,27 +1060,32 @@ static int get_memory_error_data(struct mem_ctl_info *mci, * Step 2) Get memory channel */ prv = 0; + if (pvt->info.type == SANDY_BRIDGE) + pci_ha = pvt->pci_ha0; + else { + if (sad_ha) + pci_ha = pvt->pci_ha1; + else + pci_ha = pvt->pci_ha0; + } for (n_tads = 0; n_tads < MAX_TAD; n_tads++) { - pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads], - ®); + pci_read_config_dword(pci_ha, tad_dram_rule[n_tads], ®); limit = TAD_LIMIT(reg); if (limit <= prv) { sprintf(msg, "Can't discover the memory channel"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } if (addr <= limit) break; prv = limit; } + if (n_tads == MAX_TAD) { + sprintf(msg, "Can't discover the memory channel"); + return -EINVAL; + } + ch_way = TAD_CH(reg) + 1; sck_way = TAD_SOCK(reg) + 1; - /* - * FIXME: Is it right to always use channel 0 for offsets? - */ - pci_read_config_dword(pvt->pci_tad[0], - tad_ch_nilv_offset[n_tads], - &tad_offset); if (ch_way == 3) idx = addr >> 6; @@ -1003,11 +1111,14 @@ static int get_memory_error_data(struct mem_ctl_info *mci, break; default: sprintf(msg, "Can't discover the TAD target"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } *channel_mask = 1 << base_ch; + pci_read_config_dword(pvt->pci_tad[base_ch], + tad_ch_nilv_offset[n_tads], + &tad_offset); + if (pvt->is_mirrored) { *channel_mask |= 1 << ((base_ch + 2) % 4); switch(ch_way) { @@ -1017,7 +1128,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, break; default: sprintf(msg, "Invalid mirror set. Can't decode addr"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } } else @@ -1028,16 +1138,16 @@ static int get_memory_error_data(struct mem_ctl_info *mci, offset = TAD_OFFSET(tad_offset); - debugf0("TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n", - n_tads, - addr, - limit, - (u32)TAD_SOCK(reg), - ch_way, - offset, - idx, - base_ch, - *channel_mask); + edac_dbg(0, "TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n", + n_tads, + addr, + limit, + (u32)TAD_SOCK(reg), + ch_way, + offset, + idx, + base_ch, + *channel_mask); /* Calculate channel address */ /* Remove the TAD offset */ @@ -1045,7 +1155,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, if (offset > addr) { sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!", offset, addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } addr -= offset; @@ -1053,7 +1162,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, ch_addr = addr & 0x7f; /* Remove socket wayness and remove 6 bits */ addr >>= 6; - addr /= sck_xch; + addr = div_u64(addr, sck_xch); #if 0 /* Divide by channel way */ addr = addr / ch_way; @@ -1073,19 +1182,18 @@ static int get_memory_error_data(struct mem_ctl_info *mci, continue; limit = RIR_LIMIT(reg); - - debugf0("RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d\n", - n_rir, - (limit >> 20) / 1000, (limit >> 20) % 1000, - limit, - 1 << RIR_WAY(reg)); + mb = div_u64_rem(limit >> 20, 1000, &kb); + edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n", + n_rir, + mb, kb, + limit, + 1 << RIR_WAY(reg)); if (ch_addr <= limit) break; } if (n_rir == MAX_RIR_RANGES) { sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx", ch_addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } rir_way = RIR_WAY(reg); @@ -1100,12 +1208,12 @@ static int get_memory_error_data(struct mem_ctl_info *mci, ®); *rank = RIR_RNK_TGT(reg); - debugf0("RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", - n_rir, - ch_addr, - limit, - rir_way, - idx); + edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", + n_rir, + ch_addr, + limit, + rir_way, + idx); return 0; } @@ -1122,14 +1230,14 @@ static void sbridge_put_devices(struct sbridge_dev *sbridge_dev) { int i; - debugf0(__FILE__ ": %s()\n", __func__); + edac_dbg(0, "\n"); for (i = 0; i < sbridge_dev->n_devs; i++) { struct pci_dev *pdev = sbridge_dev->pdev[i]; if (!pdev) continue; - debugf0("Removing dev %02x:%02x.%d\n", - pdev->bus->number, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + edac_dbg(0, "Removing dev %02x:%02x.%d\n", + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); pci_dev_put(pdev); } } @@ -1144,12 +1252,6 @@ static void sbridge_put_all_devices(void) } } -/* - * sbridge_get_all_devices Find and perform 'get' operation on the MCH's - * device/functions we want to reference for this driver - * - * Need to 'get' device 16 func 1 and func 2 - */ static int sbridge_get_onedevice(struct pci_dev **prev, u8 *num_mc, const struct pci_id_table *table, @@ -1161,7 +1263,7 @@ static int sbridge_get_onedevice(struct pci_dev **prev, struct pci_dev *pdev = NULL; u8 bus = 0; - sbridge_printk(KERN_INFO, + sbridge_printk(KERN_DEBUG, "Seeking for: dev %02x.%d PCI ID %04x:%04x\n", dev_descr->dev, dev_descr->func, PCI_VENDOR_ID_INTEL, dev_descr->dev_id); @@ -1235,10 +1337,9 @@ static int sbridge_get_onedevice(struct pci_dev **prev, return -ENODEV; } - debugf0("Detected dev %02x:%d.%d PCI ID %04x:%04x\n", - bus, dev_descr->dev, - dev_descr->func, - PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + edac_dbg(0, "Detected dev %02x:%d.%d PCI ID %04x:%04x\n", + bus, dev_descr->dev, dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); /* * As stated on drivers/pci/search.c, the reference count for @@ -1252,11 +1353,21 @@ static int sbridge_get_onedevice(struct pci_dev **prev, return 0; } -static int sbridge_get_all_devices(u8 *num_mc) +/* + * sbridge_get_all_devices - Find and perform 'get' operation on the MCH's + * device/functions we want to reference for this driver. + * Need to 'get' device 16 func 1 and func 2. + * @num_mc: pointer to the memory controllers count, to be incremented in case + * of success. + * @table: model specific table + * + * returns 0 in case of success or error code + */ +static int sbridge_get_all_devices(u8 *num_mc, + const struct pci_id_table *table) { int i, rc; struct pci_dev *pdev = NULL; - const struct pci_id_table *table = pci_dev_descr_sbridge_table; while (table && table->descr) { for (i = 0; i < table->n_devs; i++) { @@ -1280,8 +1391,8 @@ static int sbridge_get_all_devices(u8 *num_mc) return 0; } -static int mci_bind_devs(struct mem_ctl_info *mci, - struct sbridge_dev *sbridge_dev) +static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, + struct sbridge_dev *sbridge_dev) { struct sbridge_pvt *pvt = mci->pvt_info; struct pci_dev *pdev; @@ -1309,7 +1420,7 @@ static int mci_bind_devs(struct mem_ctl_info *mci, case 13: switch (func) { case 6: - pvt->pci_br = pdev; + pvt->pci_br0 = pdev; break; default: goto error; @@ -1355,16 +1466,15 @@ static int mci_bind_devs(struct mem_ctl_info *mci, goto error; } - debugf0("Associated PCI %02x.%02d.%d with dev = %p\n", - sbridge_dev->bus, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), - pdev); + edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n", + sbridge_dev->bus, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + pdev); } /* Check if everything were registered */ if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 || - !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta || - !pvt->pci_ddrio) + !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta) goto enodev; for (i = 0; i < NUM_CHANNELS; i++) { @@ -1384,6 +1494,131 @@ error: return -EINVAL; } +static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, + struct sbridge_dev *sbridge_dev) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + struct pci_dev *pdev, *tmp; + int i, func, slot; + bool mode_2ha = false; + + tmp = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, NULL); + if (tmp) { + mode_2ha = true; + pci_dev_put(tmp); + } + + for (i = 0; i < sbridge_dev->n_devs; i++) { + pdev = sbridge_dev->pdev[i]; + if (!pdev) + continue; + slot = PCI_SLOT(pdev->devfn); + func = PCI_FUNC(pdev->devfn); + + switch (slot) { + case 14: + if (func == 0) { + pvt->pci_ha0 = pdev; + break; + } + goto error; + case 15: + switch (func) { + case 0: + pvt->pci_ta = pdev; + break; + case 1: + pvt->pci_ras = pdev; + break; + case 4: + case 5: + /* if we have 2 HAs active, channels 2 and 3 + * are in other device */ + if (mode_2ha) + break; + /* fall through */ + case 2: + case 3: + pvt->pci_tad[func - 2] = pdev; + break; + default: + goto error; + } + break; + case 17: + if (func == 4) { + pvt->pci_ddrio = pdev; + break; + } else if (func == 0) { + if (!mode_2ha) + pvt->pci_ddrio = pdev; + break; + } + goto error; + case 22: + switch (func) { + case 0: + pvt->pci_sad0 = pdev; + break; + case 1: + pvt->pci_br0 = pdev; + break; + case 2: + pvt->pci_br1 = pdev; + break; + default: + goto error; + } + break; + case 28: + if (func == 0) { + pvt->pci_ha1 = pdev; + break; + } + goto error; + case 29: + /* we shouldn't have this device if we have just one + * HA present */ + WARN_ON(!mode_2ha); + if (func == 2 || func == 3) { + pvt->pci_tad[func] = pdev; + break; + } + goto error; + default: + goto error; + } + + edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n", + sbridge_dev->bus, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + pdev); + } + + /* Check if everything were registered */ + if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 || + !pvt->pci_br1 || !pvt->pci_tad || !pvt->pci_ras || + !pvt->pci_ta) + goto enodev; + + for (i = 0; i < NUM_CHANNELS; i++) { + if (!pvt->pci_tad[i]) + goto enodev; + } + return 0; + +enodev: + sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); + return -ENODEV; + +error: + sbridge_printk(KERN_ERR, + "Device %d, function %d is out of the expected range\n", + slot, func); + return -EINVAL; +} + /**************************************************************************** Error check routines ****************************************************************************/ @@ -1399,11 +1634,12 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, { struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; - char *type, *optype, *msg, *recoverable_msg; + enum hw_event_mc_err_type tp_event; + char *type, *optype, msg[256]; bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); - bool recoverable = GET_BITFIELD(m->status, 56, 56); + bool recoverable; u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); u32 mscod = GET_BITFIELD(m->status, 16, 31); u32 errcode = GET_BITFIELD(m->status, 0, 15); @@ -1411,16 +1647,29 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, u32 optypenum = GET_BITFIELD(m->status, 4, 6); long channel_mask, first_channel; u8 rank, socket; - int csrow, rc, dimm; - char *area_type = "Unknown"; + int rc, dimm; + char *area_type = NULL; - if (ripv) - type = "NON_FATAL"; + if (pvt->info.type == IVY_BRIDGE) + recoverable = true; else - type = "FATAL"; + recoverable = GET_BITFIELD(m->status, 56, 56); + + if (uncorrected_error) { + if (ripv) { + type = "FATAL"; + tp_event = HW_EVENT_ERR_FATAL; + } else { + type = "NON_FATAL"; + tp_event = HW_EVENT_ERR_UNCORRECTED; + } + } else { + type = "CORRECTED"; + tp_event = HW_EVENT_ERR_CORRECTED; + } /* - * According with Table 15-9 of the Intel Archictecture spec vol 3A, + * According with Table 15-9 of the Intel Architecture spec vol 3A, * memory errors should fit in this mask: * 000f 0000 1mmm cccc (binary) * where: @@ -1435,19 +1684,19 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, } else { switch (optypenum) { case 0: - optype = "generic undef request"; + optype = "generic undef request error"; break; case 1: - optype = "memory read"; + optype = "memory read error"; break; case 2: - optype = "memory write"; + optype = "memory write error"; break; case 3: - optype = "addr/cmd"; + optype = "addr/cmd error"; break; case 4: - optype = "memory scrubbing"; + optype = "memory scrubbing error"; break; default: optype = "reserved"; @@ -1455,14 +1704,18 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, } } + /* Only decode errors with an valid address (ADDRV) */ + if (!GET_BITFIELD(m->status, 58, 58)) + return; + rc = get_memory_error_data(mci, m->addr, &socket, - &channel_mask, &rank, area_type); + &channel_mask, &rank, &area_type, msg); if (rc < 0) - return; + goto err_parsing; new_mci = get_mci_for_node_id(socket); if (!new_mci) { - edac_mc_handle_ce_no_info(mci, "Error: socket got corrupted!"); - return; + strcpy(msg, "Error: socket got corrupted!"); + goto err_parsing; } mci = new_mci; pvt = mci->pvt_info; @@ -1476,45 +1729,38 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, else dimm = 2; - csrow = pvt->csrow_map[first_channel][dimm]; - - if (uncorrected_error && recoverable) - recoverable_msg = " recoverable"; - else - recoverable_msg = ""; /* - * FIXME: What should we do with "channel" information on mcelog? - * Probably, we can just discard it, as the channel information - * comes from the get_memory_error_data() address decoding + * FIXME: On some memory configurations (mirror, lockstep), the + * Memory Controller can't point the error to a single DIMM. The + * EDAC core should be handling the channel mask, in order to point + * to the group of dimm's where the error may be happening. */ - msg = kasprintf(GFP_ATOMIC, - "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), " - "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n", - core_err_cnt, - area_type, - optype, - type, - recoverable_msg, - overflow ? "OVERFLOW" : "", - m->cpu, - mscod, errcode, - channel, /* 1111b means not specified */ - (long long) m->addr, - socket, - first_channel, /* This is the real channel on SB */ - channel_mask, - rank); - - debugf0("%s", msg); + snprintf(msg, sizeof(msg), + "%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) ? " recoverable" : "", + area_type, + mscod, errcode, + socket, + channel_mask, + rank); + + edac_dbg(0, "%s\n", msg); + + /* FIXME: need support for channel mask */ /* Call the helper to output message */ - if (uncorrected_error) - edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg); - else - edac_mc_handle_fbd_ce(mci, csrow, 0, msg); + edac_mc_handle_error(tp_event, mci, core_err_cnt, + m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, + channel, dimm, -1, + optype, msg); + return; +err_parsing: + edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, + -1, -1, -1, + msg, ""); - kfree(msg); } /* @@ -1582,6 +1828,10 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; struct mem_ctl_info *mci; struct sbridge_pvt *pvt; + char *type; + + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + return NOTIFY_DONE; mci = get_mci_for_node_id(mce->socketid); if (!mci) @@ -1597,17 +1847,23 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, if ((mce->status & 0xefff) >> 7 != 1) return NOTIFY_DONE; - printk("sbridge: HANDLING MCE MEMORY ERROR\n"); + if (mce->mcgstatus & MCG_STATUS_MCIP) + type = "Exception"; + else + type = "Event"; + + sbridge_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n"); - printk("CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", - mce->extcpu, mce->mcgstatus, mce->bank, mce->status); - printk("TSC %llx ", mce->tsc); - printk("ADDR %llx ", mce->addr); - printk("MISC %llx ", mce->misc); + sbridge_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx " + "Bank %d: %016Lx\n", mce->extcpu, type, + mce->mcgstatus, mce->bank, mce->status); + sbridge_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc); + sbridge_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr); + sbridge_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc); - printk("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - mce->cpuvendor, mce->cpuid, mce->time, - mce->socketid, mce->apicid); + sbridge_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET " + "%u APIC %x\n", mce->cpuvendor, mce->cpuid, + mce->time, mce->socketid, mce->apicid); /* Only handle if it is the right mc controller */ if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc) @@ -1647,8 +1903,7 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) struct sbridge_pvt *pvt; if (unlikely(!mci || !mci->pvt_info)) { - debugf0("MC: " __FILE__ ": %s(): dev = %p\n", - __func__, &sbridge_dev->pdev[0]->dev); + edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev); sbridge_printk(KERN_ERR, "Couldn't find mci handler\n"); return; @@ -1656,38 +1911,46 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) pvt = mci->pvt_info; - debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", - __func__, mci, &sbridge_dev->pdev[0]->dev); - - mce_unregister_decode_chain(&sbridge_mce_dec); + edac_dbg(0, "MC: mci = %p, dev = %p\n", + mci, &sbridge_dev->pdev[0]->dev); /* Remove MC sysfs nodes */ - edac_mc_del_mc(mci->dev); + edac_mc_del_mc(mci->pdev); - debugf1("%s: free mci struct\n", mci->ctl_name); + edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); kfree(mci->ctl_name); edac_mc_free(mci); sbridge_dev->mci = NULL; } -static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) +static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) { struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct sbridge_pvt *pvt; - int rc, channels, csrows; + struct pci_dev *pdev = sbridge_dev->pdev[0]; + int rc; /* Check the number of active and not disabled channels */ - rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows); + rc = check_if_ecc_is_active(sbridge_dev->bus); if (unlikely(rc < 0)) return rc; /* allocate a new MC control structure */ - mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc); + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = NUM_CHANNELS; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = MAX_DIMMS; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers, + sizeof(*pvt)); + if (unlikely(!mci)) return -ENOMEM; - debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", - __func__, mci, &sbridge_dev->pdev[0]->dev); + edac_dbg(0, "MC: mci = %p, dev = %p\n", + mci, &pdev->dev); pvt = mci->pvt_info; memset(pvt, 0, sizeof(*pvt)); @@ -1701,34 +1964,60 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = "sbridge_edac.c"; mci->mod_ver = SBRIDGE_REVISION; - mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx); - mci->dev_name = pci_name(sbridge_dev->pdev[0]); + mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; /* Set the function pointer to an actual operation function */ mci->edac_check = sbridge_check_error; - /* Store pci devices at mci for faster access */ - rc = mci_bind_devs(mci, sbridge_dev); - if (unlikely(rc < 0)) - goto fail0; + pvt->info.type = type; + if (type == IVY_BRIDGE) { + pvt->info.rankcfgr = IB_RANK_CFG_A; + pvt->info.get_tolm = ibridge_get_tolm; + pvt->info.get_tohm = ibridge_get_tohm; + pvt->info.dram_rule = ibridge_dram_rule; + pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); + pvt->info.interleave_list = ibridge_interleave_list; + pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); + pvt->info.interleave_pkg = ibridge_interleave_pkg; + mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx); + + /* Store pci devices at mci for faster access */ + rc = ibridge_mci_bind_devs(mci, sbridge_dev); + if (unlikely(rc < 0)) + goto fail0; + } else { + pvt->info.rankcfgr = SB_RANK_CFG_A; + pvt->info.get_tolm = sbridge_get_tolm; + pvt->info.get_tohm = sbridge_get_tohm; + pvt->info.dram_rule = sbridge_dram_rule; + pvt->info.max_sad = ARRAY_SIZE(sbridge_dram_rule); + pvt->info.interleave_list = sbridge_interleave_list; + pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list); + pvt->info.interleave_pkg = sbridge_interleave_pkg; + mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx); + + /* Store pci devices at mci for faster access */ + rc = sbridge_mci_bind_devs(mci, sbridge_dev); + if (unlikely(rc < 0)) + goto fail0; + } + /* Get dimm basic config and the memory layout */ get_dimm_config(mci); get_memory_layout(mci); /* record ptr to the generic device */ - mci->dev = &sbridge_dev->pdev[0]->dev; + mci->pdev = &pdev->dev; /* add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { - debugf0("MC: " __FILE__ - ": %s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); rc = -EINVAL; goto fail0; } - mce_register_decode_chain(&sbridge_mce_dec); return 0; fail0: @@ -1746,12 +2035,12 @@ fail0: * < 0 for error code */ -static int __devinit sbridge_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; u8 mc, num_mc = 0; struct sbridge_dev *sbridge_dev; + enum type type; /* get the pci devices we want to reserve for our use */ mutex_lock(&sbridge_edac_lock); @@ -1765,15 +2054,22 @@ static int __devinit sbridge_probe(struct pci_dev *pdev, } probed++; - rc = sbridge_get_all_devices(&num_mc); + if (pdev->device == PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA) { + rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_ibridge_table); + type = IVY_BRIDGE; + } else { + rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_sbridge_table); + type = SANDY_BRIDGE; + } if (unlikely(rc < 0)) goto fail0; mc = 0; list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { - debugf0("Registering MC#%d (%d of %d)\n", mc, mc + 1, num_mc); + edac_dbg(0, "Registering MC#%d (%d of %d)\n", + mc, mc + 1, num_mc); sbridge_dev->mc = mc++; - rc = sbridge_register_mci(sbridge_dev); + rc = sbridge_register_mci(sbridge_dev, type); if (unlikely(rc < 0)) goto fail1; } @@ -1797,11 +2093,11 @@ fail0: * sbridge_remove destructor for one instance of device * */ -static void __devexit sbridge_remove(struct pci_dev *pdev) +static void sbridge_remove(struct pci_dev *pdev) { struct sbridge_dev *sbridge_dev; - debugf0(__FILE__ ": %s()\n", __func__); + edac_dbg(0, "\n"); /* * we have a trouble here: pdev value for removal will be wrong, since @@ -1838,7 +2134,7 @@ MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl); static struct pci_driver sbridge_driver = { .name = "sbridge_edac", .probe = sbridge_probe, - .remove = __devexit_p(sbridge_remove), + .remove = sbridge_remove, .id_table = sbridge_pci_tbl, }; @@ -1850,15 +2146,18 @@ static int __init sbridge_init(void) { int pci_rc; - debugf2("MC: " __FILE__ ": %s()\n", __func__); + edac_dbg(2, "\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); pci_rc = pci_register_driver(&sbridge_driver); - - if (pci_rc >= 0) + if (pci_rc >= 0) { + mce_register_decode_chain(&sbridge_mce_dec); + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; + } sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", pci_rc); @@ -1872,8 +2171,9 @@ static int __init sbridge_init(void) */ static void __exit sbridge_exit(void) { - debugf2("MC: " __FILE__ ": %s()\n", __func__); + edac_dbg(2, "\n"); pci_unregister_driver(&sbridge_driver); + mce_unregister_decode_chain(&sbridge_mce_dec); } module_init(sbridge_init); @@ -1883,7 +2183,7 @@ module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); +MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); -MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge memory controllers - " +MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers - " SBRIDGE_REVISION); diff --git a/drivers/edac/tile_edac.c b/drivers/edac/tile_edac.c index 1d5cf06f6c6..578f915ee19 100644 --- a/drivers/edac/tile_edac.c +++ b/drivers/edac/tile_edac.c @@ -69,9 +69,12 @@ static void tile_edac_check(struct mem_ctl_info *mci) /* Check if the current error count is different from the saved one. */ if (mem_error.sbe_count != priv->ce_count) { - dev_dbg(mci->dev, "ECC CE err on node %d\n", priv->node); + dev_dbg(mci->pdev, "ECC CE err on node %d\n", priv->node); priv->ce_count = mem_error.sbe_count; - edac_mc_handle_ce(mci, 0, 0, 0, 0, 0, mci->ctl_name); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + 0, 0, 0, + 0, 0, -1, + mci->ctl_name, ""); } } @@ -79,11 +82,12 @@ static void tile_edac_check(struct mem_ctl_info *mci) * Initialize the 'csrows' table within the mci control structure with the * addressing of memory. */ -static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci) +static int tile_edac_init_csrows(struct mem_ctl_info *mci) { - struct csrow_info *csrow = &mci->csrows[0]; + struct csrow_info *csrow = mci->csrows[0]; struct tile_edac_priv *priv = mci->pvt_info; struct mshim_mem_info mem_info; + struct dimm_info *dimm = csrow->channels[0]->dimm; if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_info, sizeof(struct mshim_mem_info), MSHIM_MEM_INFO_OFF) != @@ -93,36 +97,35 @@ static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci) } if (mem_info.mem_ecc) - csrow->edac_mode = EDAC_SECDED; + dimm->edac_mode = EDAC_SECDED; else - csrow->edac_mode = EDAC_NONE; + dimm->edac_mode = EDAC_NONE; switch (mem_info.mem_type) { case DDR2: - csrow->mtype = MEM_DDR2; + dimm->mtype = MEM_DDR2; break; case DDR3: - csrow->mtype = MEM_DDR3; + dimm->mtype = MEM_DDR3; break; default: return -1; } - csrow->first_page = 0; - csrow->nr_pages = mem_info.mem_size >> PAGE_SHIFT; - csrow->last_page = csrow->first_page + csrow->nr_pages - 1; - csrow->grain = TILE_EDAC_ERROR_GRAIN; - csrow->dtype = DEV_UNKNOWN; + dimm->nr_pages = mem_info.mem_size >> PAGE_SHIFT; + dimm->grain = TILE_EDAC_ERROR_GRAIN; + dimm->dtype = DEV_UNKNOWN; return 0; } -static int __devinit tile_edac_mc_probe(struct platform_device *pdev) +static int tile_edac_mc_probe(struct platform_device *pdev) { char hv_file[32]; int hv_devhdl; struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct tile_edac_priv *priv; int rc; @@ -132,20 +135,30 @@ static int __devinit tile_edac_mc_probe(struct platform_device *pdev) return -EINVAL; /* A TILE MC has a single channel and one chip-select row. */ - mci = edac_mc_alloc(sizeof(struct tile_edac_priv), - TILE_EDAC_NR_CSROWS, TILE_EDAC_NR_CHANS, pdev->id); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = TILE_EDAC_NR_CSROWS; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = TILE_EDAC_NR_CHANS; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers, + sizeof(struct tile_edac_priv)); if (mci == NULL) return -ENOMEM; priv = mci->pvt_info; priv->node = pdev->id; priv->hv_devhdl = hv_devhdl; - mci->dev = &pdev->dev; + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_DDR2; mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->mod_name = DRV_NAME; +#ifdef __tilegx__ + mci->ctl_name = "TILEGx_Memory_Controller"; +#else mci->ctl_name = "TILEPro_Memory_Controller"; +#endif mci->dev_name = dev_name(&pdev->dev); mci->edac_check = tile_edac_check; @@ -173,7 +186,7 @@ static int __devinit tile_edac_mc_probe(struct platform_device *pdev) return 0; } -static int __devexit tile_edac_mc_remove(struct platform_device *pdev) +static int tile_edac_mc_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); @@ -189,7 +202,7 @@ static struct platform_driver tile_edac_mc_driver = { .owner = THIS_MODULE, }, .probe = tile_edac_mc_probe, - .remove = __devexit_p(tile_edac_mc_remove), + .remove = tile_edac_mc_remove, }; /* @@ -244,7 +257,6 @@ static void __exit tile_edac_exit(void) if (!pdev) continue; - platform_set_drvdata(pdev, NULL); platform_device_unregister(pdev); } platform_driver_unregister(&tile_edac_mc_driver); diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c index b6f47de152f..4891b450830 100644 --- a/drivers/edac/x38_edac.c +++ b/drivers/edac/x38_edac.c @@ -103,10 +103,10 @@ static int how_many_channel(struct pci_dev *pdev) pci_read_config_byte(pdev, X38_CAPID0 + 8, &capid0_8b); if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ - debugf0("In single channel mode.\n"); + edac_dbg(0, "In single channel mode\n"); x38_channel_num = 1; } else { - debugf0("In dual channel mode.\n"); + edac_dbg(0, "In dual channel mode\n"); x38_channel_num = 2; } @@ -151,7 +151,7 @@ static void x38_clear_error_info(struct mem_ctl_info *mci) { struct pci_dev *pdev; - pdev = to_pci_dev(mci->dev); + pdev = to_pci_dev(mci->pdev); /* * Clear any error bits. @@ -172,7 +172,7 @@ static void x38_get_and_clear_error_info(struct mem_ctl_info *mci, struct pci_dev *pdev; void __iomem *window = mci->pvt_info; - pdev = to_pci_dev(mci->dev); + pdev = to_pci_dev(mci->pdev); /* * This is a mess because there is no atomic way to read all the @@ -215,19 +215,26 @@ static void x38_process_error_info(struct mem_ctl_info *mci, return; if ((info->errsts ^ info->errsts2) & X38_ERRSTS_BITS) { - edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, + "UE overwrote CE", ""); info->errsts = info->errsts2; } for (channel = 0; channel < x38_channel_num; channel++) { log = info->eccerrlog[channel]; if (log & X38_ECCERRLOG_UE) { - edac_mc_handle_ue(mci, 0, 0, - eccerrlog_row(channel, log), "x38 UE"); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + 0, 0, 0, + eccerrlog_row(channel, log), + -1, -1, + "x38 UE", ""); } else if (log & X38_ECCERRLOG_CE) { - edac_mc_handle_ce(mci, 0, 0, - eccerrlog_syndrome(log), - eccerrlog_row(channel, log), 0, "x38 CE"); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + 0, 0, eccerrlog_syndrome(log), + eccerrlog_row(channel, log), + -1, -1, + "x38 CE", ""); } } } @@ -236,13 +243,12 @@ static void x38_check(struct mem_ctl_info *mci) { struct x38_error_info info; - debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + edac_dbg(1, "MC%d\n", mci->mc_idx); x38_get_and_clear_error_info(mci, &info); x38_process_error_info(mci, &info); } - -void __iomem *x38_map_mchbar(struct pci_dev *pdev) +static void __iomem *x38_map_mchbar(struct pci_dev *pdev) { union { u64 mchbar; @@ -317,14 +323,14 @@ static unsigned long drb_to_nr_pages( static int x38_probe1(struct pci_dev *pdev, int dev_idx) { int rc; - int i; + int i, j; struct mem_ctl_info *mci = NULL; - unsigned long last_page; + struct edac_mc_layer layers[2]; u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL]; bool stacked; void __iomem *window; - debugf0("MC: %s()\n", __func__); + edac_dbg(0, "MC:\n"); window = x38_map_mchbar(pdev); if (!window) @@ -335,13 +341,19 @@ static int x38_probe1(struct pci_dev *pdev, int dev_idx) how_many_channel(pdev); /* FIXME: unconventional pvt_info usage */ - mci = edac_mc_alloc(0, X38_RANKS, x38_channel_num, 0); + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = X38_RANKS; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = x38_channel_num; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0); if (!mci) return -ENOMEM; - debugf3("MC: %s(): init mci\n", __func__); + edac_dbg(3, "MC: init mci\n"); - mci->dev = &pdev->dev; + mci->pdev = &pdev->dev; mci->mtype_cap = MEM_FLAG_DDR2; mci->edac_ctl_cap = EDAC_FLAG_SECDED; @@ -363,41 +375,38 @@ static int x38_probe1(struct pci_dev *pdev, int dev_idx) * cumulative; the last one will contain the total memory * contained in all ranks. */ - last_page = -1UL; for (i = 0; i < mci->nr_csrows; i++) { unsigned long nr_pages; - struct csrow_info *csrow = &mci->csrows[i]; + struct csrow_info *csrow = mci->csrows[i]; nr_pages = drb_to_nr_pages(drbs, stacked, i / X38_RANKS_PER_CHANNEL, i % X38_RANKS_PER_CHANNEL); - if (nr_pages == 0) { - csrow->mtype = MEM_EMPTY; + if (nr_pages == 0) continue; - } - csrow->first_page = last_page + 1; - last_page += nr_pages; - csrow->last_page = last_page; - csrow->nr_pages = nr_pages; + for (j = 0; j < x38_channel_num; j++) { + struct dimm_info *dimm = csrow->channels[j]->dimm; - csrow->grain = nr_pages << PAGE_SHIFT; - csrow->mtype = MEM_DDR2; - csrow->dtype = DEV_UNKNOWN; - csrow->edac_mode = EDAC_UNKNOWN; + dimm->nr_pages = nr_pages / x38_channel_num; + dimm->grain = nr_pages << PAGE_SHIFT; + dimm->mtype = MEM_DDR2; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = EDAC_UNKNOWN; + } } x38_clear_error_info(mci); rc = -ENODEV; if (edac_mc_add_mc(mci)) { - debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(3, "MC: failed edac_mc_add_mc()\n"); goto fail; } /* get this far and it's successful */ - debugf3("MC: %s(): success\n", __func__); + edac_dbg(3, "MC: success\n"); return 0; fail: @@ -408,12 +417,11 @@ fail: return rc; } -static int __devinit x38_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int x38_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int rc; - debugf0("MC: %s()\n", __func__); + edac_dbg(0, "MC:\n"); if (pci_enable_device(pdev) < 0) return -EIO; @@ -425,11 +433,11 @@ static int __devinit x38_init_one(struct pci_dev *pdev, return rc; } -static void __devexit x38_remove_one(struct pci_dev *pdev) +static void x38_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); mci = edac_mc_del_mc(&pdev->dev); if (!mci) @@ -440,7 +448,7 @@ static void __devexit x38_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id x38_pci_tbl[] __devinitdata = { +static const struct pci_device_id x38_pci_tbl[] = { { PCI_VEND_DEV(INTEL, X38_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, X38}, @@ -454,7 +462,7 @@ MODULE_DEVICE_TABLE(pci, x38_pci_tbl); static struct pci_driver x38_driver = { .name = EDAC_MOD_STR, .probe = x38_init_one, - .remove = __devexit_p(x38_remove_one), + .remove = x38_remove_one, .id_table = x38_pci_tbl, }; @@ -462,7 +470,7 @@ static int __init x38_init(void) { int pci_rc; - debugf3("MC: %s()\n", __func__); + edac_dbg(3, "MC:\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); @@ -476,14 +484,14 @@ static int __init x38_init(void) mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X38_HB, NULL); if (!mci_pdev) { - debugf0("x38 pci_get_device fail\n"); + edac_dbg(0, "x38 pci_get_device fail\n"); pci_rc = -ENODEV; goto fail1; } pci_rc = x38_init_one(mci_pdev, x38_pci_tbl); if (pci_rc < 0) { - debugf0("x38 init fail\n"); + edac_dbg(0, "x38 init fail\n"); pci_rc = -ENODEV; goto fail1; } @@ -503,7 +511,7 @@ fail0: static void __exit x38_exit(void) { - debugf3("MC: %s()\n", __func__); + edac_dbg(3, "MC:\n"); pci_unregister_driver(&x38_driver); if (!x38_registered) { |
