diff options
Diffstat (limited to 'drivers/edac')
44 files changed, 2758 insertions, 1001 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 66719925970..878f09005fa 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -80,6 +80,29 @@ config EDAC_MM_EDAC occurred so that a particular failing memory module can be replaced. If unsure, select 'Y'. +config EDAC_GHES + bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" + depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) + default y + help + Not all machines support hardware-driven error report. Some of those + provide a BIOS-driven error report mechanism via ACPI, using the + APEI/GHES driver. By enabling this option, the error reports provided + by GHES are sent to userspace via the EDAC API. + + When this option is enabled, it will disable the hardware-driven + mechanisms, if a GHES BIOS is detected, entering into the + "Firmware First" mode. + + It should be noticed that keeping both GHES and a hardware-driven + error mechanism won't work well, as BIOS will race with OS, while + reading the error registers. So, if you want to not use "Firmware + first" GHES error mechanism, you should disable GHES either at + compilation time or by passing "ghes.disable=1" Kernel parameter + at boot time. + + In doubt, say 'Y'. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h" depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE @@ -122,7 +145,7 @@ config EDAC_E7XXX config EDAC_E752X tristate "Intel e752x (e7520, e7525, e7320) and 3100" - depends on EDAC_MM_EDAC && PCI && X86 && HOTPLUG + depends on EDAC_MM_EDAC && PCI && X86 help Support for error detection and correction on the Intel E7520, E7525, E7320 server chipsets. @@ -158,7 +181,7 @@ config EDAC_I3000 config EDAC_I3200 tristate "Intel 3200" - depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL + depends on EDAC_MM_EDAC && PCI && X86 help Support for error detection and correction on the Intel 3200 and 3210 server chipsets. @@ -224,7 +247,7 @@ config EDAC_I7300 config EDAC_SBRIDGE tristate "Intel Sandy-Bridge Integrated MC" depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on PCI_MMCONFIG && EXPERIMENTAL + depends on PCI_MMCONFIG help Support for error detection and correction the Intel Sandy Bridge Integrated Memory Controller. @@ -326,21 +349,21 @@ config EDAC_OCTEON_PC config EDAC_OCTEON_L2C tristate "Cavium Octeon Secondary Caches (L2C)" - depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_LMC tristate "Cavium Octeon DRAM Memory Controller (LMC)" - depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. config EDAC_OCTEON_PCI tristate "Cavium Octeon PCI Controller" - depends on EDAC_MM_EDAC && PCI && CPU_CAVIUM_OCTEON + depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC help Support for error detection and correction on the Cavium Octeon family of SOCs. diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 5608a9ba61b..4154ed6a02c 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -16,6 +16,7 @@ ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o endif +obj-$(CONFIG_EDAC_GHES) += ghes_edac.o obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o edac_mce_amd-y := mce_amd.o diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ad8bf2aa629..f8bf00010d4 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1,7 +1,7 @@ #include "amd64_edac.h" #include <asm/amd_nb.h> -static struct edac_pci_ctl_info *amd64_ctl_pci; +static struct edac_pci_ctl_info *pci_ctl; static int report_gart_errors; module_param(report_gart_errors, int, 0644); @@ -31,7 +31,7 @@ static struct ecc_settings **ecc_stngs; * *FIXME: Produce a better mapping/linearisation. */ -struct scrubrate { +static const struct scrubrate { u32 scrubval; /* bit pattern for scrub rate */ u32 bandwidth; /* bandwidth consumed (bytes/sec) */ } scrubrates[] = { @@ -98,6 +98,7 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, * * F15h: we select which DCT we access using F1x10C[DctCfgSel] * + * F16h: has only 1 DCT */ static int k8_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, const char *func) @@ -122,7 +123,7 @@ static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct) u32 reg = 0; amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, ®); - reg &= 0xfffffffe; + reg &= (pvt->model >= 0x30) ? ~3 : ~1; reg |= dct; amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg); } @@ -132,8 +133,9 @@ static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, { u8 dct = 0; + /* For F15 M30h, the second dct is DCT 3, refer to BKDG Section 2.10 */ if (addr >= 0x140 && addr <= 0x1a0) { - dct = 1; + dct = (pvt->model >= 0x30) ? 3 : 1; addr -= 0x100; } @@ -160,7 +162,7 @@ static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val, * scan the scrub rate mapping table for a close or matching bandwidth value to * issue. If requested is too big, then use last maximum value found. */ -static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) +static int __set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) { u32 scrubval; int i; @@ -196,29 +198,29 @@ static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) return 0; } -static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bw) +static int set_scrub_rate(struct mem_ctl_info *mci, u32 bw) { struct amd64_pvt *pvt = mci->pvt_info; u32 min_scrubrate = 0x5; - if (boot_cpu_data.x86 == 0xf) + if (pvt->fam == 0xf) min_scrubrate = 0x0; - /* F15h Erratum #505 */ - if (boot_cpu_data.x86 == 0x15) + /* Erratum #505 */ + if (pvt->fam == 0x15 && pvt->model < 0x10) f15h_select_dct(pvt, 0); - return __amd64_set_scrub_rate(pvt->F3, bw, min_scrubrate); + return __set_scrub_rate(pvt->F3, bw, min_scrubrate); } -static int amd64_get_scrub_rate(struct mem_ctl_info *mci) +static int get_scrub_rate(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; u32 scrubval = 0; int i, retval = -EINVAL; - /* F15h Erratum #505 */ - if (boot_cpu_data.x86 == 0x15) + /* Erratum #505 */ + if (pvt->fam == 0x15 && pvt->model < 0x10) f15h_select_dct(pvt, 0); amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); @@ -238,8 +240,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci) * returns true if the SysAddr given by sys_addr matches the * DRAM base/limit associated with node_id */ -static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, - unsigned nid) +static bool base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, u8 nid) { u64 addr; @@ -265,7 +266,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, u64 sys_addr) { struct amd64_pvt *pvt; - unsigned node_id; + u8 node_id; u32 intlv_en, bits; /* @@ -283,7 +284,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, if (intlv_en == 0) { for (node_id = 0; node_id < DRAM_RANGES; node_id++) { - if (amd64_base_limit_match(pvt, sys_addr, node_id)) + if (base_limit_match(pvt, sys_addr, node_id)) goto found; } goto err_no_match; @@ -307,7 +308,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, } /* sanity test for sys_addr */ - if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) { + if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) { amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address" "range for node %d with node interleaving enabled.\n", __func__, sys_addr, node_id); @@ -334,21 +335,45 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, u64 csbase, csmask, base_bits, mask_bits; u8 addr_shift; - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) { + if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) { csbase = pvt->csels[dct].csbases[csrow]; csmask = pvt->csels[dct].csmasks[csrow]; - base_bits = GENMASK(21, 31) | GENMASK(9, 15); - mask_bits = GENMASK(21, 29) | GENMASK(9, 15); + base_bits = GENMASK_ULL(31, 21) | GENMASK_ULL(15, 9); + mask_bits = GENMASK_ULL(29, 21) | GENMASK_ULL(15, 9); addr_shift = 4; + + /* + * F16h and F15h, models 30h and later need two addr_shift values: + * 8 for high and 6 for low (cf. F16h BKDG). + */ + } else if (pvt->fam == 0x16 || + (pvt->fam == 0x15 && pvt->model >= 0x30)) { + csbase = pvt->csels[dct].csbases[csrow]; + csmask = pvt->csels[dct].csmasks[csrow >> 1]; + + *base = (csbase & GENMASK_ULL(15, 5)) << 6; + *base |= (csbase & GENMASK_ULL(30, 19)) << 8; + + *mask = ~0ULL; + /* poke holes for the csmask */ + *mask &= ~((GENMASK_ULL(15, 5) << 6) | + (GENMASK_ULL(30, 19) << 8)); + + *mask |= (csmask & GENMASK_ULL(15, 5)) << 6; + *mask |= (csmask & GENMASK_ULL(30, 19)) << 8; + + return; } else { csbase = pvt->csels[dct].csbases[csrow]; csmask = pvt->csels[dct].csmasks[csrow >> 1]; addr_shift = 8; - if (boot_cpu_data.x86 == 0x15) - base_bits = mask_bits = GENMASK(19,30) | GENMASK(5,13); + if (pvt->fam == 0x15) + base_bits = mask_bits = + GENMASK_ULL(30,19) | GENMASK_ULL(13,5); else - base_bits = mask_bits = GENMASK(19,28) | GENMASK(5,13); + base_bits = mask_bits = + GENMASK_ULL(28,19) | GENMASK_ULL(13,5); } *base = (csbase & base_bits) << addr_shift; @@ -425,14 +450,14 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, struct amd64_pvt *pvt = mci->pvt_info; /* only revE and later have the DRAM Hole Address Register */ - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) { + if (pvt->fam == 0xf && pvt->ext_model < K8_REV_E) { edac_dbg(1, " revision %d for node %d does not support DHAR\n", pvt->ext_model, pvt->mc_node_id); return 1; } /* valid for Fam10h and above */ - if (boot_cpu_data.x86 >= 0x10 && !dhar_mem_hoist_valid(pvt)) { + if (pvt->fam >= 0x10 && !dhar_mem_hoist_valid(pvt)) { edac_dbg(1, " Dram Memory Hoisting is DISABLED on this system\n"); return 1; } @@ -464,10 +489,8 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, *hole_base = dhar_base(pvt); *hole_size = (1ULL << 32) - *hole_base; - if (boot_cpu_data.x86 > 0xf) - *hole_offset = f10_dhar_offset(pvt); - else - *hole_offset = k8_dhar_offset(pvt); + *hole_offset = (pvt->fam > 0xf) ? f10_dhar_offset(pvt) + : k8_dhar_offset(pvt); edac_dbg(1, " DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n", pvt->mc_node_id, (unsigned long)*hole_base, @@ -539,7 +562,7 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr) * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture * Programmer's Manual Volume 1 Application Programming. */ - dram_addr = (sys_addr & GENMASK(0, 39)) - dram_base; + dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base; edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n", (unsigned long)sys_addr, (unsigned long)dram_addr); @@ -575,7 +598,7 @@ static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr) * concerning translating a DramAddr to an InputAddr. */ intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0)); - input_addr = ((dram_addr >> intlv_shift) & GENMASK(12, 35)) + + input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) + (dram_addr & 0xfff); edac_dbg(2, " Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n", @@ -602,111 +625,6 @@ static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr) return input_addr; } - -/* - * @input_addr is an InputAddr associated with the node represented by mci. - * Translate @input_addr to a DramAddr and return the result. - */ -static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr) -{ - struct amd64_pvt *pvt; - unsigned node_id, intlv_shift; - u64 bits, dram_addr; - u32 intlv_sel; - - /* - * Near the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E) - * shows how to translate a DramAddr to an InputAddr. Here we reverse - * this procedure. When translating from a DramAddr to an InputAddr, the - * bits used for node interleaving are discarded. Here we recover these - * bits from the IntlvSel field of the DRAM Limit register (section - * 3.4.4.2) for the node that input_addr is associated with. - */ - pvt = mci->pvt_info; - node_id = pvt->mc_node_id; - - BUG_ON(node_id > 7); - - intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0)); - if (intlv_shift == 0) { - edac_dbg(1, " InputAddr 0x%lx translates to DramAddr of same value\n", - (unsigned long)input_addr); - - return input_addr; - } - - bits = ((input_addr & GENMASK(12, 35)) << intlv_shift) + - (input_addr & 0xfff); - - intlv_sel = dram_intlv_sel(pvt, node_id) & ((1 << intlv_shift) - 1); - dram_addr = bits + (intlv_sel << 12); - - edac_dbg(1, "InputAddr 0x%lx translates to DramAddr 0x%lx (%d node interleave bits)\n", - (unsigned long)input_addr, - (unsigned long)dram_addr, intlv_shift); - - return dram_addr; -} - -/* - * @dram_addr is a DramAddr that maps to the node represented by mci. Convert - * @dram_addr to a SysAddr. - */ -static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr) -{ - struct amd64_pvt *pvt = mci->pvt_info; - u64 hole_base, hole_offset, hole_size, base, sys_addr; - int ret = 0; - - ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, - &hole_size); - if (!ret) { - if ((dram_addr >= hole_base) && - (dram_addr < (hole_base + hole_size))) { - sys_addr = dram_addr + hole_offset; - - edac_dbg(1, "using DHAR to translate DramAddr 0x%lx to SysAddr 0x%lx\n", - (unsigned long)dram_addr, - (unsigned long)sys_addr); - - return sys_addr; - } - } - - base = get_dram_base(pvt, pvt->mc_node_id); - sys_addr = dram_addr + base; - - /* - * The sys_addr we have computed up to this point is a 40-bit value - * because the k8 deals with 40-bit values. However, the value we are - * supposed to return is a full 64-bit physical address. The AMD - * x86-64 architecture specifies that the most significant implemented - * address bit through bit 63 of a physical address must be either all - * 0s or all 1s. Therefore we sign-extend the 40-bit sys_addr to a - * 64-bit value below. See section 3.4.2 of AMD publication 24592: - * AMD x86-64 Architecture Programmer's Manual Volume 1 Application - * Programming. - */ - sys_addr |= ~((sys_addr & (1ull << 39)) - 1); - - edac_dbg(1, " Node %d, DramAddr 0x%lx to SysAddr 0x%lx\n", - pvt->mc_node_id, (unsigned long)dram_addr, - (unsigned long)sys_addr); - - return sys_addr; -} - -/* - * @input_addr is an InputAddr associated with the node given by mci. Translate - * @input_addr to a SysAddr. - */ -static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci, - u64 input_addr) -{ - return dram_addr_to_sys_addr(mci, - input_addr_to_dram_addr(mci, input_addr)); -} - /* Map the Error address to a PAGE and PAGE OFFSET. */ static inline void error_address_to_page_and_offset(u64 error_address, struct err_info *err) @@ -741,12 +659,12 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs * are ECC capable. */ -static unsigned long amd64_determine_edac_cap(struct amd64_pvt *pvt) +static unsigned long determine_edac_cap(struct amd64_pvt *pvt) { u8 bit; unsigned long edac_cap = EDAC_FLAG_NONE; - bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F) + bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F) ? 19 : 17; @@ -756,9 +674,9 @@ static unsigned long amd64_determine_edac_cap(struct amd64_pvt *pvt) return edac_cap; } -static void amd64_debug_display_dimm_sizes(struct amd64_pvt *, u8); +static void debug_display_dimm_sizes(struct amd64_pvt *, u8); -static void amd64_dump_dramcfg_low(u32 dclr, int chan) +static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) { edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr); @@ -769,7 +687,7 @@ static void amd64_dump_dramcfg_low(u32 dclr, int chan) edac_dbg(1, " PAR/ERR parity: %s\n", (dclr & BIT(8)) ? "enabled" : "disabled"); - if (boot_cpu_data.x86 == 0x10) + if (pvt->fam == 0x10) edac_dbg(1, " DCT 128bit mode width: %s\n", (dclr & BIT(11)) ? "128b" : "64b"); @@ -792,40 +710,43 @@ static void dump_misc_regs(struct amd64_pvt *pvt) (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no", (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no"); - amd64_dump_dramcfg_low(pvt->dclr0, 0); + debug_dump_dramcfg_low(pvt, pvt->dclr0, 0); edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare); edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n", pvt->dhar, dhar_base(pvt), - (boot_cpu_data.x86 == 0xf) ? k8_dhar_offset(pvt) - : f10_dhar_offset(pvt)); + (pvt->fam == 0xf) ? k8_dhar_offset(pvt) + : f10_dhar_offset(pvt)); edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); - amd64_debug_display_dimm_sizes(pvt, 0); + debug_display_dimm_sizes(pvt, 0); /* everything below this point is Fam10h and above */ - if (boot_cpu_data.x86 == 0xf) + if (pvt->fam == 0xf) return; - amd64_debug_display_dimm_sizes(pvt, 1); + debug_display_dimm_sizes(pvt, 1); amd64_info("using %s syndromes.\n", ((pvt->ecc_sym_sz == 8) ? "x8" : "x4")); /* Only if NOT ganged does dclr1 have valid info */ if (!dct_ganging_enabled(pvt)) - amd64_dump_dramcfg_low(pvt->dclr1, 1); + debug_dump_dramcfg_low(pvt, pvt->dclr1, 1); } /* - * see BKDG, F2x[1,0][5C:40], F2[1,0][6C:60] + * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60] */ static void prep_chip_selects(struct amd64_pvt *pvt) { - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) { + if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8; + } else if (pvt->fam == 0x15 && pvt->model >= 0x30) { + pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; + pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; } else { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; @@ -851,7 +772,7 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", cs, *base0, reg0); - if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt)) + if (pvt->fam == 0xf || dct_ganging_enabled(pvt)) continue; if (!amd64_read_dct_pci_cfg(pvt, reg1, base1)) @@ -869,7 +790,7 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", cs, *mask0, reg0); - if (boot_cpu_data.x86 == 0xf || dct_ganging_enabled(pvt)) + if (pvt->fam == 0xf || dct_ganging_enabled(pvt)) continue; if (!amd64_read_dct_pci_cfg(pvt, reg1, mask1)) @@ -878,14 +799,14 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) } } -static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt, int cs) +static enum mem_type determine_memory_type(struct amd64_pvt *pvt, int cs) { enum mem_type type; /* F15h supports only DDR3 */ - if (boot_cpu_data.x86 >= 0x15) + if (pvt->fam >= 0x15) type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; - else if (boot_cpu_data.x86 == 0x10 || pvt->ext_model >= K8_REV_F) { + else if (pvt->fam == 0x10 || pvt->ext_model >= K8_REV_F) { if (pvt->dchr0 & DDR3_MODE) type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; else @@ -918,30 +839,30 @@ static int k8_early_channel_count(struct amd64_pvt *pvt) } /* On F10h and later ErrAddr is MC4_ADDR[47:1] */ -static u64 get_error_address(struct mce *m) +static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m) { - struct cpuinfo_x86 *c = &boot_cpu_data; u64 addr; u8 start_bit = 1; u8 end_bit = 47; - if (c->x86 == 0xf) { + if (pvt->fam == 0xf) { start_bit = 3; end_bit = 39; } - addr = m->addr & GENMASK(start_bit, end_bit); + addr = m->addr & GENMASK_ULL(end_bit, start_bit); /* * Erratum 637 workaround */ - if (c->x86 == 0x15) { + if (pvt->fam == 0x15) { struct amd64_pvt *pvt; u64 cc6_base, tmp_addr; u32 tmp; - u8 mce_nid, intlv_en; + u16 mce_nid; + u8 intlv_en; - if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) + if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7) return addr; mce_nid = amd_get_nb_id(m->extcpu); @@ -951,7 +872,7 @@ static u64 get_error_address(struct mce *m) intlv_en = tmp >> 21 & 0x7; /* add [47:27] + 3 trailing bits */ - cc6_base = (tmp & GENMASK(0, 20)) << 3; + cc6_base = (tmp & GENMASK_ULL(20, 0)) << 3; /* reverse and add DramIntlvEn */ cc6_base |= intlv_en ^ 0x7; @@ -960,18 +881,18 @@ static u64 get_error_address(struct mce *m) cc6_base <<= 24; if (!intlv_en) - return cc6_base | (addr & GENMASK(0, 23)); + return cc6_base | (addr & GENMASK_ULL(23, 0)); amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp); /* faster log2 */ - tmp_addr = (addr & GENMASK(12, 23)) << __fls(intlv_en + 1); + tmp_addr = (addr & GENMASK_ULL(23, 12)) << __fls(intlv_en + 1); /* OR DramIntlvSel into bits [14:12] */ - tmp_addr |= (tmp & GENMASK(21, 23)) >> 9; + tmp_addr |= (tmp & GENMASK_ULL(23, 21)) >> 9; /* add remaining [11:0] bits from original MC4_ADDR */ - tmp_addr |= addr & GENMASK(0, 11); + tmp_addr |= addr & GENMASK_ULL(11, 0); return cc6_base | tmp_addr; } @@ -979,15 +900,34 @@ static u64 get_error_address(struct mce *m) return addr; } +static struct pci_dev *pci_get_related_function(unsigned int vendor, + unsigned int device, + struct pci_dev *related) +{ + struct pci_dev *dev = NULL; + + while ((dev = pci_get_device(vendor, device, dev))) { + if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) && + (dev->bus->number == related->bus->number) && + (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) + break; + } + + return dev; +} + static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) { - struct cpuinfo_x86 *c = &boot_cpu_data; + struct amd_northbridge *nb; + struct pci_dev *f1 = NULL; + unsigned int pci_func; int off = range << 3; + u32 llim; amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off, &pvt->ranges[range].base.lo); amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo); - if (c->x86 == 0xf) + if (pvt->fam == 0xf) return; if (!dram_rw(pvt, range)) @@ -996,30 +936,34 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off, &pvt->ranges[range].base.hi); amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi); - /* Factor in CC6 save area by reading dst node's limit reg */ - if (c->x86 == 0x15) { - struct pci_dev *f1 = NULL; - u8 nid = dram_dst_node(pvt, range); - u32 llim; + /* F15h: factor in CC6 save area by reading dst node's limit reg */ + if (pvt->fam != 0x15) + return; - f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1)); - if (WARN_ON(!f1)) - return; + nb = node_to_amd_nb(dram_dst_node(pvt, range)); + if (WARN_ON(!nb)) + return; + + pci_func = (pvt->model == 0x30) ? PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 + : PCI_DEVICE_ID_AMD_15H_NB_F1; - amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim); + f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc); + if (WARN_ON(!f1)) + return; - pvt->ranges[range].lim.lo &= GENMASK(0, 15); + amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim); - /* {[39:27],111b} */ - pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16; + pvt->ranges[range].lim.lo &= GENMASK_ULL(15, 0); - pvt->ranges[range].lim.hi &= GENMASK(0, 7); + /* {[39:27],111b} */ + pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16; - /* [47:40] */ - pvt->ranges[range].lim.hi |= llim >> 13; + pvt->ranges[range].lim.hi &= GENMASK_ULL(7, 0); - pci_dev_put(f1); - } + /* [47:40] */ + pvt->ranges[range].lim.hi |= llim >> 13; + + pci_dev_put(f1); } static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, @@ -1150,7 +1094,7 @@ static int f1x_early_channel_count(struct amd64_pvt *pvt) int i, j, channels = 0; /* On F10h, if we are in 128 bit mode, then we are using 2 channels */ - if (boot_cpu_data.x86 == 0x10 && (pvt->dclr0 & WIDTH_128)) + if (pvt->fam == 0x10 && (pvt->dclr0 & WIDTH_128)) return 2; /* @@ -1233,10 +1177,25 @@ static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, return ddr3_cs_size(cs_mode, false); } +/* + * F16h and F15h model 30h have only limited cs_modes. + */ +static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, + unsigned cs_mode) +{ + WARN_ON(cs_mode > 12); + + if (cs_mode == 6 || cs_mode == 8 || + cs_mode == 9 || cs_mode == 12) + return -1; + else + return ddr3_cs_size(cs_mode, false); +} + static void read_dram_ctl_register(struct amd64_pvt *pvt) { - if (boot_cpu_data.x86 == 0xf) + if (pvt->fam == 0xf) return; if (!amd64_read_dct_pci_cfg(pvt, DCT_SEL_LO, &pvt->dct_sel_lo)) { @@ -1264,6 +1223,37 @@ static void read_dram_ctl_register(struct amd64_pvt *pvt) } /* + * Determine channel (DCT) based on the interleaving mode (see F15h M30h BKDG, + * 2.10.12 Memory Interleaving Modes). + */ +static u8 f15_m30h_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, + u8 intlv_en, int num_dcts_intlv, + u32 dct_sel) +{ + u8 channel = 0; + u8 select; + + if (!(intlv_en)) + return (u8)(dct_sel); + + if (num_dcts_intlv == 2) { + select = (sys_addr >> 8) & 0x3; + channel = select ? 0x3 : 0; + } else if (num_dcts_intlv == 4) { + u8 intlv_addr = dct_sel_interleave_addr(pvt); + switch (intlv_addr) { + case 0x4: + channel = (sys_addr >> 8) & 0x3; + break; + case 0x5: + channel = (sys_addr >> 9) & 0x3; + break; + } + } + return channel; +} + +/* * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9 Memory * Interleaving Modes. */ @@ -1305,7 +1295,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, } /* Convert the sys_addr to the normalized DCT address */ -static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, +static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, u64 sys_addr, bool hi_rng, u32 dct_sel_base_addr) { @@ -1349,7 +1339,7 @@ static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, chan_off = dram_base; } - return (sys_addr & GENMASK(6,47)) - (chan_off & GENMASK(23,47)); + return (sys_addr & GENMASK_ULL(47,6)) - (chan_off & GENMASK_ULL(47,23)); } /* @@ -1381,7 +1371,7 @@ static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) * -EINVAL: NOT FOUND * 0..csrow = Chip-Select Row */ -static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) +static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; @@ -1412,6 +1402,10 @@ static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) (in_addr & cs_mask), (cs_base & cs_mask)); if ((in_addr & cs_mask) == (cs_base & cs_mask)) { + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + cs_found = csrow; + break; + } cs_found = f10_process_possible_spare(pvt, dct, csrow); edac_dbg(1, " MATCH csrow=%d\n", cs_found); @@ -1430,11 +1424,9 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr) { u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr; - if (boot_cpu_data.x86 == 0x10) { + if (pvt->fam == 0x10) { /* only revC3 and revE have that feature */ - if (boot_cpu_data.x86_model < 4 || - (boot_cpu_data.x86_model < 0xa && - boot_cpu_data.x86_mask < 3)) + if (pvt->model < 4 || (pvt->model < 0xa && pvt->stepping < 3)) return sys_addr; } @@ -1538,20 +1530,143 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, return cs_found; } -static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, - int *chan_sel) +static int f15_m30h_match_to_this_node(struct amd64_pvt *pvt, unsigned range, + u64 sys_addr, int *chan_sel) +{ + int cs_found = -EINVAL; + int num_dcts_intlv = 0; + u64 chan_addr, chan_offset; + u64 dct_base, dct_limit; + u32 dct_cont_base_reg, dct_cont_limit_reg, tmp; + u8 channel, alias_channel, leg_mmio_hole, dct_sel, dct_offset_en; + + u64 dhar_offset = f10_dhar_offset(pvt); + u8 intlv_addr = dct_sel_interleave_addr(pvt); + u8 node_id = dram_dst_node(pvt, range); + u8 intlv_en = dram_intlv_en(pvt, range); + + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &dct_cont_base_reg); + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &dct_cont_limit_reg); + + dct_offset_en = (u8) ((dct_cont_base_reg >> 3) & BIT(0)); + dct_sel = (u8) ((dct_cont_base_reg >> 4) & 0x7); + + edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n", + range, sys_addr, get_dram_limit(pvt, range)); + + if (!(get_dram_base(pvt, range) <= sys_addr) && + !(get_dram_limit(pvt, range) >= sys_addr)) + return -EINVAL; + + if (dhar_valid(pvt) && + dhar_base(pvt) <= sys_addr && + sys_addr < BIT_64(32)) { + amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n", + sys_addr); + return -EINVAL; + } + + /* Verify sys_addr is within DCT Range. */ + dct_base = (u64) dct_sel_baseaddr(pvt); + dct_limit = (dct_cont_limit_reg >> 11) & 0x1FFF; + + if (!(dct_cont_base_reg & BIT(0)) && + !(dct_base <= (sys_addr >> 27) && + dct_limit >= (sys_addr >> 27))) + return -EINVAL; + + /* Verify number of dct's that participate in channel interleaving. */ + num_dcts_intlv = (int) hweight8(intlv_en); + + if (!(num_dcts_intlv % 2 == 0) || (num_dcts_intlv > 4)) + return -EINVAL; + + channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en, + num_dcts_intlv, dct_sel); + + /* Verify we stay within the MAX number of channels allowed */ + if (channel > 3) + return -EINVAL; + + leg_mmio_hole = (u8) (dct_cont_base_reg >> 1 & BIT(0)); + + /* Get normalized DCT addr */ + if (leg_mmio_hole && (sys_addr >= BIT_64(32))) + chan_offset = dhar_offset; + else + chan_offset = dct_base << 27; + + chan_addr = sys_addr - chan_offset; + + /* remove channel interleave */ + if (num_dcts_intlv == 2) { + if (intlv_addr == 0x4) + chan_addr = ((chan_addr >> 9) << 8) | + (chan_addr & 0xff); + else if (intlv_addr == 0x5) + chan_addr = ((chan_addr >> 10) << 9) | + (chan_addr & 0x1ff); + else + return -EINVAL; + + } else if (num_dcts_intlv == 4) { + if (intlv_addr == 0x4) + chan_addr = ((chan_addr >> 10) << 8) | + (chan_addr & 0xff); + else if (intlv_addr == 0x5) + chan_addr = ((chan_addr >> 11) << 9) | + (chan_addr & 0x1ff); + else + return -EINVAL; + } + + if (dct_offset_en) { + amd64_read_pci_cfg(pvt->F1, + DRAM_CONT_HIGH_OFF + (int) channel * 4, + &tmp); + chan_addr += (u64) ((tmp >> 11) & 0xfff) << 27; + } + + f15h_select_dct(pvt, channel); + + edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr); + + /* + * Find Chip select: + * if channel = 3, then alias it to 1. This is because, in F15 M30h, + * there is support for 4 DCT's, but only 2 are currently functional. + * They are DCT0 and DCT3. But we have read all registers of DCT3 into + * pvt->csels[1]. So we need to use '1' here to get correct info. + * Refer F15 M30h BKDG Section 2.10 and 2.10.3 for clarifications. + */ + alias_channel = (channel == 3) ? 1 : channel; + + cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, alias_channel); + + if (cs_found >= 0) + *chan_sel = alias_channel; + + return cs_found; +} + +static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, + u64 sys_addr, + int *chan_sel) { int cs_found = -EINVAL; unsigned range; for (range = 0; range < DRAM_RANGES; range++) { - if (!dram_rw(pvt, range)) continue; - if ((get_dram_base(pvt, range) <= sys_addr) && - (get_dram_limit(pvt, range) >= sys_addr)) { + if (pvt->fam == 0x15 && pvt->model >= 0x30) + cs_found = f15_m30h_match_to_this_node(pvt, range, + sys_addr, + chan_sel); + else if ((get_dram_base(pvt, range) <= sys_addr) && + (get_dram_limit(pvt, range) >= sys_addr)) { cs_found = f1x_match_to_this_node(pvt, range, sys_addr, chan_sel); if (cs_found >= 0) @@ -1594,13 +1709,13 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, * debug routine to display the memory sizes of all logical DIMMs and its * CSROWs */ -static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) +static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) { int dimm, size0, size1; u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; - if (boot_cpu_data.x86 == 0xf) { + if (pvt->fam == 0xf) { /* K8 families < revF not supported yet */ if (pvt->ext_model < K8_REV_F) return; @@ -1636,7 +1751,7 @@ static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) } } -static struct amd64_family_type amd64_family_types[] = { +static struct amd64_family_type family_types[] = { [K8_CPUS] = { .ctl_name = "K8", .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, @@ -1670,25 +1785,41 @@ static struct amd64_family_type amd64_family_types[] = { .read_dct_pci_cfg = f15_read_dct_pci_cfg, } }, + [F15_M30H_CPUS] = { + .ctl_name = "F15h_M30h", + .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1, + .f3_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F3, + .ops = { + .early_channel_count = f1x_early_channel_count, + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, + .dbam_to_cs = f16_dbam_to_chip_select, + .read_dct_pci_cfg = f15_read_dct_pci_cfg, + } + }, + [F16_CPUS] = { + .ctl_name = "F16h", + .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, + .f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3, + .ops = { + .early_channel_count = f1x_early_channel_count, + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, + .dbam_to_cs = f16_dbam_to_chip_select, + .read_dct_pci_cfg = f10_read_dct_pci_cfg, + } + }, + [F16_M30H_CPUS] = { + .ctl_name = "F16h_M30h", + .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1, + .f3_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F3, + .ops = { + .early_channel_count = f1x_early_channel_count, + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, + .dbam_to_cs = f16_dbam_to_chip_select, + .read_dct_pci_cfg = f10_read_dct_pci_cfg, + } + }, }; -static struct pci_dev *pci_get_related_function(unsigned int vendor, - unsigned int device, - struct pci_dev *related) -{ - struct pci_dev *dev = NULL; - - dev = pci_get_device(vendor, device, dev); - while (dev) { - if ((dev->bus->number == related->bus->number) && - (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) - break; - dev = pci_get_device(vendor, device, dev); - } - - return dev; -} - /* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm @@ -1696,7 +1827,7 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor, * * Algorithm courtesy of Ross LaFetra from AMD. */ -static u16 x4_vectors[] = { +static const u16 x4_vectors[] = { 0x2f57, 0x1afe, 0x66cc, 0xdd88, 0x11eb, 0x3396, 0x7f4c, 0xeac8, 0x0001, 0x0002, 0x0004, 0x0008, @@ -1735,7 +1866,7 @@ static u16 x4_vectors[] = { 0x19a9, 0x2efe, 0xb5cc, 0x6f88, }; -static u16 x8_vectors[] = { +static const u16 x8_vectors[] = { 0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480, 0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80, 0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80, @@ -1757,7 +1888,7 @@ static u16 x8_vectors[] = { 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000, }; -static int decode_syndrome(u16 syndrome, u16 *vectors, unsigned num_vecs, +static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs, unsigned v_dim) { unsigned int i, err_sym; @@ -1892,9 +2023,9 @@ static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err, string, ""); } -static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, - struct mce *m) +static inline void decode_bus_error(int node_id, struct mce *m) { + struct mem_ctl_info *mci = mcis[node_id]; struct amd64_pvt *pvt = mci->pvt_info; u8 ecc_type = (m->status >> 45) & 0x3; u8 xec = XEC(m->status, 0x1f); @@ -1912,7 +2043,7 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, memset(&err, 0, sizeof(err)); - sys_addr = get_error_address(m); + sys_addr = get_error_address(pvt, m); if (ecc_type == 2) err.syndrome = extract_syndrome(m->status); @@ -1922,11 +2053,6 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, __log_bus_error(mci, &err, ecc_type); } -void amd64_decode_bus_error(int node_id, struct mce *m) -{ - __amd64_decode_bus_error(mcis[node_id], m); -} - /* * Use pvt->F2 which contains the F2 CPU PCI device to get the related * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error. @@ -1973,10 +2099,9 @@ static void free_mc_sibling_devs(struct amd64_pvt *pvt) */ static void read_mc_regs(struct amd64_pvt *pvt) { - struct cpuinfo_x86 *c = &boot_cpu_data; + unsigned range; u64 msr_val; u32 tmp; - unsigned range; /* * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since @@ -2037,12 +2162,14 @@ static void read_mc_regs(struct amd64_pvt *pvt) pvt->ecc_sym_sz = 4; - if (c->x86 >= 0x10) { + if (pvt->fam >= 0x10) { amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); - amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1); + if (pvt->fam != 0x16) + /* F16h has only DCT0 */ + amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1); /* F10h, revD and later can do x8 ECC too */ - if ((c->x86 > 0x10 || c->x86_model > 7) && tmp & BIT(25)) + if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25)) pvt->ecc_sym_sz = 8; } dump_misc_regs(pvt); @@ -2082,7 +2209,7 @@ static void read_mc_regs(struct amd64_pvt *pvt) * encompasses * */ -static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) +static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) { u32 cs_mode, nr_pages; u32 dbam = dct ? pvt->dbam1 : pvt->dbam0; @@ -2136,7 +2263,7 @@ static int init_csrows(struct mem_ctl_info *mci) bool row_dct0 = !!csrow_enabled(i, 0, pvt); bool row_dct1 = false; - if (boot_cpu_data.x86 != 0xf) + if (pvt->fam != 0xf) row_dct1 = !!csrow_enabled(i, 1, pvt); if (!row_dct0 && !row_dct1) @@ -2148,14 +2275,20 @@ static int init_csrows(struct mem_ctl_info *mci) edac_dbg(1, "MC node: %d, csrow: %d\n", pvt->mc_node_id, i); - if (row_dct0) - nr_pages = amd64_csrow_nr_pages(pvt, 0, i); + if (row_dct0) { + nr_pages = get_csrow_nr_pages(pvt, 0, i); + csrow->channels[0]->dimm->nr_pages = nr_pages; + } /* K8 has only one DCT */ - if (boot_cpu_data.x86 != 0xf && row_dct1) - nr_pages += amd64_csrow_nr_pages(pvt, 1, i); + if (pvt->fam != 0xf && row_dct1) { + int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i); + + csrow->channels[1]->dimm->nr_pages = row_dct1_pages; + nr_pages += row_dct1_pages; + } - mtype = amd64_determine_memory_type(pvt, i); + mtype = determine_memory_type(pvt, i); edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages); @@ -2172,16 +2305,14 @@ static int init_csrows(struct mem_ctl_info *mci) dimm = csrow->channels[j]->dimm; dimm->mtype = mtype; dimm->edac_mode = edac_mode; - dimm->nr_pages = nr_pages; } - csrow->nr_pages = nr_pages; } return empty; } /* get all cores on this DCT */ -static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) +static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) { int cpu; @@ -2191,7 +2322,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) } /* check MCG_CTL on all the cpus on this node */ -static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid) +static bool nb_mce_bank_enabled_on_node(u16 nid) { cpumask_var_t mask; int cpu, nbe; @@ -2224,7 +2355,7 @@ out: return ret; } -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) { cpumask_var_t cmask; int cpu; @@ -2262,7 +2393,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) return 0; } -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { bool ret = true; @@ -2314,7 +2445,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, return ret; } -static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { u32 value, mask = 0x3; /* UECC/CECC enable */ @@ -2353,7 +2484,7 @@ static const char *ecc_msg = "'ecc_enable_override'.\n" " (Note that use of the override may cause unknown side effects.)\n"; -static bool ecc_enabled(struct pci_dev *F3, u8 nid) +static bool ecc_enabled(struct pci_dev *F3, u16 nid) { u32 value; u8 ecc_en = 0; @@ -2364,7 +2495,7 @@ static bool ecc_enabled(struct pci_dev *F3, u8 nid) ecc_en = !!(value & NBCFG_ECC_ENABLE); amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled")); - nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid); + nb_mce_en = nb_mce_bank_enabled_on_node(nid); if (!nb_mce_en) amd64_notice("NB MCE bank disabled, set MSR " "0x%08x[4] on node %d to enable.\n", @@ -2379,13 +2510,14 @@ static bool ecc_enabled(struct pci_dev *F3, u8 nid) static int set_mc_sysfs_attrs(struct mem_ctl_info *mci) { + struct amd64_pvt *pvt = mci->pvt_info; int rc; rc = amd64_create_sysfs_dbg_files(mci); if (rc < 0) return rc; - if (boot_cpu_data.x86 >= 0x10) { + if (pvt->fam >= 0x10) { rc = amd64_create_sysfs_inject_files(mci); if (rc < 0) return rc; @@ -2396,9 +2528,11 @@ static int set_mc_sysfs_attrs(struct mem_ctl_info *mci) static void del_mc_sysfs_attrs(struct mem_ctl_info *mci) { + struct amd64_pvt *pvt = mci->pvt_info; + amd64_remove_sysfs_dbg_files(mci); - if (boot_cpu_data.x86 >= 0x10) + if (pvt->fam >= 0x10) amd64_remove_sysfs_inject_files(mci); } @@ -2416,7 +2550,7 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, if (pvt->nbcap & NBCAP_CHIPKILL) mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; - mci->edac_cap = amd64_determine_edac_cap(pvt); + mci->edac_cap = determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; mci->mod_ver = EDAC_AMD64_VERSION; mci->ctl_name = fam->ctl_name; @@ -2424,32 +2558,52 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, mci->ctl_page_to_phys = NULL; /* memory scrubber interface */ - mci->set_sdram_scrub_rate = amd64_set_scrub_rate; - mci->get_sdram_scrub_rate = amd64_get_scrub_rate; + mci->set_sdram_scrub_rate = set_scrub_rate; + mci->get_sdram_scrub_rate = get_scrub_rate; } /* * returns a pointer to the family descriptor on success, NULL otherwise. */ -static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt) +static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) { - u8 fam = boot_cpu_data.x86; struct amd64_family_type *fam_type = NULL; - switch (fam) { + pvt->ext_model = boot_cpu_data.x86_model >> 4; + pvt->stepping = boot_cpu_data.x86_mask; + pvt->model = boot_cpu_data.x86_model; + pvt->fam = boot_cpu_data.x86; + + switch (pvt->fam) { case 0xf: - fam_type = &amd64_family_types[K8_CPUS]; - pvt->ops = &amd64_family_types[K8_CPUS].ops; + fam_type = &family_types[K8_CPUS]; + pvt->ops = &family_types[K8_CPUS].ops; break; case 0x10: - fam_type = &amd64_family_types[F10_CPUS]; - pvt->ops = &amd64_family_types[F10_CPUS].ops; + fam_type = &family_types[F10_CPUS]; + pvt->ops = &family_types[F10_CPUS].ops; break; case 0x15: - fam_type = &amd64_family_types[F15_CPUS]; - pvt->ops = &amd64_family_types[F15_CPUS].ops; + if (pvt->model == 0x30) { + fam_type = &family_types[F15_M30H_CPUS]; + pvt->ops = &family_types[F15_M30H_CPUS].ops; + break; + } + + fam_type = &family_types[F15_CPUS]; + pvt->ops = &family_types[F15_CPUS].ops; + break; + + case 0x16: + if (pvt->model == 0x30) { + fam_type = &family_types[F16_M30H_CPUS]; + pvt->ops = &family_types[F16_M30H_CPUS].ops; + break; + } + fam_type = &family_types[F16_CPUS]; + pvt->ops = &family_types[F16_CPUS].ops; break; default: @@ -2457,24 +2611,22 @@ static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt) return NULL; } - pvt->ext_model = boot_cpu_data.x86_model >> 4; - amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name, - (fam == 0xf ? + (pvt->fam == 0xf ? (pvt->ext_model >= K8_REV_F ? "revF or later " : "revE or earlier ") : ""), pvt->mc_node_id); return fam_type; } -static int amd64_init_one_instance(struct pci_dev *F2) +static int init_one_instance(struct pci_dev *F2) { struct amd64_pvt *pvt = NULL; struct amd64_family_type *fam_type = NULL; struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u16 nid = amd_get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2485,7 +2637,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) pvt->F2 = F2; ret = -EINVAL; - fam_type = amd64_per_family_init(pvt); + fam_type = per_family_init(pvt); if (!fam_type) goto err_free; @@ -2511,15 +2663,21 @@ static int amd64_init_one_instance(struct pci_dev *F2) layers[0].size = pvt->csels[0].b_cnt; layers[0].is_virt_csrow = true; layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = pvt->channel_count; + + /* + * Always allocate two channels since we can have setups with DIMMs on + * only one channel. Also, this simplifies handling later for the price + * of a couple of KBs tops. + */ + layers[1].size = 2; layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0); if (!mci) goto err_siblings; mci->pvt_info = pvt; mci->pdev = &pvt->F2->dev; - mci->csbased = 1; setup_mci_misc_attrs(mci, fam_type); @@ -2540,7 +2698,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) if (report_gart_errors) amd_report_gart_errors(true); - amd_register_ecc_decoder(amd64_decode_bus_error); + amd_register_ecc_decoder(decode_bus_error); mcis[nid] = mci; @@ -2563,10 +2721,10 @@ err_ret: return ret; } -static int amd64_probe_one_instance(struct pci_dev *pdev, - const struct pci_device_id *mc_type) +static int probe_one_instance(struct pci_dev *pdev, + const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2596,7 +2754,7 @@ static int amd64_probe_one_instance(struct pci_dev *pdev, goto err_enable; } - ret = amd64_init_one_instance(pdev); + ret = init_one_instance(pdev); if (ret < 0) { amd64_err("Error probing instance: %d\n", nid); restore_ecc_error_reporting(s, nid, F3); @@ -2612,15 +2770,17 @@ err_out: return ret; } -static void amd64_remove_one_instance(struct pci_dev *pdev) +static void remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u8 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; mci = find_mci_by_dev(&pdev->dev); + WARN_ON(!mci); + del_mc_sysfs_attrs(mci); /* Remove from EDAC CORE tracking list */ mci = edac_mc_del_mc(&pdev->dev); @@ -2635,7 +2795,7 @@ static void amd64_remove_one_instance(struct pci_dev *pdev) /* unregister from EDAC MCE */ amd_report_gart_errors(false); - amd_unregister_ecc_decoder(amd64_decode_bus_error); + amd_unregister_ecc_decoder(decode_bus_error); kfree(ecc_stngs[nid]); ecc_stngs[nid] = NULL; @@ -2653,7 +2813,7 @@ static void amd64_remove_one_instance(struct pci_dev *pdev) * PCI core identifies what devices are on a system during boot, and then * inquiry this table to see if this driver is for a given device found. */ -static DEFINE_PCI_DEVICE_TABLE(amd64_pci_table) = { +static const struct pci_device_id amd64_pci_table[] = { { .vendor = PCI_VENDOR_ID_AMD, .device = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, @@ -2678,6 +2838,30 @@ static DEFINE_PCI_DEVICE_TABLE(amd64_pci_table) = { .class = 0, .class_mask = 0, }, + { + .vendor = PCI_VENDOR_ID_AMD, + .device = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class = 0, + .class_mask = 0, + }, + { + .vendor = PCI_VENDOR_ID_AMD, + .device = PCI_DEVICE_ID_AMD_16H_NB_F2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class = 0, + .class_mask = 0, + }, + { + .vendor = PCI_VENDOR_ID_AMD, + .device = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class = 0, + .class_mask = 0, + }, {0, } }; @@ -2685,8 +2869,8 @@ MODULE_DEVICE_TABLE(pci, amd64_pci_table); static struct pci_driver amd64_pci_driver = { .name = EDAC_MOD_STR, - .probe = amd64_probe_one_instance, - .remove = amd64_remove_one_instance, + .probe = probe_one_instance, + .remove = remove_one_instance, .id_table = amd64_pci_table, }; @@ -2695,23 +2879,18 @@ static void setup_pci_device(void) struct mem_ctl_info *mci; struct amd64_pvt *pvt; - if (amd64_ctl_pci) + if (pci_ctl) return; mci = mcis[0]; - if (mci) { - - pvt = mci->pvt_info; - amd64_ctl_pci = - edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR); - - if (!amd64_ctl_pci) { - pr_warning("%s(): Unable to create PCI control\n", - __func__); + if (!mci) + return; - pr_warning("%s(): PCI error report via EDAC not set\n", - __func__); - } + pvt = mci->pvt_info; + pci_ctl = edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR); + if (!pci_ctl) { + pr_warn("%s(): Unable to create PCI control\n", __func__); + pr_warn("%s(): PCI error report via EDAC not set\n", __func__); } } @@ -2767,8 +2946,8 @@ err_ret: static void __exit amd64_edac_exit(void) { - if (amd64_ctl_pci) - edac_pci_release_generic_ctl(amd64_ctl_pci); + if (pci_ctl) + edac_pci_release_generic_ctl(pci_ctl); pci_unregister_driver(&amd64_pci_driver); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index e864f407806..d903e0c2114 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -160,19 +160,16 @@ #define OFF false /* - * Create a contiguous bitmask starting at bit position @lo and ending at - * position @hi. For example - * - * GENMASK(21, 39) gives us the 64bit vector 0x000000ffffe00000. - */ -#define GENMASK(lo, hi) (((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) - -/* * PCI-defined configuration space registers */ +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601 #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602 - +#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531 +#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532 +#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581 +#define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582 /* * Function 1 - Address Map @@ -180,13 +177,22 @@ #define DRAM_BASE_LO 0x40 #define DRAM_LIMIT_LO 0x44 -#define dram_intlv_en(pvt, i) ((u8)((pvt->ranges[i].base.lo >> 8) & 0x7)) +/* + * F15 M30h D18F1x2[1C:00] + */ +#define DRAM_CONT_BASE 0x200 +#define DRAM_CONT_LIMIT 0x204 + +/* + * F15 M30h D18F1x2[4C:40] + */ +#define DRAM_CONT_HIGH_OFF 0x240 + #define dram_rw(pvt, i) ((u8)(pvt->ranges[i].base.lo & 0x3)) #define dram_intlv_sel(pvt, i) ((u8)((pvt->ranges[i].lim.lo >> 8) & 0x7)) #define dram_dst_node(pvt, i) ((u8)(pvt->ranges[i].lim.lo & 0x7)) #define DHAR 0xf0 -#define dhar_valid(pvt) ((pvt)->dhar & BIT(0)) #define dhar_mem_hoist_valid(pvt) ((pvt)->dhar & BIT(1)) #define dhar_base(pvt) ((pvt)->dhar & 0xff000000) #define k8_dhar_offset(pvt) (((pvt)->dhar & 0x0000ff00) << 16) @@ -233,8 +239,6 @@ #define DDR3_MODE BIT(8) #define DCT_SEL_LO 0x110 -#define dct_sel_baseaddr(pvt) ((pvt)->dct_sel_lo & 0xFFFFF800) -#define dct_sel_interleave_addr(pvt) (((pvt)->dct_sel_lo >> 6) & 0x3) #define dct_high_range_enabled(pvt) ((pvt)->dct_sel_lo & BIT(0)) #define dct_interleave_enabled(pvt) ((pvt)->dct_sel_lo & BIT(2)) @@ -292,16 +296,13 @@ /* MSRs */ #define MSR_MCGCTL_NBE BIT(4) -/* AMD sets the first MC device at device ID 0x18. */ -static inline u8 get_node_id(struct pci_dev *pdev) -{ - return PCI_SLOT(pdev->devfn) - 0x18; -} - enum amd_families { K8_CPUS = 0, F10_CPUS, F15_CPUS, + F15_M30H_CPUS, + F16_CPUS, + F16_M30H_CPUS, NUM_FAMILIES, }; @@ -340,7 +341,11 @@ struct amd64_pvt { /* pci_device handles which we utilize */ struct pci_dev *F1, *F2, *F3; - unsigned mc_node_id; /* MC index of this MC node */ + u16 mc_node_id; /* MC index of this MC node */ + u8 fam; /* CPU family */ + u8 model; /* ... model */ + u8 stepping; /* ... stepping */ + int ext_model; /* extended model value of this node */ int channel_count; @@ -393,7 +398,7 @@ struct err_info { u32 offset; }; -static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i) +static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i) { u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8; @@ -403,7 +408,7 @@ static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i) return (((u64)pvt->ranges[i].base.hi & 0x000000ff) << 40) | addr; } -static inline u64 get_dram_limit(struct amd64_pvt *pvt, unsigned i) +static inline u64 get_dram_limit(struct amd64_pvt *pvt, u8 i) { u64 lim = (((u64)pvt->ranges[i].lim.lo & 0xffff0000) << 8) | 0x00ffffff; @@ -418,6 +423,14 @@ static inline u16 extract_syndrome(u64 status) return ((status >> 47) & 0xff) | ((status >> 16) & 0xff00); } +static inline u8 dct_sel_interleave_addr(struct amd64_pvt *pvt) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) + return (((pvt->dct_sel_hi >> 9) & 0x1) << 2) | + ((pvt->dct_sel_lo >> 6) & 0x3); + + return ((pvt)->dct_sel_lo >> 6) & 0x3; +} /* * per-node ECC settings descriptor */ @@ -508,3 +521,33 @@ static inline void enable_caches(void *dummy) { write_cr0(read_cr0() & ~X86_CR0_CD); } + +static inline u8 dram_intlv_en(struct amd64_pvt *pvt, unsigned int i) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + u32 tmp; + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &tmp); + return (u8) tmp & 0xF; + } + return (u8) (pvt->ranges[i].base.lo >> 8) & 0x7; +} + +static inline u8 dhar_valid(struct amd64_pvt *pvt) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + u32 tmp; + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp); + return (tmp >> 1) & BIT(0); + } + return (pvt)->dhar & BIT(0); +} + +static inline u32 dct_sel_baseaddr(struct amd64_pvt *pvt) +{ + if (pvt->fam == 0x15 && pvt->model >= 0x30) { + u32 tmp; + amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp); + return (tmp >> 11) & 0x1FFF; + } + return (pvt)->dct_sel_lo & 0xFFFFF800; +} diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c index 8c171fa1cb9..0d66ae68d46 100644 --- a/drivers/edac/amd64_edac_inj.c +++ b/drivers/edac/amd64_edac_inj.c @@ -24,7 +24,7 @@ static ssize_t amd64_inject_section_store(struct device *dev, unsigned long value; int ret; - ret = strict_strtoul(data, 10, &value); + ret = kstrtoul(data, 10, &value); if (ret < 0) return ret; @@ -61,7 +61,7 @@ static ssize_t amd64_inject_word_store(struct device *dev, unsigned long value; int ret; - ret = strict_strtoul(data, 10, &value); + ret = kstrtoul(data, 10, &value); if (ret < 0) return ret; @@ -97,7 +97,7 @@ static ssize_t amd64_inject_ecc_vector_store(struct device *dev, unsigned long value; int ret; - ret = strict_strtoul(data, 16, &value); + ret = kstrtoul(data, 16, &value); if (ret < 0) return ret; @@ -124,7 +124,7 @@ static ssize_t amd64_inject_read_store(struct device *dev, u32 section, word_bits; int ret; - ret = strict_strtoul(data, 10, &value); + ret = kstrtoul(data, 10, &value); if (ret < 0) return ret; @@ -157,7 +157,7 @@ static ssize_t amd64_inject_write_store(struct device *dev, unsigned long value; int ret; - ret = strict_strtoul(data, 10, &value); + ret = kstrtoul(data, 10, &value); if (ret < 0) return ret; @@ -202,9 +202,9 @@ static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR, amd64_inject_word_show, amd64_inject_word_store); static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR, amd64_inject_ecc_vector_show, amd64_inject_ecc_vector_store); -static DEVICE_ATTR(inject_write, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(inject_write, S_IWUSR, NULL, amd64_inject_write_store); -static DEVICE_ATTR(inject_read, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(inject_read, S_IWUSR, NULL, amd64_inject_read_store); diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index 96e3ee3460a..3a501b530e1 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -333,7 +333,7 @@ static void amd76x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(amd76x_pci_tbl) = { +static const struct pci_device_id amd76x_pci_tbl[] = { { PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0, AMD762}, diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c index ddd890052ce..2b63f7c2d6d 100644 --- a/drivers/edac/amd8111_edac.c +++ b/drivers/edac/amd8111_edac.c @@ -350,6 +350,7 @@ static int amd8111_dev_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct amd8111_dev_info *dev_info = &amd8111_devices[id->driver_data]; + int ret = -ENODEV; dev_info->dev = pci_get_device(PCI_VENDOR_ID_AMD, dev_info->err_dev, NULL); @@ -359,16 +360,15 @@ static int amd8111_dev_probe(struct pci_dev *dev, "vendor %x, device %x, name %s\n", PCI_VENDOR_ID_AMD, dev_info->err_dev, dev_info->ctl_name); - return -ENODEV; + goto err; } if (pci_enable_device(dev_info->dev)) { - pci_dev_put(dev_info->dev); printk(KERN_ERR "failed to enable:" "vendor %x, device %x, name %s\n", PCI_VENDOR_ID_AMD, dev_info->err_dev, dev_info->ctl_name); - return -ENODEV; + goto err_dev_put; } /* @@ -381,8 +381,10 @@ static int amd8111_dev_probe(struct pci_dev *dev, edac_device_alloc_ctl_info(0, dev_info->ctl_name, 1, NULL, 0, 0, NULL, 0, dev_info->edac_idx); - if (!dev_info->edac_dev) - return -ENOMEM; + if (!dev_info->edac_dev) { + ret = -ENOMEM; + goto err_dev_put; + } dev_info->edac_dev->pvt_info = dev_info; dev_info->edac_dev->dev = &dev_info->dev->dev; @@ -399,8 +401,7 @@ static int amd8111_dev_probe(struct pci_dev *dev, if (edac_device_add_device(dev_info->edac_dev) > 0) { printk(KERN_ERR "failed to add edac_dev for %s\n", dev_info->ctl_name); - edac_device_free_ctl_info(dev_info->edac_dev); - return -ENODEV; + goto err_edac_free_ctl; } printk(KERN_INFO "added one edac_dev on AMD8111 " @@ -409,6 +410,13 @@ static int amd8111_dev_probe(struct pci_dev *dev, dev_info->ctl_name); return 0; + +err_edac_free_ctl: + edac_device_free_ctl_info(dev_info->edac_dev); +err_dev_put: + pci_dev_put(dev_info->dev); +err: + return ret; } static void amd8111_dev_remove(struct pci_dev *dev) @@ -437,6 +445,7 @@ static int amd8111_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct amd8111_pci_info *pci_info = &amd8111_pcis[id->driver_data]; + int ret = -ENODEV; pci_info->dev = pci_get_device(PCI_VENDOR_ID_AMD, pci_info->err_dev, NULL); @@ -446,16 +455,15 @@ static int amd8111_pci_probe(struct pci_dev *dev, "vendor %x, device %x, name %s\n", PCI_VENDOR_ID_AMD, pci_info->err_dev, pci_info->ctl_name); - return -ENODEV; + goto err; } if (pci_enable_device(pci_info->dev)) { - pci_dev_put(pci_info->dev); printk(KERN_ERR "failed to enable:" "vendor %x, device %x, name %s\n", PCI_VENDOR_ID_AMD, pci_info->err_dev, pci_info->ctl_name); - return -ENODEV; + goto err_dev_put; } /* @@ -465,8 +473,10 @@ static int amd8111_pci_probe(struct pci_dev *dev, */ pci_info->edac_idx = edac_pci_alloc_index(); pci_info->edac_dev = edac_pci_alloc_ctl_info(0, pci_info->ctl_name); - if (!pci_info->edac_dev) - return -ENOMEM; + if (!pci_info->edac_dev) { + ret = -ENOMEM; + goto err_dev_put; + } pci_info->edac_dev->pvt_info = pci_info; pci_info->edac_dev->dev = &pci_info->dev->dev; @@ -483,8 +493,7 @@ static int amd8111_pci_probe(struct pci_dev *dev, if (edac_pci_add_device(pci_info->edac_dev, pci_info->edac_idx) > 0) { printk(KERN_ERR "failed to add edac_pci for %s\n", pci_info->ctl_name); - edac_pci_free_ctl_info(pci_info->edac_dev); - return -ENODEV; + goto err_edac_free_ctl; } printk(KERN_INFO "added one edac_pci on AMD8111 " @@ -493,6 +502,13 @@ static int amd8111_pci_probe(struct pci_dev *dev, pci_info->ctl_name); return 0; + +err_edac_free_ctl: + edac_pci_free_ctl_info(pci_info->edac_dev); +err_dev_put: + pci_dev_put(pci_info->dev); +err: + return ret; } static void amd8111_pci_remove(struct pci_dev *dev) diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c index c2eaf334b90..374b57fc596 100644 --- a/drivers/edac/cell_edac.c +++ b/drivers/edac/cell_edac.c @@ -15,6 +15,7 @@ #include <linux/platform_device.h> #include <linux/stop_machine.h> #include <linux/io.h> +#include <linux/of_address.h> #include <asm/machdep.h> #include <asm/cell-regs.h> @@ -162,6 +163,7 @@ static void cell_edac_init_csrows(struct mem_ctl_info *mci) csrow->first_page, nr_pages); break; } + of_node_put(np); } static int cell_edac_probe(struct platform_device *pdev) diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index 7f3c57113ba..df6575f1430 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -789,7 +789,7 @@ static struct cpc925_dev_info cpc925_devs[] = { .exit = cpc925_htlink_exit, .check = cpc925_htlink_check, }, - {0}, /* Terminated by NULL */ + { } }; /* diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index 644fec54681..b2d71388172 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -209,7 +209,6 @@ enum e752x_chips { */ struct e752x_pvt { - struct pci_dev *bridge_ck; struct pci_dev *dev_d0f0; struct pci_dev *dev_d0f1; u32 tolm; @@ -891,7 +890,7 @@ static void e752x_get_error_info(struct mem_ctl_info *mci, info->buf_ferr); if (info->dram_ferr) - pci_write_bits16(pvt->bridge_ck, E752X_DRAM_FERR, + pci_write_bits16(pvt->dev_d0f1, E752X_DRAM_FERR, info->dram_ferr, info->dram_ferr); pci_write_config_dword(dev, E752X_FERR_GLOBAL, @@ -936,7 +935,7 @@ static void e752x_get_error_info(struct mem_ctl_info *mci, info->buf_nerr); if (info->dram_nerr) - pci_write_bits16(pvt->bridge_ck, E752X_DRAM_NERR, + pci_write_bits16(pvt->dev_d0f1, E752X_DRAM_NERR, info->dram_nerr, info->dram_nerr); pci_write_config_dword(dev, E752X_NERR_GLOBAL, @@ -1177,36 +1176,33 @@ static void e752x_init_mem_map_table(struct pci_dev *pdev, static int e752x_get_devs(struct pci_dev *pdev, int dev_idx, struct e752x_pvt *pvt) { - struct pci_dev *dev; - - pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL, - pvt->dev_info->err_dev, pvt->bridge_ck); + pvt->dev_d0f1 = pci_get_device(PCI_VENDOR_ID_INTEL, + pvt->dev_info->err_dev, NULL); - if (pvt->bridge_ck == NULL) - pvt->bridge_ck = pci_scan_single_device(pdev->bus, + if (pvt->dev_d0f1 == NULL) { + pvt->dev_d0f1 = pci_scan_single_device(pdev->bus, PCI_DEVFN(0, 1)); + pci_dev_get(pvt->dev_d0f1); + } - if (pvt->bridge_ck == NULL) { + if (pvt->dev_d0f1 == NULL) { e752x_printk(KERN_ERR, "error reporting device not found:" "vendor %x device 0x%x (broken BIOS?)\n", PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].err_dev); return 1; } - dev = pci_get_device(PCI_VENDOR_ID_INTEL, + pvt->dev_d0f0 = pci_get_device(PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].ctl_dev, NULL); - if (dev == NULL) + if (pvt->dev_d0f0 == NULL) goto fail; - pvt->dev_d0f0 = dev; - pvt->dev_d0f1 = pci_dev_get(pvt->bridge_ck); - return 0; fail: - pci_dev_put(pvt->bridge_ck); + pci_dev_put(pvt->dev_d0f1); return 1; } @@ -1383,7 +1379,6 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) fail: pci_dev_put(pvt->dev_d0f0); pci_dev_put(pvt->dev_d0f1); - pci_dev_put(pvt->bridge_ck); edac_mc_free(mci); return -ENODEV; @@ -1417,11 +1412,10 @@ static void e752x_remove_one(struct pci_dev *pdev) pvt = (struct e752x_pvt *)mci->pvt_info; pci_dev_put(pvt->dev_d0f0); pci_dev_put(pvt->dev_d0f1); - pci_dev_put(pvt->bridge_ck); edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(e752x_pci_tbl) = { +static const struct pci_device_id e752x_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 7520_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7520}, diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 1c4056a5038..3cda79bc8b0 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -555,7 +555,7 @@ static void e7xxx_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(e7xxx_pci_tbl) = { +static const struct pci_device_id e7xxx_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7205}, diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 23bb99fa44f..3c2625e7980 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -453,6 +453,11 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev); extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page); + +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e); + void edac_mc_handle_error(const enum hw_event_mc_err_type type, struct mem_ctl_info *mci, const u16 error_count, diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 211021dfec7..592af5f0cf3 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -437,6 +437,9 @@ void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) { int status; + if (!edac_dev->edac_check) + return; + status = cancel_delayed_work(&edac_dev->work); if (status == 0) { /* workq instance might be running, wait for it */ @@ -530,12 +533,9 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev) /* Report action taken */ edac_device_printk(edac_dev, KERN_INFO, - "Giving out device to module '%s' controller " - "'%s': DEV '%s' (%s)\n", - edac_dev->mod_name, - edac_dev->ctl_name, - edac_dev_name(edac_dev), - edac_op_state_to_string(edac_dev->op_state)); + "Giving out device to module %s controller %s: DEV %s (%s)\n", + edac_dev->mod_name, edac_dev->ctl_name, edac_dev->dev_name, + edac_op_state_to_string(edac_dev->op_state)); mutex_unlock(&device_ctls_mutex); return 0; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 281f566a551..2c694b5297c 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -42,6 +42,14 @@ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); +/* + * Used to lock EDAC MC to just one module, avoiding two drivers e. g. + * apei/ghes and i7core_edac to be used at the same time. + */ +static void const *edac_mc_owner; + +static struct bus_type mc_bus[EDAC_MAX_MCS]; + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -80,7 +88,7 @@ static void edac_mc_dump_dimm(struct dimm_info *dimm, int number) edac_dimm_info_location(dimm, location, sizeof(location)); edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n", - dimm->mci->mem_is_per_rank ? "rank" : "dimm", + dimm->mci->csbased ? "rank" : "dimm", number, location, dimm->csrow, dimm->cschannel); edac_dbg(4, " dimm = %p\n", dimm); edac_dbg(4, " dimm->label = '%s'\n", dimm->label); @@ -335,12 +343,12 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, memcpy(mci->layers, layers, sizeof(*layer) * n_layers); mci->nr_csrows = tot_csrows; mci->num_cschannel = tot_channels; - mci->mem_is_per_rank = per_rank; + mci->csbased = per_rank; /* * Alocate and fill the csrow/channels structs */ - mci->csrows = kcalloc(sizeof(*mci->csrows), tot_csrows, GFP_KERNEL); + mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL); if (!mci->csrows) goto error; for (row = 0; row < tot_csrows; row++) { @@ -351,7 +359,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, csr->csrow_idx = row; csr->mci = mci; csr->nr_channels = tot_channels; - csr->channels = kcalloc(sizeof(*csr->channels), tot_channels, + csr->channels = kcalloc(tot_channels, sizeof(*csr->channels), GFP_KERNEL); if (!csr->channels) goto error; @@ -369,7 +377,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, /* * Allocate and fill the dimm structs */ - mci->dimms = kcalloc(sizeof(*mci->dimms), tot_dimms, GFP_KERNEL); + mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL); if (!mci->dimms) goto error; @@ -441,13 +449,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, mci->op_state = OP_ALLOC; - /* at this point, the root kobj is valid, and in order to - * 'free' the object, then the function: - * edac_mc_unregister_sysfs_main_kobj() must be called - * which will perform kobj unregistration and the actual free - * will occur during the kobject callback operation - */ - return mci; error: @@ -558,7 +559,8 @@ static void edac_mc_workq_function(struct work_struct *work_req) * * called with the mem_ctls_mutex held */ -static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) +static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec, + bool init) { edac_dbg(0, "\n"); @@ -566,7 +568,9 @@ static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) if (mci->op_state != OP_RUNNING_POLL) return; - INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); + if (init) + INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); + mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); } @@ -600,7 +604,7 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci) * user space has updated our poll period value, need to * reset our workq delays */ -void edac_mc_reset_delay_period(int value) +void edac_mc_reset_delay_period(unsigned long value) { struct mem_ctl_info *mci; struct list_head *item; @@ -610,7 +614,7 @@ void edac_mc_reset_delay_period(int value) list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - edac_mc_workq_setup(mci, (unsigned long) value); + edac_mc_workq_setup(mci, value, false); } mutex_unlock(&mem_ctls_mutex); @@ -666,9 +670,9 @@ fail1: return 1; } -static void del_mc_from_global_list(struct mem_ctl_info *mci) +static int del_mc_from_global_list(struct mem_ctl_info *mci) { - atomic_dec(&edac_handlers); + int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -676,6 +680,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) */ synchronize_rcu(); INIT_LIST_HEAD(&mci->link); + + return handlers; } /** @@ -719,8 +725,14 @@ EXPORT_SYMBOL(edac_mc_find); /* FIXME - should a warning be printed if no error detection? correction? */ int edac_mc_add_mc(struct mem_ctl_info *mci) { + int ret = -EINVAL; edac_dbg(0, "\n"); + if (mci->mc_idx >= EDAC_MAX_MCS) { + pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx); + return -ENODEV; + } + #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) edac_mc_dump_mci(mci); @@ -749,12 +761,19 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) #endif mutex_lock(&mem_ctls_mutex); + if (edac_mc_owner && edac_mc_owner != mci->mod_name) { + ret = -EPERM; + goto fail0; + } + if (add_mc_to_global_list(mci)) goto fail0; /* set load time so that error rate can be tracked */ mci->start_time = jiffies; + mci->bus = &mc_bus[mci->mc_idx]; + if (edac_create_sysfs_mci_device(mci)) { edac_mc_printk(mci, KERN_WARNING, "failed to create sysfs device\n"); @@ -766,14 +785,18 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) /* This instance is NOW RUNNING */ mci->op_state = OP_RUNNING_POLL; - edac_mc_workq_setup(mci, edac_mc_get_poll_msec()); + edac_mc_workq_setup(mci, edac_mc_get_poll_msec(), true); } else { mci->op_state = OP_RUNNING_INTERRUPT; } /* Report action taken */ - edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" - " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); + edac_mc_printk(mci, KERN_INFO, + "Giving out device to module %s controller %s: DEV %s (%s)\n", + mci->mod_name, mci->ctl_name, mci->dev_name, + edac_op_state_to_string(mci->op_state)); + + edac_mc_owner = mci->mod_name; mutex_unlock(&mem_ctls_mutex); return 0; @@ -783,7 +806,7 @@ fail1: fail0: mutex_unlock(&mem_ctls_mutex); - return 1; + return ret; } EXPORT_SYMBOL_GPL(edac_mc_add_mc); @@ -809,7 +832,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) return NULL; } - del_mc_from_global_list(mci); + if (!del_mc_from_global_list(mci)) + edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); /* flush workq processes */ @@ -907,6 +931,7 @@ const char *edac_layer_name[] = { [EDAC_MC_LAYER_CHANNEL] = "channel", [EDAC_MC_LAYER_SLOT] = "slot", [EDAC_MC_LAYER_CHIP_SELECT] = "csrow", + [EDAC_MC_LAYER_ALL_MEM] = "memory", }; EXPORT_SYMBOL_GPL(edac_layer_name); @@ -993,7 +1018,7 @@ static void edac_ce_error(struct mem_ctl_info *mci, } edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count); - if (mci->scrub_mode & SCRUB_SW_SRC) { + if (mci->scrub_mode == SCRUB_SW_SRC) { /* * Some memory controllers (called MCs below) can remap * memory so that it is still available at a different @@ -1054,7 +1079,46 @@ static void edac_ue_error(struct mem_ctl_info *mci, edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } -#define OTHER_LABEL " or " +/** + * edac_raw_mc_handle_error - reports a memory event to userspace without doing + * anything to discover the error location + * + * @type: severity of the error (CE/UE/Fatal) + * @mci: a struct mem_ctl_info pointer + * @e: error description + * + * This raw function is used internally by edac_mc_handle_error(). It should + * only be called directly when the hardware error come directly from BIOS, + * like in the case of APEI GHES driver. + */ +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e) +{ + char detail[80]; + int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; + + /* Memory type dependent details about the error */ + if (type == HW_EVENT_ERR_CORRECTED) { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", + e->page_frame_number, e->offset_in_page, + e->grain, e->syndrome); + edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report, + e->page_frame_number, e->offset_in_page, e->grain); + } else { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld", + e->page_frame_number, e->offset_in_page, e->grain); + + edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report); + } + + +} +EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error); /** * edac_mc_handle_error - reports a memory event to userspace @@ -1086,19 +1150,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *msg, const char *other_detail) { - /* FIXME: too much for stack: move it to some pre-alocated area */ - char detail[80], location[80]; - char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; char *p; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; - int i; - long grain; - bool enable_per_layer_report = false; + int i, n_labels = 0; u8 grain_bits; + struct edac_raw_error_desc *e = &mci->error_desc; edac_dbg(3, "MC%d\n", mci->mc_idx); + /* Fills the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = error_count; + e->top_layer = top_layer; + e->mid_layer = mid_layer; + e->low_layer = low_layer; + e->page_frame_number = page_frame_number; + e->offset_in_page = offset_in_page; + e->syndrome = syndrome; + e->msg = msg; + e->other_detail = other_detail; + /* * Check if the event report is consistent and if the memory * location is known. If it is known, enable_per_layer_report will be @@ -1121,7 +1193,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, pos[i] = -1; } if (pos[i] >= 0) - enable_per_layer_report = true; + e->enable_per_layer_report = true; } /* @@ -1135,8 +1207,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * where each memory belongs to a separate channel within the same * branch. */ - grain = 0; - p = label; + p = e->label; *p = '\0'; for (i = 0; i < mci->tot_dimms; i++) { @@ -1150,8 +1221,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, continue; /* get the max grain, over the error match range */ - if (dimm->grain > grain) - grain = dimm->grain; + if (dimm->grain > e->grain) + e->grain = dimm->grain; /* * If the error is memory-controller wide, there's no need to @@ -1159,8 +1230,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * channel/memory controller/... may be affected. * Also, don't show errors for empty DIMM slots. */ - if (enable_per_layer_report && dimm->nr_pages) { - if (p != label) { + if (e->enable_per_layer_report && dimm->nr_pages) { + if (n_labels >= EDAC_MAX_LABELS) { + e->enable_per_layer_report = false; + break; + } + n_labels++; + if (p != e->label) { strcpy(p, OTHER_LABEL); p += strlen(OTHER_LABEL); } @@ -1173,7 +1249,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * incrementing the compat API counters */ edac_dbg(4, "%s csrows map: (%d,%d)\n", - mci->mem_is_per_rank ? "rank" : "dimm", + mci->csbased ? "rank" : "dimm", dimm->csrow, dimm->cschannel); if (row == -1) row = dimm->csrow; @@ -1187,12 +1263,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } } - if (!enable_per_layer_report) { - strcpy(label, "any memory"); + if (!e->enable_per_layer_report) { + strcpy(e->label, "any memory"); } else { edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); - if (p == label) - strcpy(label, "unknown memory"); + if (p == e->label) + strcpy(e->label, "unknown memory"); if (type == HW_EVENT_ERR_CORRECTED) { if (row >= 0) { mci->csrows[row]->ce_count += error_count; @@ -1205,7 +1281,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } /* Fill the RAM location data */ - p = location; + p = e->location; for (i = 0; i < mci->n_layers; i++) { if (pos[i] < 0) @@ -1215,32 +1291,16 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, edac_layer_name[mci->layers[i].type], pos[i]); } - if (p > location) + if (p > e->location) *(p - 1) = '\0'; /* Report the error via the trace interface */ - grain_bits = fls_long(grain) + 1; - trace_mc_event(type, msg, label, error_count, - mci->mc_idx, top_layer, mid_layer, low_layer, - PAGES_TO_MiB(page_frame_number) | offset_in_page, - grain_bits, syndrome, other_detail); - - /* Memory type dependent details about the error */ - if (type == HW_EVENT_ERR_CORRECTED) { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", - page_frame_number, offset_in_page, - grain, syndrome); - edac_ce_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report, - page_frame_number, offset_in_page, grain); - } else { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld", - page_frame_number, offset_in_page, grain); + grain_bits = fls_long(e->grain) + 1; + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); - edac_ue_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report); - } + edac_raw_mc_handle_error(type, mci, e); } EXPORT_SYMBOL_GPL(edac_mc_handle_error); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 0ca1ca71157..01fae8289cf 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -7,7 +7,7 @@ * * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com * - * (c) 2012 - Mauro Carvalho Chehab <mchehab@redhat.com> + * (c) 2012-2013 - Mauro Carvalho Chehab * The entire API were re-written, and ported to use struct device * */ @@ -52,16 +52,20 @@ int edac_mc_get_poll_msec(void) static int edac_set_poll_msec(const char *val, struct kernel_param *kp) { - long l; + unsigned long l; int ret; if (!val) return -EINVAL; - ret = strict_strtol(val, 0, &l); - if (ret == -EINVAL || ((int)l != l)) + ret = kstrtoul(val, 0, &l); + if (ret) + return ret; + + if (l < 1000) return -EINVAL; - *((int *)kp->arg) = l; + + *((unsigned long *)kp->arg) = l; /* notify edac_mc engine to reset the poll period */ edac_mc_reset_delay_period(l); @@ -87,7 +91,7 @@ static struct device *mci_pdev; /* * various constants for Memory Controllers */ -static const char *mem_types[] = { +static const char * const mem_types[] = { [MEM_EMPTY] = "Empty", [MEM_RESERVED] = "Reserved", [MEM_UNKNOWN] = "Unknown", @@ -107,7 +111,7 @@ static const char *mem_types[] = { [MEM_RDDR3] = "Registered-DDR3" }; -static const char *dev_types[] = { +static const char * const dev_types[] = { [DEV_UNKNOWN] = "Unknown", [DEV_X1] = "x1", [DEV_X2] = "x2", @@ -118,7 +122,7 @@ static const char *dev_types[] = { [DEV_X64] = "x64" }; -static const char *edac_caps[] = { +static const char * const edac_caps[] = { [EDAC_UNKNOWN] = "Unknown", [EDAC_NONE] = "None", [EDAC_RESERVED] = "Reserved", @@ -143,7 +147,7 @@ static const char *edac_caps[] = { * and the per-dimm/per-rank one */ #define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \ - struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) + static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) struct dev_ch_attribute { struct device_attribute attr; @@ -180,9 +184,6 @@ static ssize_t csrow_size_show(struct device *dev, int i; u32 nr_pages = 0; - if (csrow->mci->csbased) - return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages)); - for (i = 0; i < csrow->nr_channels; i++) nr_pages += csrow->channels[i]->dimm->nr_pages; return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages)); @@ -330,17 +331,17 @@ static struct device_attribute *dynamic_csrow_dimm_attr[] = { }; /* possible dynamic channel ce_count attribute files */ -DEVICE_CHANNEL(ch0_ce_count, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch0_ce_count, S_IRUGO, channel_ce_count_show, NULL, 0); -DEVICE_CHANNEL(ch1_ce_count, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch1_ce_count, S_IRUGO, channel_ce_count_show, NULL, 1); -DEVICE_CHANNEL(ch2_ce_count, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch2_ce_count, S_IRUGO, channel_ce_count_show, NULL, 2); -DEVICE_CHANNEL(ch3_ce_count, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch3_ce_count, S_IRUGO, channel_ce_count_show, NULL, 3); -DEVICE_CHANNEL(ch4_ce_count, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch4_ce_count, S_IRUGO, channel_ce_count_show, NULL, 4); -DEVICE_CHANNEL(ch5_ce_count, S_IRUGO | S_IWUSR, +DEVICE_CHANNEL(ch5_ce_count, S_IRUGO, channel_ce_count_show, NULL, 5); /* Total possible dynamic ce_count attribute file table */ @@ -373,7 +374,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci, return -ENODEV; csrow->dev.type = &csrow_attr_type; - csrow->dev.bus = &mci->bus; + csrow->dev.bus = mci->bus; device_initialize(&csrow->dev); csrow->dev.parent = &mci->dev; csrow->mci = mci; @@ -429,8 +430,12 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci) if (!nr_pages_per_csrow(csrow)) continue; err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) + if (err < 0) { + edac_dbg(1, + "failure: create csrow objects for csrow %d\n", + i); goto error; + } } return 0; @@ -604,11 +609,11 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci, dimm->mci = mci; dimm->dev.type = &dimm_attr_type; - dimm->dev.bus = &mci->bus; + dimm->dev.bus = mci->bus; device_initialize(&dimm->dev); dimm->dev.parent = &mci->dev; - if (mci->mem_is_per_rank) + if (mci->csbased) dev_set_name(&dimm->dev, "rank%d", index); else dev_set_name(&dimm->dev, "dimm%d", index); @@ -677,10 +682,7 @@ static ssize_t mci_sdram_scrub_rate_store(struct device *dev, unsigned long bandwidth = 0; int new_bw = 0; - if (!mci->set_sdram_scrub_rate) - return -ENODEV; - - if (strict_strtoul(data, 10, &bandwidth) < 0) + if (kstrtoul(data, 10, &bandwidth) < 0) return -EINVAL; new_bw = mci->set_sdram_scrub_rate(mci, bandwidth); @@ -703,9 +705,6 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); int bandwidth = 0; - if (!mci->get_sdram_scrub_rate) - return -ENODEV; - bandwidth = mci->get_sdram_scrub_rate(mci); if (bandwidth < 0) { edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); @@ -780,14 +779,10 @@ static ssize_t mci_size_mb_show(struct device *dev, for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) { struct csrow_info *csrow = mci->csrows[csrow_idx]; - if (csrow->mci->csbased) { - total_pages += csrow->nr_pages; - } else { - for (j = 0; j < csrow->nr_channels; j++) { - struct dimm_info *dimm = csrow->channels[j]->dimm; + for (j = 0; j < csrow->nr_channels; j++) { + struct dimm_info *dimm = csrow->channels[j]->dimm; - total_pages += dimm->nr_pages; - } + total_pages += dimm->nr_pages; } } @@ -866,8 +861,7 @@ DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); /* memory scrubber attribute file */ -DEVICE_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show, - mci_sdram_scrub_rate_store); +DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL); static struct attribute *mci_attrs[] = { &dev_attr_reset_counters.attr, @@ -878,7 +872,6 @@ static struct attribute *mci_attrs[] = { &dev_attr_ce_noinfo_count.attr, &dev_attr_ue_count.attr, &dev_attr_ce_count.attr, - &dev_attr_sdram_scrub_rate.attr, &dev_attr_max_location.attr, NULL }; @@ -923,7 +916,7 @@ void __exit edac_debugfs_exit(void) debugfs_remove(edac_debugfs); } -int edac_create_debug_nodes(struct mem_ctl_info *mci) +static int edac_create_debug_nodes(struct mem_ctl_info *mci) { struct dentry *d, *parent; char name[80]; @@ -986,11 +979,13 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) * The memory controller needs its own bus, in order to avoid * namespace conflicts at /sys/bus/edac. */ - mci->bus.name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!mci->bus.name) + mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); + if (!mci->bus->name) return -ENOMEM; - edac_dbg(0, "creating bus %s\n", mci->bus.name); - err = bus_register(&mci->bus); + + edac_dbg(0, "creating bus %s\n", mci->bus->name); + + err = bus_register(mci->bus); if (err < 0) return err; @@ -999,7 +994,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) device_initialize(&mci->dev); mci->dev.parent = mci_pdev; - mci->dev.bus = &mci->bus; + mci->dev.bus = mci->bus; dev_set_name(&mci->dev, "mc%d", mci->mc_idx); dev_set_drvdata(&mci->dev, mci); pm_runtime_forbid(&mci->dev); @@ -1007,11 +1002,28 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); err = device_add(&mci->dev); if (err < 0) { - bus_unregister(&mci->bus); - kfree(mci->bus.name); + edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } + if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) { + if (mci->get_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO; + dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show; + } + if (mci->set_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR; + dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store; + } + err = device_create_file(&mci->dev, + &dev_attr_sdram_scrub_rate); + if (err) { + edac_dbg(1, "failure: create sdram_scrub_rate\n"); + goto fail2; + } + } /* * Create the dimm/rank devices */ @@ -1056,9 +1068,10 @@ fail: continue; device_unregister(&dimm->dev); } +fail2: device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -1091,8 +1104,8 @@ void edac_unregister_sysfs(struct mem_ctl_info *mci) { edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); } static void mc_attr_release(struct device *dev) diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 12c951a2c33..a66941fea5a 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -146,7 +146,7 @@ static void __exit edac_exit(void) /* * Inform the kernel of our entry and exit points */ -module_init(edac_init); +subsys_initcall(edac_init); module_exit(edac_exit); MODULE_LICENSE("GPL"); diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index 3d139c6e7fe..f2118bfcf8d 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -52,7 +52,7 @@ extern void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, extern void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev); extern void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, unsigned long value); -extern void edac_mc_reset_delay_period(int value); +extern void edac_mc_reset_delay_period(unsigned long value); extern void *edac_align_ptr(void **p, unsigned size, int n_elems); diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index dd370f92ace..2cf44b4db80 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -358,11 +358,9 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx) } edac_pci_printk(pci, KERN_INFO, - "Giving out device to module '%s' controller '%s':" - " DEV '%s' (%s)\n", - pci->mod_name, - pci->ctl_name, - edac_dev_name(pci), edac_op_state_to_string(pci->op_state)); + "Giving out device to module %s controller %s: DEV %s (%s)\n", + pci->mod_name, pci->ctl_name, pci->dev_name, + edac_op_state_to_string(pci->op_state)); mutex_unlock(&edac_pci_ctls_mutex); return 0; diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index dc6e905ee1a..e8658e45176 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -256,7 +256,7 @@ static ssize_t edac_pci_dev_store(struct kobject *kobj, struct edac_pci_dev_attribute *edac_pci_dev; edac_pci_dev = (struct edac_pci_dev_attribute *)attr; - if (edac_pci_dev->show) + if (edac_pci_dev->store) return edac_pci_dev->store(edac_pci_dev->value, buffer, count); return -EIO; } @@ -429,8 +429,8 @@ static void edac_pci_main_kobj_teardown(void) if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { edac_dbg(0, "called kobject_put on main kobj\n"); kobject_put(edac_pci_top_main_kobj); + edac_put_sysfs_subsys(); } - edac_put_sysfs_subsys(); } /* diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index 351945fa2ec..9d9e18aefaa 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -29,6 +29,25 @@ EXPORT_SYMBOL_GPL(edac_err_assert); static atomic_t edac_subsys_valid = ATOMIC_INIT(0); +int edac_report_status = EDAC_REPORTING_ENABLED; +EXPORT_SYMBOL_GPL(edac_report_status); + +static int __init edac_report_setup(char *str) +{ + if (!str) + return -EINVAL; + + if (!strncmp(str, "on", 2)) + set_edac_report_status(EDAC_REPORTING_ENABLED); + else if (!strncmp(str, "off", 3)) + set_edac_report_status(EDAC_REPORTING_DISABLED); + else if (!strncmp(str, "force", 5)) + set_edac_report_status(EDAC_REPORTING_FORCE); + + return 0; +} +__setup("edac_report=", edac_report_setup); + /* * called to determine if there is an EDAC driver interested in * knowing an event (such as NMI) occurred diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c new file mode 100644 index 00000000000..8399b4e16fe --- /dev/null +++ b/drivers/edac/ghes_edac.c @@ -0,0 +1,547 @@ +/* + * GHES/EDAC Linux driver + * + * This file may be distributed under the terms of the GNU General Public + * License version 2. + * + * Copyright (c) 2013 by Mauro Carvalho Chehab + * + * Red Hat Inc. http://www.redhat.com + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <acpi/ghes.h> +#include <linux/edac.h> +#include <linux/dmi.h> +#include "edac_core.h" +#include <ras/ras_event.h> + +#define GHES_EDAC_REVISION " Ver: 1.0.0" + +struct ghes_edac_pvt { + struct list_head list; + struct ghes *ghes; + struct mem_ctl_info *mci; + + /* Buffers for the error handling routine */ + char detail_location[240]; + char other_detail[160]; + char msg[80]; +}; + +static LIST_HEAD(ghes_reglist); +static DEFINE_MUTEX(ghes_edac_lock); +static int ghes_edac_mc_num; + + +/* Memory Device - Type 17 of SMBIOS spec */ +struct memdev_dmi_entry { + u8 type; + u8 length; + u16 handle; + u16 phys_mem_array_handle; + u16 mem_err_info_handle; + u16 total_width; + u16 data_width; + u16 size; + u8 form_factor; + u8 device_set; + u8 device_locator; + u8 bank_locator; + u8 memory_type; + u16 type_detail; + u16 speed; + u8 manufacturer; + u8 serial_number; + u8 asset_tag; + u8 part_number; + u8 attributes; + u32 extended_size; + u16 conf_mem_clk_speed; +} __attribute__((__packed__)); + +struct ghes_edac_dimm_fill { + struct mem_ctl_info *mci; + unsigned count; +}; + +char *memory_type[] = { + [MEM_EMPTY] = "EMPTY", + [MEM_RESERVED] = "RESERVED", + [MEM_UNKNOWN] = "UNKNOWN", + [MEM_FPM] = "FPM", + [MEM_EDO] = "EDO", + [MEM_BEDO] = "BEDO", + [MEM_SDR] = "SDR", + [MEM_RDR] = "RDR", + [MEM_DDR] = "DDR", + [MEM_RDDR] = "RDDR", + [MEM_RMBS] = "RMBS", + [MEM_DDR2] = "DDR2", + [MEM_FB_DDR2] = "FB_DDR2", + [MEM_RDDR2] = "RDDR2", + [MEM_XDR] = "XDR", + [MEM_DDR3] = "DDR3", + [MEM_RDDR3] = "RDDR3", +}; + +static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) +{ + int *num_dimm = arg; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) + (*num_dimm)++; +} + +static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) +{ + struct ghes_edac_dimm_fill *dimm_fill = arg; + struct mem_ctl_info *mci = dimm_fill->mci; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) { + struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, + dimm_fill->count, 0, 0); + + if (entry->size == 0xffff) { + pr_info("Can't get DIMM%i size\n", + dimm_fill->count); + dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ + } else if (entry->size == 0x7fff) { + dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); + } else { + if (entry->size & 1 << 15) + dimm->nr_pages = MiB_TO_PAGES((entry->size & + 0x7fff) << 10); + else + dimm->nr_pages = MiB_TO_PAGES(entry->size); + } + + switch (entry->memory_type) { + case 0x12: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR; + else + dimm->mtype = MEM_DDR; + break; + case 0x13: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR2; + else + dimm->mtype = MEM_DDR2; + break; + case 0x14: + dimm->mtype = MEM_FB_DDR2; + break; + case 0x18: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR3; + else + dimm->mtype = MEM_DDR3; + break; + default: + if (entry->type_detail & 1 << 6) + dimm->mtype = MEM_RMBS; + else if ((entry->type_detail & ((1 << 7) | (1 << 13))) + == ((1 << 7) | (1 << 13))) + dimm->mtype = MEM_RDR; + else if (entry->type_detail & 1 << 7) + dimm->mtype = MEM_SDR; + else if (entry->type_detail & 1 << 9) + dimm->mtype = MEM_EDO; + else + dimm->mtype = MEM_UNKNOWN; + } + + /* + * Actually, we can only detect if the memory has bits for + * checksum or not + */ + if (entry->total_width == entry->data_width) + dimm->edac_mode = EDAC_NONE; + else + dimm->edac_mode = EDAC_SECDED; + + dimm->dtype = DEV_UNKNOWN; + dimm->grain = 128; /* Likely, worse case */ + + /* + * FIXME: It shouldn't be hard to also fill the DIMM labels + */ + + if (dimm->nr_pages) { + edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", + dimm_fill->count, memory_type[dimm->mtype], + PAGES_TO_MiB(dimm->nr_pages), + (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); + edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", + entry->memory_type, entry->type_detail, + entry->total_width, entry->data_width); + } + + dimm_fill->count++; + } +} + +void ghes_edac_report_mem_error(struct ghes *ghes, int sev, + struct cper_sec_mem_err *mem_err) +{ + enum hw_event_mc_err_type type; + struct edac_raw_error_desc *e; + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt = NULL; + char *p; + u8 grain_bits; + + list_for_each_entry(pvt, &ghes_reglist, list) { + if (ghes == pvt->ghes) + break; + } + if (!pvt) { + pr_err("Internal error: Can't find EDAC structure\n"); + return; + } + mci = pvt->mci; + e = &mci->error_desc; + + /* Cleans the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = 1; + strcpy(e->label, "unknown label"); + e->msg = pvt->msg; + e->other_detail = pvt->other_detail; + e->top_layer = -1; + e->mid_layer = -1; + e->low_layer = -1; + *pvt->other_detail = '\0'; + *pvt->msg = '\0'; + + switch (sev) { + case GHES_SEV_CORRECTED: + type = HW_EVENT_ERR_CORRECTED; + break; + case GHES_SEV_RECOVERABLE: + type = HW_EVENT_ERR_UNCORRECTED; + break; + case GHES_SEV_PANIC: + type = HW_EVENT_ERR_FATAL; + break; + default: + case GHES_SEV_NO: + type = HW_EVENT_ERR_INFO; + } + + edac_dbg(1, "error validation_bits: 0x%08llx\n", + (long long)mem_err->validation_bits); + + /* Error type, mapped on e->msg */ + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { + p = pvt->msg; + switch (mem_err->error_type) { + case 0: + p += sprintf(p, "Unknown"); + break; + case 1: + p += sprintf(p, "No error"); + break; + case 2: + p += sprintf(p, "Single-bit ECC"); + break; + case 3: + p += sprintf(p, "Multi-bit ECC"); + break; + case 4: + p += sprintf(p, "Single-symbol ChipKill ECC"); + break; + case 5: + p += sprintf(p, "Multi-symbol ChipKill ECC"); + break; + case 6: + p += sprintf(p, "Master abort"); + break; + case 7: + p += sprintf(p, "Target abort"); + break; + case 8: + p += sprintf(p, "Parity Error"); + break; + case 9: + p += sprintf(p, "Watchdog timeout"); + break; + case 10: + p += sprintf(p, "Invalid address"); + break; + case 11: + p += sprintf(p, "Mirror Broken"); + break; + case 12: + p += sprintf(p, "Memory Sparing"); + break; + case 13: + p += sprintf(p, "Scrub corrected error"); + break; + case 14: + p += sprintf(p, "Scrub uncorrected error"); + break; + case 15: + p += sprintf(p, "Physical Memory Map-out event"); + break; + default: + p += sprintf(p, "reserved error (%d)", + mem_err->error_type); + } + } else { + strcpy(pvt->msg, "unknown error"); + } + + /* Error address */ + if (mem_err->validation_bits & CPER_MEM_VALID_PA) { + e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; + e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; + } + + /* Error grain */ + if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK) + e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); + + /* Memory error location, mapped on e->location */ + p = e->location; + if (mem_err->validation_bits & CPER_MEM_VALID_NODE) + p += sprintf(p, "node:%d ", mem_err->node); + if (mem_err->validation_bits & CPER_MEM_VALID_CARD) + p += sprintf(p, "card:%d ", mem_err->card); + if (mem_err->validation_bits & CPER_MEM_VALID_MODULE) + p += sprintf(p, "module:%d ", mem_err->module); + if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER) + p += sprintf(p, "rank:%d ", mem_err->rank); + if (mem_err->validation_bits & CPER_MEM_VALID_BANK) + p += sprintf(p, "bank:%d ", mem_err->bank); + if (mem_err->validation_bits & CPER_MEM_VALID_ROW) + p += sprintf(p, "row:%d ", mem_err->row); + if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN) + p += sprintf(p, "col:%d ", mem_err->column); + if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION) + p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); + if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { + const char *bank = NULL, *device = NULL; + dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device); + if (bank != NULL && device != NULL) + p += sprintf(p, "DIMM location:%s %s ", bank, device); + else + p += sprintf(p, "DIMM DMI handle: 0x%.4x ", + mem_err->mem_dev_handle); + } + if (p > e->location) + *(p - 1) = '\0'; + + /* All other fields are mapped on e->other_detail */ + p = pvt->other_detail; + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { + u64 status = mem_err->error_status; + + p += sprintf(p, "status(0x%016llx): ", (long long)status); + switch ((status >> 8) & 0xff) { + case 1: + p += sprintf(p, "Error detected internal to the component "); + break; + case 16: + p += sprintf(p, "Error detected in the bus "); + break; + case 4: + p += sprintf(p, "Storage error in DRAM memory "); + break; + case 5: + p += sprintf(p, "Storage error in TLB "); + break; + case 6: + p += sprintf(p, "Storage error in cache "); + break; + case 7: + p += sprintf(p, "Error in one or more functional units "); + break; + case 8: + p += sprintf(p, "component failed self test "); + break; + case 9: + p += sprintf(p, "Overflow or undervalue of internal queue "); + break; + case 17: + p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR "); + break; + case 18: + p += sprintf(p, "Improper access error "); + break; + case 19: + p += sprintf(p, "Access to a memory address which is not mapped to any component "); + break; + case 20: + p += sprintf(p, "Loss of Lockstep "); + break; + case 21: + p += sprintf(p, "Response not associated with a request "); + break; + case 22: + p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits "); + break; + case 23: + p += sprintf(p, "Detection of a PATH_ERROR "); + break; + case 25: + p += sprintf(p, "Bus operation timeout "); + break; + case 26: + p += sprintf(p, "A read was issued to data that has been poisoned "); + break; + default: + p += sprintf(p, "reserved "); + break; + } + } + if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) + p += sprintf(p, "requestorID: 0x%016llx ", + (long long)mem_err->requestor_id); + if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID) + p += sprintf(p, "responderID: 0x%016llx ", + (long long)mem_err->responder_id); + if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID) + p += sprintf(p, "targetID: 0x%016llx ", + (long long)mem_err->responder_id); + if (p > pvt->other_detail) + *(p - 1) = '\0'; + + /* Generate the trace event */ + grain_bits = fls_long(e->grain); + sprintf(pvt->detail_location, "APEI location: %s %s", + e->location, e->other_detail); + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, pvt->detail_location); + + /* Report the error via EDAC API */ + edac_raw_mc_handle_error(type, mci, e); +} +EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); + +int ghes_edac_register(struct ghes *ghes, struct device *dev) +{ + bool fake = false; + int rc, num_dimm = 0; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[1]; + struct ghes_edac_pvt *pvt; + struct ghes_edac_dimm_fill dimm_fill; + + /* Get the number of DIMMs */ + dmi_walk(ghes_edac_count_dimms, &num_dimm); + + /* Check if we've got a bogus BIOS */ + if (num_dimm == 0) { + fake = true; + num_dimm = 1; + } + + layers[0].type = EDAC_MC_LAYER_ALL_MEM; + layers[0].size = num_dimm; + layers[0].is_virt_csrow = true; + + /* + * We need to serialize edac_mc_alloc() and edac_mc_add_mc(), + * to avoid duplicated memory controller numbers + */ + mutex_lock(&ghes_edac_lock); + mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers, + sizeof(*pvt)); + if (!mci) { + pr_info("Can't allocate memory for EDAC data\n"); + mutex_unlock(&ghes_edac_lock); + return -ENOMEM; + } + + pvt = mci->pvt_info; + memset(pvt, 0, sizeof(*pvt)); + list_add_tail(&pvt->list, &ghes_reglist); + pvt->ghes = ghes; + pvt->mci = mci; + mci->pdev = dev; + + mci->mtype_cap = MEM_FLAG_EMPTY; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "ghes_edac.c"; + mci->mod_ver = GHES_EDAC_REVISION; + mci->ctl_name = "ghes_edac"; + mci->dev_name = "ghes"; + + if (!ghes_edac_mc_num) { + if (!fake) { + pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n"); + pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n"); + pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); + pr_info("If you find incorrect reports, please contact your hardware vendor\n"); + pr_info("to correct its BIOS.\n"); + pr_info("This system has %d DIMM sockets.\n", + num_dimm); + } else { + pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); + pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); + pr_info("work on such system. Use this driver with caution\n"); + } + } + + if (!fake) { + /* + * Fill DIMM info from DMI for the memory controller #0 + * + * Keep it in blank for the other memory controllers, as + * there's no reliable way to properly credit each DIMM to + * the memory controller, as different BIOSes fill the + * DMI bank location fields on different ways + */ + if (!ghes_edac_mc_num) { + dimm_fill.count = 0; + dimm_fill.mci = mci; + dmi_walk(ghes_edac_dmidecode, &dimm_fill); + } + } else { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, 0, 0, 0); + + dimm->nr_pages = 1; + dimm->grain = 128; + dimm->mtype = MEM_UNKNOWN; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = EDAC_SECDED; + } + + rc = edac_mc_add_mc(mci); + if (rc < 0) { + pr_info("Can't register at EDAC core\n"); + edac_mc_free(mci); + mutex_unlock(&ghes_edac_lock); + return -ENODEV; + } + + ghes_edac_mc_num++; + mutex_unlock(&ghes_edac_lock); + return 0; +} +EXPORT_SYMBOL_GPL(ghes_edac_register); + +void ghes_edac_unregister(struct ghes *ghes) +{ + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt, *tmp; + + list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) { + if (ghes == pvt->ghes) { + mci = pvt->mci; + edac_mc_del_mc(mci->pdev); + edac_mc_free(mci); + list_del(&pvt->list); + } + } +} +EXPORT_SYMBOL_GPL(ghes_edac_unregister); diff --git a/drivers/edac/highbank_l2_edac.c b/drivers/edac/highbank_l2_edac.c index c2bd8c6a434..2f193668ebc 100644 --- a/drivers/edac/highbank_l2_edac.c +++ b/drivers/edac/highbank_l2_edac.c @@ -50,8 +50,15 @@ static irqreturn_t highbank_l2_err_handler(int irq, void *dev_id) return IRQ_HANDLED; } +static const struct of_device_id hb_l2_err_of_match[] = { + { .compatible = "calxeda,hb-sregs-l2-ecc", }, + {}, +}; +MODULE_DEVICE_TABLE(of, hb_l2_err_of_match); + static int highbank_l2_err_probe(struct platform_device *pdev) { + const struct of_device_id *id; struct edac_device_ctl_info *dci; struct hb_l2_drvdata *drvdata; struct resource *r; @@ -90,28 +97,32 @@ static int highbank_l2_err_probe(struct platform_device *pdev) goto err; } + id = of_match_device(hb_l2_err_of_match, &pdev->dev); + dci->mod_name = pdev->dev.driver->name; + dci->ctl_name = id ? id->compatible : "unknown"; + dci->dev_name = dev_name(&pdev->dev); + + if (edac_device_add_device(dci)) + goto err; + drvdata->db_irq = platform_get_irq(pdev, 0); res = devm_request_irq(&pdev->dev, drvdata->db_irq, highbank_l2_err_handler, 0, dev_name(&pdev->dev), dci); if (res < 0) - goto err; + goto err2; drvdata->sb_irq = platform_get_irq(pdev, 1); res = devm_request_irq(&pdev->dev, drvdata->sb_irq, highbank_l2_err_handler, 0, dev_name(&pdev->dev), dci); if (res < 0) - goto err; - - dci->mod_name = dev_name(&pdev->dev); - dci->dev_name = dev_name(&pdev->dev); - - if (edac_device_add_device(dci)) - goto err; + goto err2; devres_close_group(&pdev->dev, NULL); return 0; +err2: + edac_device_del_device(&pdev->dev); err: devres_release_group(&pdev->dev, NULL); edac_device_free_ctl_info(dci); @@ -127,12 +138,6 @@ static int highbank_l2_err_remove(struct platform_device *pdev) return 0; } -static const struct of_device_id hb_l2_err_of_match[] = { - { .compatible = "calxeda,hb-sregs-l2-ecc", }, - {}, -}; -MODULE_DEVICE_TABLE(of, hb_l2_err_of_match); - static struct platform_driver highbank_l2_edac_driver = { .probe = highbank_l2_err_probe, .remove = highbank_l2_err_remove, diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c index 4695dd2d71f..f784de1dc79 100644 --- a/drivers/edac/highbank_mc_edac.c +++ b/drivers/edac/highbank_mc_edac.c @@ -26,31 +26,40 @@ #include "edac_module.h" /* DDR Ctrlr Error Registers */ -#define HB_DDR_ECC_OPT 0x128 -#define HB_DDR_ECC_U_ERR_ADDR 0x130 -#define HB_DDR_ECC_U_ERR_STAT 0x134 -#define HB_DDR_ECC_U_ERR_DATAL 0x138 -#define HB_DDR_ECC_U_ERR_DATAH 0x13c -#define HB_DDR_ECC_C_ERR_ADDR 0x140 -#define HB_DDR_ECC_C_ERR_STAT 0x144 -#define HB_DDR_ECC_C_ERR_DATAL 0x148 -#define HB_DDR_ECC_C_ERR_DATAH 0x14c -#define HB_DDR_ECC_INT_STATUS 0x180 -#define HB_DDR_ECC_INT_ACK 0x184 -#define HB_DDR_ECC_U_ERR_ID 0x424 -#define HB_DDR_ECC_C_ERR_ID 0x428 -#define HB_DDR_ECC_INT_STAT_CE 0x8 -#define HB_DDR_ECC_INT_STAT_DOUBLE_CE 0x10 -#define HB_DDR_ECC_INT_STAT_UE 0x20 -#define HB_DDR_ECC_INT_STAT_DOUBLE_UE 0x40 +#define HB_DDR_ECC_ERR_BASE 0x128 +#define MW_DDR_ECC_ERR_BASE 0x1b4 + +#define HB_DDR_ECC_OPT 0x00 +#define HB_DDR_ECC_U_ERR_ADDR 0x08 +#define HB_DDR_ECC_U_ERR_STAT 0x0c +#define HB_DDR_ECC_U_ERR_DATAL 0x10 +#define HB_DDR_ECC_U_ERR_DATAH 0x14 +#define HB_DDR_ECC_C_ERR_ADDR 0x18 +#define HB_DDR_ECC_C_ERR_STAT 0x1c +#define HB_DDR_ECC_C_ERR_DATAL 0x20 +#define HB_DDR_ECC_C_ERR_DATAH 0x24 #define HB_DDR_ECC_OPT_MODE_MASK 0x3 #define HB_DDR_ECC_OPT_FWC 0x100 #define HB_DDR_ECC_OPT_XOR_SHIFT 16 +/* DDR Ctrlr Interrupt Registers */ + +#define HB_DDR_ECC_INT_BASE 0x180 +#define MW_DDR_ECC_INT_BASE 0x218 + +#define HB_DDR_ECC_INT_STATUS 0x00 +#define HB_DDR_ECC_INT_ACK 0x04 + +#define HB_DDR_ECC_INT_STAT_CE 0x8 +#define HB_DDR_ECC_INT_STAT_DOUBLE_CE 0x10 +#define HB_DDR_ECC_INT_STAT_UE 0x20 +#define HB_DDR_ECC_INT_STAT_DOUBLE_UE 0x40 + struct hb_mc_drvdata { - void __iomem *mc_vbase; + void __iomem *mc_err_base; + void __iomem *mc_int_base; }; static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) @@ -60,10 +69,10 @@ static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) u32 status, err_addr; /* Read the interrupt status register */ - status = readl(drvdata->mc_vbase + HB_DDR_ECC_INT_STATUS); + status = readl(drvdata->mc_int_base + HB_DDR_ECC_INT_STATUS); if (status & HB_DDR_ECC_INT_STAT_UE) { - err_addr = readl(drvdata->mc_vbase + HB_DDR_ECC_U_ERR_ADDR); + err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_U_ERR_ADDR); edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, err_addr >> PAGE_SHIFT, err_addr & ~PAGE_MASK, 0, @@ -71,9 +80,9 @@ static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) mci->ctl_name, ""); } if (status & HB_DDR_ECC_INT_STAT_CE) { - u32 syndrome = readl(drvdata->mc_vbase + HB_DDR_ECC_C_ERR_STAT); + u32 syndrome = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_STAT); syndrome = (syndrome >> 8) & 0xff; - err_addr = readl(drvdata->mc_vbase + HB_DDR_ECC_C_ERR_ADDR); + err_addr = readl(drvdata->mc_err_base + HB_DDR_ECC_C_ERR_ADDR); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, err_addr >> PAGE_SHIFT, err_addr & ~PAGE_MASK, syndrome, @@ -82,66 +91,79 @@ static irqreturn_t highbank_mc_err_handler(int irq, void *dev_id) } /* clear the error, clears the interrupt */ - writel(status, drvdata->mc_vbase + HB_DDR_ECC_INT_ACK); + writel(status, drvdata->mc_int_base + HB_DDR_ECC_INT_ACK); return IRQ_HANDLED; } -#ifdef CONFIG_EDAC_DEBUG -static ssize_t highbank_mc_err_inject_write(struct file *file, - const char __user *data, - size_t count, loff_t *ppos) +static void highbank_mc_err_inject(struct mem_ctl_info *mci, u8 synd) { - struct mem_ctl_info *mci = file->private_data; struct hb_mc_drvdata *pdata = mci->pvt_info; - char buf[32]; - size_t buf_size; u32 reg; + + reg = readl(pdata->mc_err_base + HB_DDR_ECC_OPT); + reg &= HB_DDR_ECC_OPT_MODE_MASK; + reg |= (synd << HB_DDR_ECC_OPT_XOR_SHIFT) | HB_DDR_ECC_OPT_FWC; + writel(reg, pdata->mc_err_base + HB_DDR_ECC_OPT); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +static ssize_t highbank_mc_inject_ctrl(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); u8 synd; - buf_size = min(count, (sizeof(buf)-1)); - if (copy_from_user(buf, data, buf_size)) - return -EFAULT; - buf[buf_size] = 0; + if (kstrtou8(buf, 16, &synd)) + return -EINVAL; - if (!kstrtou8(buf, 16, &synd)) { - reg = readl(pdata->mc_vbase + HB_DDR_ECC_OPT); - reg &= HB_DDR_ECC_OPT_MODE_MASK; - reg |= (synd << HB_DDR_ECC_OPT_XOR_SHIFT) | HB_DDR_ECC_OPT_FWC; - writel(reg, pdata->mc_vbase + HB_DDR_ECC_OPT); - } + highbank_mc_err_inject(mci, synd); return count; } -static const struct file_operations highbank_mc_debug_inject_fops = { - .open = simple_open, - .write = highbank_mc_err_inject_write, - .llseek = generic_file_llseek, +static DEVICE_ATTR(inject_ctrl, S_IWUSR, NULL, highbank_mc_inject_ctrl); + +struct hb_mc_settings { + int err_offset; + int int_offset; }; -static void highbank_mc_create_debugfs_nodes(struct mem_ctl_info *mci) -{ - if (mci->debugfs) - debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci, - &highbank_mc_debug_inject_fops); -; -} -#else -static void highbank_mc_create_debugfs_nodes(struct mem_ctl_info *mci) -{} -#endif +static struct hb_mc_settings hb_settings = { + .err_offset = HB_DDR_ECC_ERR_BASE, + .int_offset = HB_DDR_ECC_INT_BASE, +}; + +static struct hb_mc_settings mw_settings = { + .err_offset = MW_DDR_ECC_ERR_BASE, + .int_offset = MW_DDR_ECC_INT_BASE, +}; + +static struct of_device_id hb_ddr_ctrl_of_match[] = { + { .compatible = "calxeda,hb-ddr-ctrl", .data = &hb_settings }, + { .compatible = "calxeda,ecx-2000-ddr-ctrl", .data = &mw_settings }, + {}, +}; +MODULE_DEVICE_TABLE(of, hb_ddr_ctrl_of_match); static int highbank_mc_probe(struct platform_device *pdev) { + const struct of_device_id *id; + const struct hb_mc_settings *settings; struct edac_mc_layer layers[2]; struct mem_ctl_info *mci; struct hb_mc_drvdata *drvdata; struct dimm_info *dimm; struct resource *r; + void __iomem *base; u32 control; int irq; int res = 0; + id = of_match_device(hb_ddr_ctrl_of_match, &pdev->dev); + if (!id) + return -ENODEV; + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; layers[0].size = 1; layers[0].is_virt_csrow = true; @@ -174,35 +196,31 @@ static int highbank_mc_probe(struct platform_device *pdev) goto err; } - drvdata->mc_vbase = devm_ioremap(&pdev->dev, - r->start, resource_size(r)); - if (!drvdata->mc_vbase) { + base = devm_ioremap(&pdev->dev, r->start, resource_size(r)); + if (!base) { dev_err(&pdev->dev, "Unable to map regs\n"); res = -ENOMEM; goto err; } - control = readl(drvdata->mc_vbase + HB_DDR_ECC_OPT) & 0x3; + settings = id->data; + drvdata->mc_err_base = base + settings->err_offset; + drvdata->mc_int_base = base + settings->int_offset; + + control = readl(drvdata->mc_err_base + HB_DDR_ECC_OPT) & 0x3; if (!control || (control == 0x2)) { dev_err(&pdev->dev, "No ECC present, or ECC disabled\n"); res = -ENODEV; goto err; } - irq = platform_get_irq(pdev, 0); - res = devm_request_irq(&pdev->dev, irq, highbank_mc_err_handler, - 0, dev_name(&pdev->dev), mci); - if (res < 0) { - dev_err(&pdev->dev, "Unable to request irq %d\n", irq); - goto err; - } - mci->mtype_cap = MEM_FLAG_DDR3; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; - mci->mod_name = dev_name(&pdev->dev); + mci->mod_name = pdev->dev.driver->name; mci->mod_ver = "1"; - mci->ctl_name = dev_name(&pdev->dev); + mci->ctl_name = id->compatible; + mci->dev_name = dev_name(&pdev->dev); mci->scrub_mode = SCRUB_SW_SRC; /* Only a single 4GB DIMM is supported */ @@ -217,10 +235,20 @@ static int highbank_mc_probe(struct platform_device *pdev) if (res < 0) goto err; - highbank_mc_create_debugfs_nodes(mci); + irq = platform_get_irq(pdev, 0); + res = devm_request_irq(&pdev->dev, irq, highbank_mc_err_handler, + 0, dev_name(&pdev->dev), mci); + if (res < 0) { + dev_err(&pdev->dev, "Unable to request irq %d\n", irq); + goto err2; + } + + device_create_file(&mci->dev, &dev_attr_inject_ctrl); devres_close_group(&pdev->dev, NULL); return 0; +err2: + edac_mc_del_mc(&pdev->dev); err: devres_release_group(&pdev->dev, NULL); edac_mc_free(mci); @@ -231,17 +259,12 @@ static int highbank_mc_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); + device_remove_file(&mci->dev, &dev_attr_inject_ctrl); edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); return 0; } -static const struct of_device_id hb_ddr_ctrl_of_match[] = { - { .compatible = "calxeda,hb-ddr-ctrl", }, - {}, -}; -MODULE_DEVICE_TABLE(of, hb_ddr_ctrl_of_match); - static struct platform_driver highbank_mc_edac_driver = { .probe = highbank_mc_probe, .remove = highbank_mc_remove, diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c index 694efcbf19c..cd28b968e5c 100644 --- a/drivers/edac/i3000_edac.c +++ b/drivers/edac/i3000_edac.c @@ -487,7 +487,7 @@ static void i3000_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(i3000_pci_tbl) = { +static const struct pci_device_id i3000_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 3000_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I3000}, diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index 4e8337602e7..022a70273ad 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -106,16 +106,26 @@ static int nr_channels; static int how_many_channels(struct pci_dev *pdev) { + int n_channels; + unsigned char capid0_8b; /* 8th byte of CAPID0 */ pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); + if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ edac_dbg(0, "In single channel mode\n"); - return 1; + n_channels = 1; } else { edac_dbg(0, "In dual channel mode\n"); - return 2; + n_channels = 2; } + + if (capid0_8b & 0x10) /* check if both channels are filled */ + edac_dbg(0, "2 DIMMS per channel disabled\n"); + else + edac_dbg(0, "2 DIMMS per channel enabled\n"); + + return n_channels; } static unsigned long eccerrlog_syndrome(u64 log) @@ -250,8 +260,7 @@ static void i3200_check(struct mem_ctl_info *mci) i3200_process_error_info(mci, &info); } - -void __iomem *i3200_map_mchbar(struct pci_dev *pdev) +static void __iomem *i3200_map_mchbar(struct pci_dev *pdev) { union { u64 mchbar; @@ -290,6 +299,8 @@ static void i3200_get_drbs(void __iomem *window, for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; + + edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]); } } @@ -311,6 +322,9 @@ static unsigned long drb_to_nr_pages( int n; n = drbs[channel][rank]; + if (!n) + return 0; + if (rank > 0) n -= drbs[channel][rank - 1]; if (stacked && (channel == 1) && @@ -377,19 +391,19 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) * cumulative; the last one will contain the total memory * contained in all ranks. */ - for (i = 0; i < mci->nr_csrows; i++) { + for (i = 0; i < I3200_DIMMS; i++) { unsigned long nr_pages; - struct csrow_info *csrow = mci->csrows[i]; - nr_pages = drb_to_nr_pages(drbs, stacked, - i / I3200_RANKS_PER_CHANNEL, - i % I3200_RANKS_PER_CHANNEL); + for (j = 0; j < nr_channels; j++) { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, i, j, 0); - if (nr_pages == 0) - continue; + nr_pages = drb_to_nr_pages(drbs, stacked, j, i); + if (nr_pages == 0) + continue; - for (j = 0; j < nr_channels; j++) { - struct dimm_info *dimm = csrow->channels[j]->dimm; + edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, + stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages)); dimm->nr_pages = nr_pages; dimm->grain = nr_pages << PAGE_SHIFT; @@ -450,9 +464,11 @@ static void i3200_remove_one(struct pci_dev *pdev) iounmap(priv->window); edac_mc_free(mci); + + pci_disable_device(pdev); } -static DEFINE_PCI_DEVICE_TABLE(i3200_pci_tbl) = { +static const struct pci_device_id i3200_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I3200}, diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 63b2194e8c2..72e07e3cf71 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1530,7 +1530,7 @@ static void i5000_remove_one(struct pci_dev *pdev) * * The "E500P" device is the first device supported. */ -static DEFINE_PCI_DEVICE_TABLE(i5000_pci_tbl) = { +static const struct pci_device_id i5000_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I5000_DEV16), .driver_data = I5000P}, diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index d6955b2cc99..6247d186177 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -27,6 +27,7 @@ #include <linux/edac.h> #include <linux/delay.h> #include <linux/mmzone.h> +#include <linux/debugfs.h> #include "edac_core.h" @@ -68,6 +69,14 @@ I5100_FERR_NF_MEM_M1ERR_MASK) #define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */ #define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */ +#define I5100_MEM0EINJMSK0 0x200 /* Injection Mask0 Register Channel 0 */ +#define I5100_MEM1EINJMSK0 0x208 /* Injection Mask0 Register Channel 1 */ +#define I5100_MEMXEINJMSK0_EINJEN (1 << 27) +#define I5100_MEM0EINJMSK1 0x204 /* Injection Mask1 Register Channel 0 */ +#define I5100_MEM1EINJMSK1 0x206 /* Injection Mask1 Register Channel 1 */ + +/* Device 19, Function 0 */ +#define I5100_DINJ0 0x9a /* device 21 and 22, func 0 */ #define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */ @@ -338,13 +347,26 @@ struct i5100_priv { unsigned ranksperchan; /* number of ranks per channel */ struct pci_dev *mc; /* device 16 func 1 */ + struct pci_dev *einj; /* device 19 func 0 */ struct pci_dev *ch0mm; /* device 21 func 0 */ struct pci_dev *ch1mm; /* device 22 func 0 */ struct delayed_work i5100_scrubbing; int scrub_enable; + + /* Error injection */ + u8 inject_channel; + u8 inject_hlinesel; + u8 inject_deviceptr1; + u8 inject_deviceptr2; + u16 inject_eccmask1; + u16 inject_eccmask2; + + struct dentry *debugfs; }; +static struct dentry *i5100_debugfs; + /* map a rank/chan to a slot number on the mainboard */ static int i5100_rank_to_slot(const struct mem_ctl_info *mci, int chan, int rank) @@ -847,29 +869,139 @@ static void i5100_init_csrows(struct mem_ctl_info *mci) chan, rank, 0); dimm->nr_pages = npages; - if (npages) { - dimm->grain = 32; - dimm->dtype = (priv->mtr[chan][rank].width == 4) ? - DEV_X4 : DEV_X8; - dimm->mtype = MEM_RDDR2; - dimm->edac_mode = EDAC_SECDED; - snprintf(dimm->label, sizeof(dimm->label), - "DIMM%u", - i5100_rank_to_slot(mci, chan, rank)); - } + dimm->grain = 32; + dimm->dtype = (priv->mtr[chan][rank].width == 4) ? + DEV_X4 : DEV_X8; + dimm->mtype = MEM_RDDR2; + dimm->edac_mode = EDAC_SECDED; + snprintf(dimm->label, sizeof(dimm->label), "DIMM%u", + i5100_rank_to_slot(mci, chan, rank)); edac_dbg(2, "dimm channel %d, rank %d, size %ld\n", chan, rank, (long)PAGES_TO_MiB(npages)); } } +/**************************************************************************** + * Error injection routines + ****************************************************************************/ + +static void i5100_do_inject(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + u32 mask0; + u16 mask1; + + /* MEM[1:0]EINJMSK0 + * 31 - ADDRMATCHEN + * 29:28 - HLINESEL + * 00 Reserved + * 01 Lower half of cache line + * 10 Upper half of cache line + * 11 Both upper and lower parts of cache line + * 27 - EINJEN + * 25:19 - XORMASK1 for deviceptr1 + * 9:5 - SEC2RAM for deviceptr2 + * 4:0 - FIR2RAM for deviceptr1 + */ + mask0 = ((priv->inject_hlinesel & 0x3) << 28) | + I5100_MEMXEINJMSK0_EINJEN | + ((priv->inject_eccmask1 & 0xffff) << 10) | + ((priv->inject_deviceptr2 & 0x1f) << 5) | + (priv->inject_deviceptr1 & 0x1f); + + /* MEM[1:0]EINJMSK1 + * 15:0 - XORMASK2 for deviceptr2 + */ + mask1 = priv->inject_eccmask2; + + if (priv->inject_channel == 0) { + pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1); + } else { + pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1); + } + + /* Error Injection Response Function + * Intel 5100 Memory Controller Hub Chipset (318378) datasheet + * hints about this register but carry no data about them. All + * data regarding device 19 is based on experimentation and the + * Intel 7300 Chipset Memory Controller Hub (318082) datasheet + * which appears to be accurate for the i5100 in this area. + * + * The injection code don't work without setting this register. + * The register needs to be flipped off then on else the hardware + * will only preform the first injection. + * + * Stop condition bits 7:4 + * 1010 - Stop after one injection + * 1011 - Never stop injecting faults + * + * Start condition bits 3:0 + * 1010 - Never start + * 1011 - Start immediately + */ + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa); + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) +static ssize_t inject_enable_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + + i5100_do_inject(mci); + + return count; +} + +static const struct file_operations i5100_inject_enable_fops = { + .open = simple_open, + .write = inject_enable_write, + .llseek = generic_file_llseek, +}; + +static int i5100_setup_debugfs(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + + if (!i5100_debugfs) + return -ENODEV; + + priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs); + + if (!priv->debugfs) + return -ENOMEM; + + debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_channel); + debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_hlinesel); + debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr1); + debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr2); + debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask1); + debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask2); + debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs, + &mci->dev, &i5100_inject_enable_fops); + + return 0; + +} + static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; struct i5100_priv *priv; - struct pci_dev *ch0mm, *ch1mm; + struct pci_dev *ch0mm, *ch1mm, *einj; int ret = 0; u32 dw; int ranksperch; @@ -941,6 +1073,22 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto bail_disable_ch1; } + + /* device 19, func 0, Error injection */ + einj = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_19, 0); + if (!einj) { + ret = -ENODEV; + goto bail_einj; + } + + rc = pci_enable_device(einj); + if (rc < 0) { + ret = rc; + goto bail_disable_einj; + } + + mci->pdev = &pdev->dev; priv = mci->pvt_info; @@ -948,6 +1096,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) priv->mc = pdev; priv->ch0mm = ch0mm; priv->ch1mm = ch1mm; + priv->einj = einj; INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing); @@ -975,6 +1124,13 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mci->set_sdram_scrub_rate = i5100_set_scrub_rate; mci->get_sdram_scrub_rate = i5100_get_scrub_rate; + priv->inject_channel = 0; + priv->inject_hlinesel = 0; + priv->inject_deviceptr1 = 0; + priv->inject_deviceptr2 = 0; + priv->inject_eccmask1 = 0; + priv->inject_eccmask2 = 0; + i5100_init_csrows(mci); /* this strange construction seems to be in every driver, dunno why */ @@ -992,6 +1148,8 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto bail_scrub; } + i5100_setup_debugfs(mci); + return ret; bail_scrub: @@ -999,6 +1157,12 @@ bail_scrub: cancel_delayed_work_sync(&(priv->i5100_scrubbing)); edac_mc_free(mci); +bail_disable_einj: + pci_disable_device(einj); + +bail_einj: + pci_dev_put(einj); + bail_disable_ch1: pci_disable_device(ch1mm); @@ -1030,19 +1194,23 @@ static void i5100_remove_one(struct pci_dev *pdev) priv = mci->pvt_info; + debugfs_remove_recursive(priv->debugfs); + priv->scrub_enable = 0; cancel_delayed_work_sync(&(priv->i5100_scrubbing)); pci_disable_device(pdev); pci_disable_device(priv->ch0mm); pci_disable_device(priv->ch1mm); + pci_disable_device(priv->einj); pci_dev_put(priv->ch0mm); pci_dev_put(priv->ch1mm); + pci_dev_put(priv->einj); edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(i5100_pci_tbl) = { +static const struct pci_device_id i5100_pci_tbl[] = { /* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... */ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) }, { 0, } @@ -1060,13 +1228,16 @@ static int __init i5100_init(void) { int pci_rc; - pci_rc = pci_register_driver(&i5100_driver); + i5100_debugfs = debugfs_create_dir("i5100_edac", NULL); + pci_rc = pci_register_driver(&i5100_driver); return (pci_rc < 0) ? pci_rc : 0; } static void __exit i5100_exit(void) { + debugfs_remove(i5100_debugfs); + pci_unregister_driver(&i5100_driver); } diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 0a05bbceb08..6ef6ad1ba16 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -6,7 +6,7 @@ * * Copyright (c) 2008 by: * Ben Woodard <woodard@redhat.com> - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab * * Red Hat Inc. http://www.redhat.com * @@ -1408,6 +1408,8 @@ static void i5400_remove_one(struct pci_dev *pdev) /* retrieve references to resources, and free those resources */ i5400_put_devices(mci); + pci_disable_device(pdev); + edac_mc_free(mci); } @@ -1416,7 +1418,7 @@ static void i5400_remove_one(struct pci_dev *pdev) * * The "E500P" device is the first device supported. */ -static DEFINE_PCI_DEVICE_TABLE(i5400_pci_tbl) = { +static const struct pci_device_id i5400_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)}, {0,} /* 0 terminated list. */ }; @@ -1467,7 +1469,7 @@ module_exit(i5400_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); +MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - " I5400_REVISION); diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 087c27bc5d4..dcac982fdc7 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -5,7 +5,7 @@ * GNU General Public License version 2 only. * * Copyright (c) 2010 by: - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab * * Red Hat Inc. http://www.redhat.com * @@ -750,15 +750,23 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) struct i7300_dimm_info *dinfo; int rc = -ENODEV; int mtr; - int ch, branch, slot, channel; + int ch, branch, slot, channel, max_channel, max_branch; struct dimm_info *dimm; pvt = mci->pvt_info; edac_dbg(2, "Memory Technology Registers:\n"); + if (IS_SINGLE_MODE(pvt->mc_settings_a)) { + max_branch = 1; + max_channel = 1; + } else { + max_branch = MAX_BRANCHES; + max_channel = MAX_CH_PER_BRANCH; + } + /* Get the AMB present registers for the four channels */ - for (branch = 0; branch < MAX_BRANCHES; branch++) { + for (branch = 0; branch < max_branch; branch++) { /* Read and dump branch 0's MTRs */ channel = to_channel(0, branch); pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], @@ -767,6 +775,9 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) edac_dbg(2, "\t\tAMB-present CH%d = 0x%x:\n", channel, pvt->ambpresent[channel]); + if (max_channel == 1) + continue; + channel = to_channel(1, branch); pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], AMBPRESENT_1, @@ -778,11 +789,11 @@ static int i7300_init_csrows(struct mem_ctl_info *mci) /* Get the set of MTR[0-7] regs by each branch */ for (slot = 0; slot < MAX_SLOTS; slot++) { int where = mtr_regs[slot]; - for (branch = 0; branch < MAX_BRANCHES; branch++) { + for (branch = 0; branch < max_branch; branch++) { pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch], where, &pvt->mtr[slot][branch]); - for (ch = 0; ch < MAX_CH_PER_BRANCH; ch++) { + for (ch = 0; ch < max_channel; ch++) { int channel = to_channel(ch, branch); dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, @@ -932,33 +943,35 @@ static int i7300_get_devices(struct mem_ctl_info *mci) /* Attempt to 'get' the MCH register we want */ pdev = NULL; - while (!pvt->pci_dev_16_1_fsb_addr_map || - !pvt->pci_dev_16_2_fsb_err_regs) { - pdev = pci_get_device(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev); - if (!pdev) { - /* End of list, leave */ - i7300_printk(KERN_ERR, - "'system address,Process Bus' " - "device not found:" - "vendor 0x%x device 0x%x ERR funcs " - "(broken BIOS?)\n", - PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); - goto error; - } - + while ((pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, + pdev))) { /* Store device 16 funcs 1 and 2 */ switch (PCI_FUNC(pdev->devfn)) { case 1: - pvt->pci_dev_16_1_fsb_addr_map = pdev; + if (!pvt->pci_dev_16_1_fsb_addr_map) + pvt->pci_dev_16_1_fsb_addr_map = + pci_dev_get(pdev); break; case 2: - pvt->pci_dev_16_2_fsb_err_regs = pdev; + if (!pvt->pci_dev_16_2_fsb_err_regs) + pvt->pci_dev_16_2_fsb_err_regs = + pci_dev_get(pdev); break; } } + if (!pvt->pci_dev_16_1_fsb_addr_map || + !pvt->pci_dev_16_2_fsb_err_regs) { + /* At least one device was not found */ + i7300_printk(KERN_ERR, + "'system address,Process Bus' device not found:" + "vendor 0x%x device 0x%x ERR funcs (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); + goto error; + } + edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n", pci_name(pvt->pci_dev_16_0_fsb_ctlr), pvt->pci_dev_16_0_fsb_ctlr->vendor, @@ -1149,7 +1162,7 @@ static void i7300_remove_one(struct pci_dev *pdev) * * Has only 8086:360c PCI ID */ -static DEFINE_PCI_DEVICE_TABLE(i7300_pci_tbl) = { +static const struct pci_device_id i7300_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)}, {0,} /* 0 terminated list. */ }; @@ -1196,7 +1209,7 @@ module_init(i7300_init); module_exit(i7300_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); +MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - " I7300_REVISION); diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index e213d030b0d..9cd0b301f81 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -9,7 +9,7 @@ * GNU General Public License version 2 only. * * Copyright (c) 2009-2010 by: - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab * * Red Hat Inc. http://www.redhat.com * @@ -394,7 +394,7 @@ static const struct pci_id_table pci_dev_table[] = { /* * pci_device_id table for which devices we are looking for */ -static DEFINE_PCI_DEVICE_TABLE(i7core_pci_tbl) = { +static const struct pci_device_id i7core_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)}, {0,} /* 0 terminated list. */ @@ -420,21 +420,21 @@ static inline int numdimms(u32 dimms) static inline int numrank(u32 rank) { - static int ranks[4] = { 1, 2, 4, -EINVAL }; + static const int ranks[] = { 1, 2, 4, -EINVAL }; return ranks[rank & 0x3]; } static inline int numbank(u32 bank) { - static int banks[4] = { 4, 8, 16, -EINVAL }; + static const int banks[] = { 4, 8, 16, -EINVAL }; return banks[bank & 0x3]; } static inline int numrow(u32 row) { - static int rows[8] = { + static const int rows[] = { 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, -EINVAL, -EINVAL, -EINVAL, }; @@ -444,7 +444,7 @@ static inline int numrow(u32 row) static inline int numcol(u32 col) { - static int cols[8] = { + static const int cols[] = { 1 << 10, 1 << 11, 1 << 12, -EINVAL, }; return cols[col & 0x3]; @@ -704,7 +704,7 @@ static ssize_t i7core_inject_section_store(struct device *dev, if (pvt->inject.enable) disable_inject(mci); - rc = strict_strtoul(data, 10, &value); + rc = kstrtoul(data, 10, &value); if ((rc < 0) || (value > 3)) return -EIO; @@ -741,7 +741,7 @@ struct i7core_pvt *pvt = mci->pvt_info; if (pvt->inject.enable) disable_inject(mci); - rc = strict_strtoul(data, 10, &value); + rc = kstrtoul(data, 10, &value); if ((rc < 0) || (value > 7)) return -EIO; @@ -781,7 +781,7 @@ static ssize_t i7core_inject_eccmask_store(struct device *dev, if (pvt->inject.enable) disable_inject(mci); - rc = strict_strtoul(data, 10, &value); + rc = kstrtoul(data, 10, &value); if (rc < 0) return -EIO; @@ -830,7 +830,7 @@ static ssize_t i7core_inject_store_##param( \ if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\ value = -1; \ else { \ - rc = strict_strtoul(data, 10, &value); \ + rc = kstrtoul(data, 10, &value); \ if ((rc < 0) || (value >= limit)) \ return -EIO; \ } \ @@ -934,7 +934,7 @@ static ssize_t i7core_inject_enable_store(struct device *dev, if (!pvt->pci_ch[pvt->inject.channel][0]) return 0; - rc = strict_strtoul(data, 10, &enable); + rc = kstrtoul(data, 10, &enable); if ((rc < 0)) return 0; @@ -1334,14 +1334,19 @@ static int i7core_get_onedevice(struct pci_dev **prev, * is at addr 8086:2c40, instead of 8086:2c41. So, we need * to probe for the alternate address in case of failure */ - if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) { + pci_dev_get(*prev); /* pci_get_device will put it */ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev); + } - if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev) + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && + !pdev) { + pci_dev_get(*prev); /* pci_get_device will put it */ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT, *prev); + } if (!pdev) { if (*prev) { @@ -1703,7 +1708,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, const struct mce *m) { struct i7core_pvt *pvt = mci->pvt_info; - char *type, *optype, *err; + char *optype, *err; enum hw_event_mc_err_type tp_event; unsigned long error = m->status & 0x1ff0000l; bool uncorrected_error = m->mcgstatus & 1ll << 61; @@ -1716,15 +1721,11 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, u32 errnum = find_first_bit(&error, 32); if (uncorrected_error) { - if (ripv) { - type = "FATAL"; + if (ripv) tp_event = HW_EVENT_ERR_FATAL; - } else { - type = "NON_FATAL"; + else tp_event = HW_EVENT_ERR_UNCORRECTED; - } } else { - type = "CORRECTED"; tp_event = HW_EVENT_ERR_CORRECTED; } @@ -2456,7 +2457,7 @@ module_init(i7core_init); module_exit(i7core_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); +MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - " I7CORE_REVISION); diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c index 57fdb77903b..d730e276d1a 100644 --- a/drivers/edac/i82443bxgx_edac.c +++ b/drivers/edac/i82443bxgx_edac.c @@ -386,7 +386,7 @@ static void i82443bxgx_edacmc_remove_one(struct pci_dev *pdev) EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one); -static DEFINE_PCI_DEVICE_TABLE(i82443bxgx_pci_tbl) = { +static const struct pci_device_id i82443bxgx_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0)}, diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index 3e3e431c830..3382f6344e4 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -288,7 +288,7 @@ static void i82860_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(i82860_pci_tbl) = { +static const struct pci_device_id i82860_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82860}, diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index 2f8535fc451..64b68320249 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -275,7 +275,6 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, { struct pci_dev *dev; void __iomem *window; - int err; *ovrfl_pdev = NULL; *ovrfl_window = NULL; @@ -293,13 +292,8 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, if (dev == NULL) return 1; - err = pci_bus_add_device(dev); - if (err) { - i82875p_printk(KERN_ERR, - "%s(): pci_bus_add_device() Failed\n", - __func__); - } pci_bus_assign_resources(dev->bus); + pci_bus_add_device(dev); } *ovrfl_pdev = dev; @@ -406,8 +400,6 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) edac_dbg(0, "\n"); - ovrfl_pdev = pci_get_device(PCI_VEND_DEV(INTEL, 82875_6), NULL); - if (i82875p_setup_overfl_dev(pdev, &ovrfl_pdev, &ovrfl_window)) return -ENODEV; drc = readl(ovrfl_window + I82875P_DRC); @@ -527,7 +519,7 @@ static void i82875p_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(i82875p_pci_tbl) = { +static const struct pci_device_id i82875p_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82875P}, diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c index 0c8d4b0eaa3..10b10521f62 100644 --- a/drivers/edac/i82975x_edac.c +++ b/drivers/edac/i82975x_edac.c @@ -628,7 +628,7 @@ static void i82975x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(i82975x_pci_tbl) = { +static const struct pci_device_id i82975x_pci_tbl[] = { { PCI_VEND_DEV(INTEL, 82975_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82975X diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index ad637572d8c..5f43620d580 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -6,7 +6,6 @@ static struct amd_decoder_ops *fam_ops; static u8 xec_mask = 0xf; -static u8 nb_err_cpumask = 0xf; static bool report_gart_errors; static void (*nb_bus_decoder)(int node_id, struct mce *m); @@ -39,30 +38,28 @@ EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); */ /* transaction type */ -const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; -EXPORT_SYMBOL_GPL(tt_msgs); +static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; /* cache level */ -const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; -EXPORT_SYMBOL_GPL(ll_msgs); +static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; /* memory transaction type */ -const char * const rrrr_msgs[] = { +static const char * const rrrr_msgs[] = { "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP" }; -EXPORT_SYMBOL_GPL(rrrr_msgs); /* participating processor */ const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" }; EXPORT_SYMBOL_GPL(pp_msgs); /* request timeout */ -const char * const to_msgs[] = { "no timeout", "timed out" }; -EXPORT_SYMBOL_GPL(to_msgs); +static const char * const to_msgs[] = { "no timeout", "timed out" }; /* memory or i/o */ -const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; -EXPORT_SYMBOL_GPL(ii_msgs); +static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; + +/* internal error type */ +static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" }; static const char * const f15h_mc1_mce_desc[] = { "UC during a demand linefill from L2", @@ -136,7 +133,8 @@ static const char * const mc5_mce_desc[] = { "Physical register file AG0 port", "Physical register file AG1 port", "Flag register file", - "DE error occurred" + "DE error occurred", + "Retire status queue" }; static bool f12h_mc0_mce(u16 ec, u8 xec) @@ -176,7 +174,7 @@ static bool k8_mc0_mce(u16 ec, u8 xec) return f10h_mc0_mce(ec, xec); } -static bool f14h_mc0_mce(u16 ec, u8 xec) +static bool cat_mc0_mce(u16 ec, u8 xec) { u8 r4 = R4(ec); bool ret = true; @@ -330,22 +328,28 @@ static bool k8_mc1_mce(u16 ec, u8 xec) return ret; } -static bool f14h_mc1_mce(u16 ec, u8 xec) +static bool cat_mc1_mce(u16 ec, u8 xec) { u8 r4 = R4(ec); bool ret = true; - if (MEM_ERROR(ec)) { - if (TT(ec) != 0 || LL(ec) != 1) - ret = false; + if (!MEM_ERROR(ec)) + return false; + + if (TT(ec) != TT_INSTR) + return false; + + if (r4 == R4_IRD) + pr_cont("Data/tag array parity error for a tag hit.\n"); + else if (r4 == R4_SNOOP) + pr_cont("Tag error during snoop/victimization.\n"); + else if (xec == 0x0) + pr_cont("Tag parity error from victim castout.\n"); + else if (xec == 0x2) + pr_cont("Microcode patch RAM parity error.\n"); + else + ret = false; - if (r4 == R4_IRD) - pr_cont("Data/tag array parity error for a tag hit.\n"); - else if (r4 == R4_SNOOP) - pr_cont("Tag error during snoop/victimization.\n"); - else - ret = false; - } return ret; } @@ -399,12 +403,9 @@ static void decode_mc1_mce(struct mce *m) pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n"); } -static void decode_mc2_mce(struct mce *m) +static bool k8_mc2_mce(u16 ec, u8 xec) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, xec_mask); - - pr_emerg(HW_ERR "MC2 Error"); + bool ret = true; if (xec == 0x1) pr_cont(" in the write data buffers.\n"); @@ -429,24 +430,18 @@ static void decode_mc2_mce(struct mce *m) pr_cont(": %s parity/ECC error during data " "access from L2.\n", R4_MSG(ec)); else - goto wrong_mc2_mce; + ret = false; } else - goto wrong_mc2_mce; + ret = false; } else - goto wrong_mc2_mce; - - return; + ret = false; - wrong_mc2_mce: - pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n"); + return ret; } -static void decode_f15_mc2_mce(struct mce *m) +static bool f15h_mc2_mce(u16 ec, u8 xec) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, xec_mask); - - pr_emerg(HW_ERR "MC2 Error: "); + bool ret = true; if (TLB_ERROR(ec)) { if (xec == 0x0) @@ -454,10 +449,10 @@ static void decode_f15_mc2_mce(struct mce *m) else if (xec == 0x1) pr_cont("Poison data provided for TLB fill.\n"); else - goto wrong_f15_mc2_mce; + ret = false; } else if (BUS_ERROR(ec)) { if (xec > 2) - goto wrong_f15_mc2_mce; + ret = false; pr_cont("Error during attempted NB data read.\n"); } else if (MEM_ERROR(ec)) { @@ -471,14 +466,63 @@ static void decode_f15_mc2_mce(struct mce *m) break; default: - goto wrong_f15_mc2_mce; + ret = false; } } - return; + return ret; +} + +static bool f16h_mc2_mce(u16 ec, u8 xec) +{ + u8 r4 = R4(ec); + + if (!MEM_ERROR(ec)) + return false; + + switch (xec) { + case 0x04 ... 0x05: + pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O'); + break; + + case 0x09 ... 0x0b: + case 0x0d ... 0x0f: + pr_cont("ECC error in L2 tag (%s).\n", + ((r4 == R4_GEN) ? "BankReq" : + ((r4 == R4_SNOOP) ? "Prb" : "Fill"))); + break; + + case 0x10 ... 0x19: + case 0x1b: + pr_cont("ECC error in L2 data array (%s).\n", + (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" : + ((r4 == R4_GEN) ? "Attr" : + ((r4 == R4_EVICT) ? "Vict" : "Fill")))); + break; + + case 0x1c ... 0x1d: + case 0x1f: + pr_cont("Parity error in L2 attribute bits (%s).\n", + ((r4 == R4_RD) ? "Hit" : + ((r4 == R4_GEN) ? "Attr" : "Fill"))); + break; + + default: + return false; + } + + return true; +} + +static void decode_mc2_mce(struct mce *m) +{ + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); + + pr_emerg(HW_ERR "MC2 Error: "); - wrong_f15_mc2_mce: - pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n"); + if (!fam_ops->mc2_mce(ec, xec)) + pr_cont(HW_ERR "Corrupted MC2 MCE info?\n"); } static void decode_mc3_mce(struct mce *m) @@ -547,7 +591,7 @@ static void decode_mc4_mce(struct mce *m) return; case 0x19: - if (boot_cpu_data.x86 == 0x15) + if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16) pr_cont("Compute Unit Data Error.\n"); else goto wrong_mc4_mce; @@ -580,7 +624,7 @@ static void decode_mc5_mce(struct mce *m) if (xec == 0x0 || xec == 0xc) pr_cont("%s.\n", mc5_mce_desc[xec]); - else if (xec < 0xd) + else if (xec <= 0xd) pr_cont("%s parity error.\n", mc5_mce_desc[xec]); else goto wrong_mc5_mce; @@ -633,6 +677,10 @@ static void decode_mc6_mce(struct mce *m) static inline void amd_decode_err_code(u16 ec) { + if (INT_ERROR(ec)) { + pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec)); + return; + } pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec)); @@ -692,6 +740,36 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) if (amd_filter_mce(m)) return NOTIFY_STOP; + pr_emerg(HW_ERR "%s\n", decode_error_status(m)); + + pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", + m->extcpu, + c->x86, c->x86_model, c->x86_mask, + m->bank, + ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), + ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), + ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), + ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), + ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); + + if (c->x86 == 0x15 || c->x86 == 0x16) + pr_cont("|%s|%s", + ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"), + ((m->status & MCI_STATUS_POISON) ? "Poison" : "-")); + + /* do the two bits[14:13] together */ + ecc = (m->status >> 45) & 0x3; + if (ecc) + pr_cont("|%sECC", ((ecc == 2) ? "C" : "U")); + + pr_cont("]: 0x%016llx\n", m->status); + + if (m->status & MCI_STATUS_ADDRV) + pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr); + + if (!fam_ops) + goto err_code; + switch (m->bank) { case 0: decode_mc0_mce(m); @@ -702,10 +780,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) break; case 2: - if (c->x86 == 0x15) - decode_f15_mc2_mce(m); - else - decode_mc2_mce(m); + decode_mc2_mce(m); break; case 3: @@ -728,33 +803,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) break; } - pr_emerg(HW_ERR "Error Status: %s\n", decode_error_status(m)); - - pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", - m->extcpu, - c->x86, c->x86_model, c->x86_mask, - m->bank, - ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), - ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), - ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), - ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), - ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); - - if (c->x86 == 0x15) - pr_cont("|%s|%s", - ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"), - ((m->status & MCI_STATUS_POISON) ? "Poison" : "-")); - - /* do the two bits[14:13] together */ - ecc = (m->status >> 45) & 0x3; - if (ecc) - pr_cont("|%sECC", ((ecc == 2) ? "C" : "U")); - - pr_cont("]: 0x%016llx\n", m->status); - - if (m->status & MCI_STATUS_ADDRV) - pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr); - + err_code: amd_decode_err_code(m->status & 0xffff); return NOTIFY_STOP; @@ -770,10 +819,7 @@ static int __init mce_amd_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor != X86_VENDOR_AMD) - return 0; - - if (c->x86 < 0xf || c->x86 > 0x15) - return 0; + return -ENODEV; fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); if (!fam_ops) @@ -783,39 +829,51 @@ static int __init mce_amd_init(void) case 0xf: fam_ops->mc0_mce = k8_mc0_mce; fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x10: fam_ops->mc0_mce = f10h_mc0_mce; fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x11: fam_ops->mc0_mce = k8_mc0_mce; fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x12: fam_ops->mc0_mce = f12h_mc0_mce; fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x14: - nb_err_cpumask = 0x3; - fam_ops->mc0_mce = f14h_mc0_mce; - fam_ops->mc1_mce = f14h_mc1_mce; + fam_ops->mc0_mce = cat_mc0_mce; + fam_ops->mc1_mce = cat_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x15: xec_mask = 0x1f; fam_ops->mc0_mce = f15h_mc0_mce; fam_ops->mc1_mce = f15h_mc1_mce; + fam_ops->mc2_mce = f15h_mc2_mce; + break; + + case 0x16: + xec_mask = 0x1f; + fam_ops->mc0_mce = cat_mc0_mce; + fam_ops->mc1_mce = cat_mc1_mce; + fam_ops->mc2_mce = f16h_mc2_mce; break; default: printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86); kfree(fam_ops); - return -EINVAL; + fam_ops = NULL; } pr_info("MCE: In-kernel MCE decoding enabled.\n"); diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 679679951e2..51b7e3a36e3 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -14,6 +14,7 @@ #define TLB_ERROR(x) (((x) & 0xFFF0) == 0x0010) #define MEM_ERROR(x) (((x) & 0xFF00) == 0x0100) #define BUS_ERROR(x) (((x) & 0xF800) == 0x0800) +#define INT_ERROR(x) (((x) & 0xF4FF) == 0x0400) #define TT(x) (((x) >> 2) & 0x3) #define TT_MSG(x) tt_msgs[TT(x)] @@ -25,6 +26,8 @@ #define TO_MSG(x) to_msgs[TO(x)] #define PP(x) (((x) >> 9) & 0x3) #define PP_MSG(x) pp_msgs[PP(x)] +#define UU(x) (((x) >> 8) & 0x3) +#define UU_MSG(x) uu_msgs[UU(x)] #define R4(x) (((x) >> 4) & 0xf) #define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") @@ -32,6 +35,8 @@ #define MCI_STATUS_DEFERRED BIT_64(44) #define MCI_STATUS_POISON BIT_64(43) +extern const char * const pp_msgs[]; + enum tt_ids { TT_INSTR = 0, TT_DATA, @@ -65,19 +70,13 @@ enum rrrr_ids { R4_SNOOP, }; -extern const char * const tt_msgs[]; -extern const char * const ll_msgs[]; -extern const char * const rrrr_msgs[]; -extern const char * const pp_msgs[]; -extern const char * const to_msgs[]; -extern const char * const ii_msgs[]; - /* * per-family decoder ops */ struct amd_decoder_ops { bool (*mc0_mce)(u16, u8); bool (*mc1_mce)(u16, u8); + bool (*mc2_mce)(u16, u8); }; void amd_report_gart_errors(bool); diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 2ae78f20cc2..5e46a9fea31 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -43,7 +43,7 @@ static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \ int ret = 0; \ unsigned long value; \ \ - ret = strict_strtoul(data, 16, &value); \ + ret = kstrtoul(data, 16, &value); \ if (ret < 0) \ printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \ \ @@ -83,7 +83,7 @@ static ssize_t edac_inject_bank_store(struct kobject *kobj, int ret = 0; unsigned long value; - ret = strict_strtoul(data, 10, &value); + ret = kstrtoul(data, 10, &value); if (ret < 0) { printk(KERN_ERR "Invalid bank value!\n"); return -EINVAL; diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 42a840d530a..f4aec2e6ef5 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -1,6 +1,8 @@ /* * Freescale MPC85xx Memory Controller kenel module * + * Parts Copyrighted (c) 2013 by Freescale Semiconductor, Inc. + * * Author: Dave Jiang <djiang@mvista.com> * * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under @@ -196,6 +198,42 @@ static void mpc85xx_pci_check(struct edac_pci_ctl_info *pci) edac_pci_handle_npe(pci, pci->ctl_name); } +static void mpc85xx_pcie_check(struct edac_pci_ctl_info *pci) +{ + struct mpc85xx_pci_pdata *pdata = pci->pvt_info; + u32 err_detect; + + err_detect = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR); + + pr_err("PCIe error(s) detected\n"); + pr_err("PCIe ERR_DR register: 0x%08x\n", err_detect); + pr_err("PCIe ERR_CAP_STAT register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR)); + pr_err("PCIe ERR_CAP_R0 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R0)); + pr_err("PCIe ERR_CAP_R1 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R1)); + pr_err("PCIe ERR_CAP_R2 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R2)); + pr_err("PCIe ERR_CAP_R3 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R3)); + + /* clear error bits */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, err_detect); +} + +static int mpc85xx_pcie_find_capability(struct device_node *np) +{ + struct pci_controller *hose; + + if (!np) + return -EINVAL; + + hose = pci_find_hose_for_OF_device(np); + + return early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP); +} + static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) { struct edac_pci_ctl_info *pci = dev_id; @@ -207,7 +245,10 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) if (!err_detect) return IRQ_NONE; - mpc85xx_pci_check(pci); + if (pdata->is_pcie) + mpc85xx_pcie_check(pci); + else + mpc85xx_pci_check(pci); return IRQ_HANDLED; } @@ -239,14 +280,22 @@ int mpc85xx_pci_err_probe(struct platform_device *op) pdata = pci->pvt_info; pdata->name = "mpc85xx_pci_err"; pdata->irq = NO_IRQ; + + if (mpc85xx_pcie_find_capability(op->dev.of_node) > 0) + pdata->is_pcie = true; + dev_set_drvdata(&op->dev, pci); pci->dev = &op->dev; pci->mod_name = EDAC_MOD_STR; pci->ctl_name = pdata->name; pci->dev_name = dev_name(&op->dev); - if (edac_op_state == EDAC_OPSTATE_POLL) - pci->edac_check = mpc85xx_pci_check; + if (edac_op_state == EDAC_OPSTATE_POLL) { + if (pdata->is_pcie) + pci->edac_check = mpc85xx_pcie_check; + else + pci->edac_check = mpc85xx_pci_check; + } pdata->edac_idx = edac_pci_idx++; @@ -275,16 +324,26 @@ int mpc85xx_pci_err_probe(struct platform_device *op) goto err; } - orig_pci_err_cap_dr = - in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR); + if (pdata->is_pcie) { + orig_pci_err_cap_dr = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, ~0); + orig_pci_err_en = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, 0); + } else { + orig_pci_err_cap_dr = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR); - /* PCI master abort is expected during config cycles */ - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, 0x40); + /* PCI master abort is expected during config cycles */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, 0x40); - orig_pci_err_en = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); + orig_pci_err_en = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); - /* disable master abort reporting */ - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0x40); + /* disable master abort reporting */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0x40); + } /* clear error bits */ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, ~0); @@ -297,11 +356,12 @@ int mpc85xx_pci_err_probe(struct platform_device *op) if (edac_op_state == EDAC_OPSTATE_INT) { pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); res = devm_request_irq(&op->dev, pdata->irq, - mpc85xx_pci_isr, IRQF_DISABLED, + mpc85xx_pci_isr, + IRQF_SHARED, "[EDAC] PCI err", pci); if (res < 0) { printk(KERN_ERR - "%s: Unable to requiest irq %d for " + "%s: Unable to request irq %d for " "MPC85xx PCI err\n", __func__, pdata->irq); irq_dispose_mapping(pdata->irq); res = -ENODEV; @@ -312,6 +372,22 @@ int mpc85xx_pci_err_probe(struct platform_device *op) pdata->irq); } + if (pdata->is_pcie) { + /* + * Enable all PCIe error interrupt & error detect except invalid + * PEX_CONFIG_ADDR/PEX_CONFIG_DATA access interrupt generation + * enable bit and invalid PEX_CONFIG_ADDR/PEX_CONFIG_DATA access + * detection enable bit. Because PCIe bus code to initialize and + * configure these PCIe devices on booting will use some invalid + * PEX_CONFIG_ADDR/PEX_CONFIG_DATA, edac driver prints the much + * notice information. So disable this detect to fix ugly print. + */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0 + & ~PEX_ERR_ICCAIE_EN_BIT); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, 0 + | PEX_ERR_ICCAD_DISR_BIT); + } + devres_remove_group(&op->dev, mpc85xx_pci_err_probe); edac_dbg(3, "success\n"); printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n"); @@ -327,28 +403,6 @@ err: } EXPORT_SYMBOL(mpc85xx_pci_err_probe); -static int mpc85xx_pci_err_remove(struct platform_device *op) -{ - struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev); - struct mpc85xx_pci_pdata *pdata = pci->pvt_info; - - edac_dbg(0, "\n"); - - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, - orig_pci_err_cap_dr); - - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, orig_pci_err_en); - - edac_pci_del_device(pci->dev); - - if (edac_op_state == EDAC_OPSTATE_INT) - irq_dispose_mapping(pdata->irq); - - edac_pci_free_ctl_info(pci); - - return 0; -} - #endif /* CONFIG_PCI */ /**************************** L2 Err device ***************************/ @@ -579,11 +633,11 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) if (edac_op_state == EDAC_OPSTATE_INT) { pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); res = devm_request_irq(&op->dev, pdata->irq, - mpc85xx_l2_isr, IRQF_DISABLED, + mpc85xx_l2_isr, 0, "[EDAC] L2 err", edac_dev); if (res < 0) { printk(KERN_ERR - "%s: Unable to requiest irq %d for " + "%s: Unable to request irq %d for " "MPC85xx L2 err\n", __func__, pdata->irq); irq_dispose_mapping(pdata->irq); res = -ENODEV; @@ -1079,7 +1133,7 @@ static int mpc85xx_mc_err_probe(struct platform_device *op) pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); res = devm_request_irq(&op->dev, pdata->irq, mpc85xx_mc_isr, - IRQF_DISABLED | IRQF_SHARED, + IRQF_SHARED, "[EDAC] MC err", mci); if (res < 0) { printk(KERN_ERR "%s: Unable to request irq %d for " diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h index 932016f2cf0..8c625643622 100644 --- a/drivers/edac/mpc85xx_edac.h +++ b/drivers/edac/mpc85xx_edac.h @@ -134,13 +134,19 @@ #define MPC85XX_PCI_ERR_DR 0x0000 #define MPC85XX_PCI_ERR_CAP_DR 0x0004 #define MPC85XX_PCI_ERR_EN 0x0008 +#define PEX_ERR_ICCAIE_EN_BIT 0x00020000 #define MPC85XX_PCI_ERR_ATTRIB 0x000c #define MPC85XX_PCI_ERR_ADDR 0x0010 +#define PEX_ERR_ICCAD_DISR_BIT 0x00020000 #define MPC85XX_PCI_ERR_EXT_ADDR 0x0014 #define MPC85XX_PCI_ERR_DL 0x0018 #define MPC85XX_PCI_ERR_DH 0x001c #define MPC85XX_PCI_GAS_TIMR 0x0020 #define MPC85XX_PCI_PCIX_TIMR 0x0024 +#define MPC85XX_PCIE_ERR_CAP_R0 0x0028 +#define MPC85XX_PCIE_ERR_CAP_R1 0x002c +#define MPC85XX_PCIE_ERR_CAP_R2 0x0030 +#define MPC85XX_PCIE_ERR_CAP_R3 0x0034 struct mpc85xx_mc_pdata { char *name; @@ -158,6 +164,7 @@ struct mpc85xx_l2_pdata { struct mpc85xx_pci_pdata { char *name; + bool is_pcie; int edac_idx; void __iomem *pci_vbase; int irq; diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index 93412d6b3af..4bd10f94f06 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -5,12 +5,16 @@ * * Copyright (C) 2009 Wind River Systems, * written by Ralf Baechle <ralf@linux-mips.org> + * + * Copyright (c) 2013 by Cisco Systems, Inc. + * All rights reserved. */ #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/io.h> #include <linux/edac.h> +#include <linux/ctype.h> #include <asm/octeon/octeon.h> #include <asm/octeon/cvmx-lmcx-defs.h> @@ -20,6 +24,18 @@ #define OCTEON_MAX_MC 4 +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +struct octeon_lmc_pvt { + unsigned long inject; + unsigned long error_type; + unsigned long dimm; + unsigned long rank; + unsigned long bank; + unsigned long row; + unsigned long col; +}; + static void octeon_lmc_edac_poll(struct mem_ctl_info *mci) { union cvmx_lmcx_mem_cfg0 cfg0; @@ -55,14 +71,31 @@ static void octeon_lmc_edac_poll(struct mem_ctl_info *mci) static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) { + struct octeon_lmc_pvt *pvt = mci->pvt_info; union cvmx_lmcx_int int_reg; bool do_clear = false; char msg[64]; - int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); + if (!pvt->inject) + int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); + else { + if (pvt->error_type == 1) + int_reg.s.sec_err = 1; + if (pvt->error_type == 2) + int_reg.s.ded_err = 1; + } + if (int_reg.s.sec_err || int_reg.s.ded_err) { union cvmx_lmcx_fadr fadr; - fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx)); + if (likely(!pvt->inject)) + fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx)); + else { + fadr.cn61xx.fdimm = pvt->dimm; + fadr.cn61xx.fbunk = pvt->rank; + fadr.cn61xx.fbank = pvt->bank; + fadr.cn61xx.frow = pvt->row; + fadr.cn61xx.fcol = pvt->col; + } snprintf(msg, sizeof(msg), "DIMM %d rank %d bank %d row %d col %d", fadr.cn61xx.fdimm, fadr.cn61xx.fbunk, @@ -82,8 +115,128 @@ static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) int_reg.s.ded_err = -1; /* Done, re-arm */ do_clear = true; } - if (do_clear) - cvmx_write_csr(CVMX_LMCX_INT(mci->mc_idx), int_reg.u64); + + if (do_clear) { + if (likely(!pvt->inject)) + cvmx_write_csr(CVMX_LMCX_INT(mci->mc_idx), int_reg.u64); + else + pvt->inject = 0; + } +} + +/************************ MC SYSFS parts ***********************************/ + +/* Only a couple naming differences per template, so very similar */ +#define TEMPLATE_SHOW(reg) \ +static ssize_t octeon_mc_inject_##reg##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *data) \ +{ \ + struct mem_ctl_info *mci = to_mci(dev); \ + struct octeon_lmc_pvt *pvt = mci->pvt_info; \ + return sprintf(data, "%016llu\n", (u64)pvt->reg); \ +} + +#define TEMPLATE_STORE(reg) \ +static ssize_t octeon_mc_inject_##reg##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *data, size_t count) \ +{ \ + struct mem_ctl_info *mci = to_mci(dev); \ + struct octeon_lmc_pvt *pvt = mci->pvt_info; \ + if (isdigit(*data)) { \ + if (!kstrtoul(data, 0, &pvt->reg)) \ + return count; \ + } \ + return 0; \ +} + +TEMPLATE_SHOW(inject); +TEMPLATE_STORE(inject); +TEMPLATE_SHOW(dimm); +TEMPLATE_STORE(dimm); +TEMPLATE_SHOW(bank); +TEMPLATE_STORE(bank); +TEMPLATE_SHOW(rank); +TEMPLATE_STORE(rank); +TEMPLATE_SHOW(row); +TEMPLATE_STORE(row); +TEMPLATE_SHOW(col); +TEMPLATE_STORE(col); + +static ssize_t octeon_mc_inject_error_type_store(struct device *dev, + struct device_attribute *attr, + const char *data, + size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct octeon_lmc_pvt *pvt = mci->pvt_info; + + if (!strncmp(data, "single", 6)) + pvt->error_type = 1; + else if (!strncmp(data, "double", 6)) + pvt->error_type = 2; + + return count; +} + +static ssize_t octeon_mc_inject_error_type_show(struct device *dev, + struct device_attribute *attr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct octeon_lmc_pvt *pvt = mci->pvt_info; + if (pvt->error_type == 1) + return sprintf(data, "single"); + else if (pvt->error_type == 2) + return sprintf(data, "double"); + + return 0; +} + +static DEVICE_ATTR(inject, S_IRUGO | S_IWUSR, + octeon_mc_inject_inject_show, octeon_mc_inject_inject_store); +static DEVICE_ATTR(error_type, S_IRUGO | S_IWUSR, + octeon_mc_inject_error_type_show, octeon_mc_inject_error_type_store); +static DEVICE_ATTR(dimm, S_IRUGO | S_IWUSR, + octeon_mc_inject_dimm_show, octeon_mc_inject_dimm_store); +static DEVICE_ATTR(rank, S_IRUGO | S_IWUSR, + octeon_mc_inject_rank_show, octeon_mc_inject_rank_store); +static DEVICE_ATTR(bank, S_IRUGO | S_IWUSR, + octeon_mc_inject_bank_show, octeon_mc_inject_bank_store); +static DEVICE_ATTR(row, S_IRUGO | S_IWUSR, + octeon_mc_inject_row_show, octeon_mc_inject_row_store); +static DEVICE_ATTR(col, S_IRUGO | S_IWUSR, + octeon_mc_inject_col_show, octeon_mc_inject_col_store); + + +static int octeon_set_mc_sysfs_attributes(struct mem_ctl_info *mci) +{ + int rc; + + rc = device_create_file(&mci->dev, &dev_attr_inject); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_error_type); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_dimm); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_rank); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_bank); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_row); + if (rc < 0) + return rc; + rc = device_create_file(&mci->dev, &dev_attr_col); + if (rc < 0) + return rc; + + return 0; } static int octeon_lmc_edac_probe(struct platform_device *pdev) @@ -92,6 +245,8 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) struct edac_mc_layer layers[1]; int mc = pdev->id; + opstate_init(); + layers[0].type = EDAC_MC_LAYER_CHANNEL; layers[0].size = 1; layers[0].is_virt_csrow = false; @@ -105,7 +260,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) return 0; } - mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct octeon_lmc_pvt)); if (!mci) return -ENXIO; @@ -122,6 +277,12 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) return -ENXIO; } + if (octeon_set_mc_sysfs_attributes(mci)) { + dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n"); + return -ENXIO; + } + + cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc)); cfg0.s.intr_ded_ena = 0; /* We poll */ cfg0.s.intr_sec_ena = 0; @@ -137,7 +298,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) return 0; } - mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct octeon_lmc_pvt)); if (!mci) return -ENXIO; @@ -154,6 +315,12 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) return -ENXIO; } + if (octeon_set_mc_sysfs_attributes(mci)) { + dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n"); + return -ENXIO; + } + + en.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc)); en.s.intr_ded_ena = 0; /* We poll */ en.s.intr_sec_ena = 0; diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index 2fd6a549090..8f936bc7a01 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -383,7 +383,7 @@ static void r82600_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(r82600_pci_tbl) = { +static const struct pci_device_id r82600_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID) }, diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index da7e2986e3d..deea0dc9999 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -7,7 +7,7 @@ * GNU General Public License version 2 only. * * Copyright (c) 2011 by: - * Mauro Carvalho Chehab <mchehab@redhat.com> + * Mauro Carvalho Chehab */ #include <linux/module.h> @@ -34,7 +34,7 @@ static int probed; /* * Alter this version for the module when modifications are made */ -#define SBRIDGE_REVISION " Ver: 1.0.0 " +#define SBRIDGE_REVISION " Ver: 1.1.0 " #define EDAC_MOD_STR "sbridge_edac" /* @@ -50,7 +50,7 @@ static int probed; * Get a bit field at register value <v>, from bit <lo> to bit <hi> */ #define GET_BITFIELD(v, lo, hi) \ - (((v) & ((1ULL << ((hi) - (lo) + 1)) - 1) << (lo)) >> (lo)) + (((v) & GENMASK_ULL(hi, lo)) >> (lo)) /* * sbridge Memory Controller Registers @@ -83,11 +83,17 @@ static int probed; #define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3 0x3c77 /* 16.7 */ /* Devices 12 Function 6, Offsets 0x80 to 0xcc */ -static const u32 dram_rule[] = { +static const u32 sbridge_dram_rule[] = { 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8, 0xc0, 0xc8, }; -#define MAX_SAD ARRAY_SIZE(dram_rule) + +static const u32 ibridge_dram_rule[] = { + 0x60, 0x68, 0x70, 0x78, 0x80, + 0x88, 0x90, 0x98, 0xa0, 0xa8, + 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, + 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, +}; #define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff) #define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3) @@ -108,43 +114,50 @@ static char *get_dram_attr(u32 reg) } } -static const u32 interleave_list[] = { +static const u32 sbridge_interleave_list[] = { 0x84, 0x8c, 0x94, 0x9c, 0xa4, 0xac, 0xb4, 0xbc, 0xc4, 0xcc, }; -#define MAX_INTERLEAVE ARRAY_SIZE(interleave_list) - -#define SAD_PKG0(reg) GET_BITFIELD(reg, 0, 2) -#define SAD_PKG1(reg) GET_BITFIELD(reg, 3, 5) -#define SAD_PKG2(reg) GET_BITFIELD(reg, 8, 10) -#define SAD_PKG3(reg) GET_BITFIELD(reg, 11, 13) -#define SAD_PKG4(reg) GET_BITFIELD(reg, 16, 18) -#define SAD_PKG5(reg) GET_BITFIELD(reg, 19, 21) -#define SAD_PKG6(reg) GET_BITFIELD(reg, 24, 26) -#define SAD_PKG7(reg) GET_BITFIELD(reg, 27, 29) - -static inline int sad_pkg(u32 reg, int interleave) + +static const u32 ibridge_interleave_list[] = { + 0x64, 0x6c, 0x74, 0x7c, 0x84, + 0x8c, 0x94, 0x9c, 0xa4, 0xac, + 0xb4, 0xbc, 0xc4, 0xcc, 0xd4, + 0xdc, 0xe4, 0xec, 0xf4, 0xfc, +}; + +struct interleave_pkg { + unsigned char start; + unsigned char end; +}; + +static const struct interleave_pkg sbridge_interleave_pkg[] = { + { 0, 2 }, + { 3, 5 }, + { 8, 10 }, + { 11, 13 }, + { 16, 18 }, + { 19, 21 }, + { 24, 26 }, + { 27, 29 }, +}; + +static const struct interleave_pkg ibridge_interleave_pkg[] = { + { 0, 3 }, + { 4, 7 }, + { 8, 11 }, + { 12, 15 }, + { 16, 19 }, + { 20, 23 }, + { 24, 27 }, + { 28, 31 }, +}; + +static inline int sad_pkg(const struct interleave_pkg *table, u32 reg, + int interleave) { - switch (interleave) { - case 0: - return SAD_PKG0(reg); - case 1: - return SAD_PKG1(reg); - case 2: - return SAD_PKG2(reg); - case 3: - return SAD_PKG3(reg); - case 4: - return SAD_PKG4(reg); - case 5: - return SAD_PKG5(reg); - case 6: - return SAD_PKG6(reg); - case 7: - return SAD_PKG7(reg); - default: - return -EINVAL; - } + return GET_BITFIELD(reg, table[interleave].start, + table[interleave].end); } /* Devices 12 Function 7 */ @@ -262,7 +275,9 @@ static const u32 correrrthrsld[] = { /* Device 17, function 0 */ -#define RANK_CFG_A 0x0328 +#define SB_RANK_CFG_A 0x0328 + +#define IB_RANK_CFG_A 0x0320 #define IS_RDIMM_ENABLED(reg) GET_BITFIELD(reg, 11, 11) @@ -273,8 +288,23 @@ static const u32 correrrthrsld[] = { #define NUM_CHANNELS 4 #define MAX_DIMMS 3 /* Max DIMMS per channel */ +enum type { + SANDY_BRIDGE, + IVY_BRIDGE, +}; + +struct sbridge_pvt; struct sbridge_info { - u32 mcmtr; + enum type type; + u32 mcmtr; + u32 rankcfgr; + u64 (*get_tolm)(struct sbridge_pvt *pvt); + u64 (*get_tohm)(struct sbridge_pvt *pvt); + const u32 *dram_rule; + const u32 *interleave_list; + const struct interleave_pkg *interleave_pkg; + u8 max_sad; + u8 max_interleave; }; struct sbridge_channel { @@ -305,8 +335,9 @@ struct sbridge_dev { struct sbridge_pvt { struct pci_dev *pci_ta, *pci_ddrio, *pci_ras; - struct pci_dev *pci_sad0, *pci_sad1, *pci_ha0; - struct pci_dev *pci_br; + struct pci_dev *pci_sad0, *pci_sad1; + struct pci_dev *pci_ha0, *pci_ha1; + struct pci_dev *pci_br0, *pci_br1; struct pci_dev *pci_tad[NUM_CHANNELS]; struct sbridge_dev *sbridge_dev; @@ -331,30 +362,31 @@ struct sbridge_pvt { u64 tolm, tohm; }; -#define PCI_DESCR(device, function, device_id) \ - .dev = (device), \ - .func = (function), \ - .dev_id = (device_id) +#define PCI_DESCR(device, function, device_id, opt) \ + .dev = (device), \ + .func = (function), \ + .dev_id = (device_id), \ + .optional = opt static const struct pci_id_descr pci_dev_descr_sbridge[] = { /* Processor Home Agent */ - { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0) }, + { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0) }, /* Memory controller */ - { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA) }, - { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS) }, - { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0) }, - { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1) }, - { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2) }, - { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3) }, - { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO) }, + { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0) }, + { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0) }, + { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0) }, + { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0) }, + { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0) }, + { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0) }, + { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1) }, /* System Address Decoder */ - { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0) }, - { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1) }, + { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0) }, + { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0) }, /* Broadcast Registers */ - { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR) }, + { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) }, }; #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } @@ -363,11 +395,75 @@ static const struct pci_id_table pci_dev_descr_sbridge_table[] = { {0,} /* 0 terminated list. */ }; +/* This changes depending if 1HA or 2HA: + * 1HA: + * 0x0eb8 (17.0) is DDRIO0 + * 2HA: + * 0x0ebc (17.4) is DDRIO0 + */ +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0 0x0eb8 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0 0x0ebc + +/* pci ids */ +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0 0x0ea0 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA 0x0ea8 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS 0x0e71 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0 0x0eaa +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1 0x0eab +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2 0x0eac +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3 0x0ead +#define PCI_DEVICE_ID_INTEL_IBRIDGE_SAD 0x0ec8 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_BR0 0x0ec9 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_BR1 0x0eca +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1 0x0e60 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA 0x0e68 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS 0x0e79 +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 0x0e6a +#define PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1 0x0e6b + +static const struct pci_id_descr pci_dev_descr_ibridge[] = { + /* Processor Home Agent */ + { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0) }, + + /* Memory controller */ + { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0) }, + { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0) }, + { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0) }, + { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0) }, + { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0) }, + { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0) }, + + /* System Address Decoder */ + { PCI_DESCR(22, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0) }, + + /* Broadcast Registers */ + { PCI_DESCR(22, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1) }, + { PCI_DESCR(22, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0) }, + + /* Optional, mode 2HA */ + { PCI_DESCR(28, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1) }, +#if 0 + { PCI_DESCR(29, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1) }, + { PCI_DESCR(29, 1, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1) }, +#endif + { PCI_DESCR(29, 2, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1) }, + { PCI_DESCR(29, 3, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1) }, + + { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1) }, + { PCI_DESCR(17, 4, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1) }, +}; + +static const struct pci_id_table pci_dev_descr_ibridge_table[] = { + PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge), + {0,} /* 0 terminated list. */ +}; + /* * pci_device_id table for which devices we are looking for */ -static DEFINE_PCI_DEVICE_TABLE(sbridge_pci_tbl) = { +static const struct pci_device_id sbridge_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)}, {0,} /* 0 terminated list. */ }; @@ -457,6 +553,52 @@ static void free_sbridge_dev(struct sbridge_dev *sbridge_dev) kfree(sbridge_dev); } +static u64 sbridge_get_tolm(struct sbridge_pvt *pvt) +{ + u32 reg; + + /* Address range is 32:28 */ + pci_read_config_dword(pvt->pci_sad1, TOLM, ®); + return GET_TOLM(reg); +} + +static u64 sbridge_get_tohm(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->pci_sad1, TOHM, ®); + return GET_TOHM(reg); +} + +static u64 ibridge_get_tolm(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->pci_br1, TOLM, ®); + + return GET_TOLM(reg); +} + +static u64 ibridge_get_tohm(struct sbridge_pvt *pvt) +{ + u32 reg; + + pci_read_config_dword(pvt->pci_br1, TOHM, ®); + + return GET_TOHM(reg); +} + +static inline u8 sad_pkg_socket(u8 pkg) +{ + /* on Ivy Bridge, nodeID is SASS, where A is HA and S is node id */ + return (pkg >> 3) | (pkg & 0x3); +} + +static inline u8 sad_pkg_ha(u8 pkg) +{ + return (pkg >> 2) & 0x1; +} + /**************************************************************************** Memory check routines ****************************************************************************/ @@ -519,10 +661,10 @@ static int get_dimm_config(struct mem_ctl_info *mci) enum edac_type mode; enum mem_type mtype; - pci_read_config_dword(pvt->pci_br, SAD_TARGET, ®); + pci_read_config_dword(pvt->pci_br0, SAD_TARGET, ®); pvt->sbridge_dev->source_id = SOURCE_ID(reg); - pci_read_config_dword(pvt->pci_br, SAD_CONTROL, ®); + pci_read_config_dword(pvt->pci_br0, SAD_CONTROL, ®); pvt->sbridge_dev->node_id = NODE_ID(reg); edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n", pvt->sbridge_dev->mc, @@ -556,14 +698,20 @@ static int get_dimm_config(struct mem_ctl_info *mci) pvt->is_close_pg = false; } - pci_read_config_dword(pvt->pci_ddrio, RANK_CFG_A, ®); - if (IS_RDIMM_ENABLED(reg)) { - /* FIXME: Can also be LRDIMM */ - edac_dbg(0, "Memory is registered\n"); - mtype = MEM_RDDR3; + if (pvt->pci_ddrio) { + pci_read_config_dword(pvt->pci_ddrio, pvt->info.rankcfgr, + ®); + if (IS_RDIMM_ENABLED(reg)) { + /* FIXME: Can also be LRDIMM */ + edac_dbg(0, "Memory is registered\n"); + mtype = MEM_RDDR3; + } else { + edac_dbg(0, "Memory is unregistered\n"); + mtype = MEM_DDR3; + } } else { - edac_dbg(0, "Memory is unregistered\n"); - mtype = MEM_DDR3; + edac_dbg(0, "Cannot determine memory type\n"); + mtype = MEM_UNKNOWN; } /* On all supported DDR3 DIMM types, there are 8 banks available */ @@ -623,23 +771,18 @@ static void get_memory_layout(const struct mem_ctl_info *mci) * Step 1) Get TOLM/TOHM ranges */ - /* Address range is 32:28 */ - pci_read_config_dword(pvt->pci_sad1, TOLM, - ®); - pvt->tolm = GET_TOLM(reg); + pvt->tolm = pvt->info.get_tolm(pvt); tmp_mb = (1 + pvt->tolm) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm); /* Address range is already 45:25 */ - pci_read_config_dword(pvt->pci_sad1, TOHM, - ®); - pvt->tohm = GET_TOHM(reg); + pvt->tohm = pvt->info.get_tohm(pvt); tmp_mb = (1 + pvt->tohm) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); - edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm); + edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); /* * Step 2) Get SAD range and SAD Interleave list @@ -648,9 +791,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci) * algorithm bellow. */ prv = 0; - for (n_sads = 0; n_sads < MAX_SAD; n_sads++) { + for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) { /* SAD_LIMIT Address range is 45:26 */ - pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads], + pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads], ®); limit = SAD_LIMIT(reg); @@ -671,15 +814,16 @@ static void get_memory_layout(const struct mem_ctl_info *mci) reg); prv = limit; - pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads], + pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads], ®); - sad_interl = sad_pkg(reg, 0); + sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0); for (j = 0; j < 8; j++) { - if (j > 0 && sad_interl == sad_pkg(reg, j)) + u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, j); + if (j > 0 && sad_interl == pkg) break; edac_dbg(0, "SAD#%d, interleave #%d: %d\n", - n_sads, j, sad_pkg(reg, j)); + n_sads, j, pkg); } } @@ -771,7 +915,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) } } -struct mem_ctl_info *get_mci_for_node_id(u8 node_id) +static struct mem_ctl_info *get_mci_for_node_id(u8 node_id) { struct sbridge_dev *sbridge_dev; @@ -791,16 +935,17 @@ static int get_memory_error_data(struct mem_ctl_info *mci, { struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; + struct pci_dev *pci_ha; int n_rir, n_sads, n_tads, sad_way, sck_xch; int sad_interl, idx, base_ch; int interleave_mode; - unsigned sad_interleave[MAX_INTERLEAVE]; + unsigned sad_interleave[pvt->info.max_interleave]; u32 reg; - u8 ch_way,sck_way; + u8 ch_way, sck_way, pkg, sad_ha = 0; u32 tad_offset; u32 rir_way; u32 mb, kb; - u64 ch_addr, offset, limit, prv = 0; + u64 ch_addr, offset, limit = 0, prv = 0; /* @@ -822,8 +967,8 @@ static int get_memory_error_data(struct mem_ctl_info *mci, /* * Step 1) Get socket */ - for (n_sads = 0; n_sads < MAX_SAD; n_sads++) { - pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads], + for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) { + pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads], ®); if (!DRAM_RULE_ENABLE(reg)) @@ -838,53 +983,65 @@ static int get_memory_error_data(struct mem_ctl_info *mci, break; prv = limit; } - if (n_sads == MAX_SAD) { + if (n_sads == pvt->info.max_sad) { sprintf(msg, "Can't discover the memory socket"); return -EINVAL; } *area_type = get_dram_attr(reg); interleave_mode = INTERLEAVE_MODE(reg); - pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads], + pci_read_config_dword(pvt->pci_sad0, pvt->info.interleave_list[n_sads], ®); - sad_interl = sad_pkg(reg, 0); - for (sad_way = 0; sad_way < 8; sad_way++) { - if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way)) + + if (pvt->info.type == SANDY_BRIDGE) { + sad_interl = sad_pkg(pvt->info.interleave_pkg, reg, 0); + for (sad_way = 0; sad_way < 8; sad_way++) { + u32 pkg = sad_pkg(pvt->info.interleave_pkg, reg, sad_way); + if (sad_way > 0 && sad_interl == pkg) + break; + sad_interleave[sad_way] = pkg; + edac_dbg(0, "SAD interleave #%d: %d\n", + sad_way, sad_interleave[sad_way]); + } + edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n", + pvt->sbridge_dev->mc, + n_sads, + addr, + limit, + sad_way + 7, + !interleave_mode ? "" : "XOR[18:16]"); + if (interleave_mode) + idx = ((addr >> 6) ^ (addr >> 16)) & 7; + else + idx = (addr >> 6) & 7; + switch (sad_way) { + case 1: + idx = 0; break; - sad_interleave[sad_way] = sad_pkg(reg, sad_way); - edac_dbg(0, "SAD interleave #%d: %d\n", - sad_way, sad_interleave[sad_way]); - } - edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n", - pvt->sbridge_dev->mc, - n_sads, - addr, - limit, - sad_way + 7, - interleave_mode ? "" : "XOR[18:16]"); - if (interleave_mode) - idx = ((addr >> 6) ^ (addr >> 16)) & 7; - else + case 2: + idx = idx & 1; + break; + case 4: + idx = idx & 3; + break; + case 8: + break; + default: + sprintf(msg, "Can't discover socket interleave"); + return -EINVAL; + } + *socket = sad_interleave[idx]; + edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n", + idx, sad_way, *socket); + } else { + /* Ivy Bridge's SAD mode doesn't support XOR interleave mode */ idx = (addr >> 6) & 7; - switch (sad_way) { - case 1: - idx = 0; - break; - case 2: - idx = idx & 1; - break; - case 4: - idx = idx & 3; - break; - case 8: - break; - default: - sprintf(msg, "Can't discover socket interleave"); - return -EINVAL; + pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx); + *socket = sad_pkg_socket(pkg); + sad_ha = sad_pkg_ha(pkg); + edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n", + idx, *socket, sad_ha); } - *socket = sad_interleave[idx]; - edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n", - idx, sad_way, *socket); /* * Move to the proper node structure, in order to access the @@ -903,9 +1060,16 @@ static int get_memory_error_data(struct mem_ctl_info *mci, * Step 2) Get memory channel */ prv = 0; + if (pvt->info.type == SANDY_BRIDGE) + pci_ha = pvt->pci_ha0; + else { + if (sad_ha) + pci_ha = pvt->pci_ha1; + else + pci_ha = pvt->pci_ha0; + } for (n_tads = 0; n_tads < MAX_TAD; n_tads++) { - pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads], - ®); + pci_read_config_dword(pci_ha, tad_dram_rule[n_tads], ®); limit = TAD_LIMIT(reg); if (limit <= prv) { sprintf(msg, "Can't discover the memory channel"); @@ -915,14 +1079,13 @@ static int get_memory_error_data(struct mem_ctl_info *mci, break; prv = limit; } + if (n_tads == MAX_TAD) { + sprintf(msg, "Can't discover the memory channel"); + return -EINVAL; + } + ch_way = TAD_CH(reg) + 1; sck_way = TAD_SOCK(reg) + 1; - /* - * FIXME: Is it right to always use channel 0 for offsets? - */ - pci_read_config_dword(pvt->pci_tad[0], - tad_ch_nilv_offset[n_tads], - &tad_offset); if (ch_way == 3) idx = addr >> 6; @@ -952,6 +1115,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci, } *channel_mask = 1 << base_ch; + pci_read_config_dword(pvt->pci_tad[base_ch], + tad_ch_nilv_offset[n_tads], + &tad_offset); + if (pvt->is_mirrored) { *channel_mask |= 1 << ((base_ch + 2) % 4); switch(ch_way) { @@ -1085,12 +1252,6 @@ static void sbridge_put_all_devices(void) } } -/* - * sbridge_get_all_devices Find and perform 'get' operation on the MCH's - * device/functions we want to reference for this driver - * - * Need to 'get' device 16 func 1 and func 2 - */ static int sbridge_get_onedevice(struct pci_dev **prev, u8 *num_mc, const struct pci_id_table *table, @@ -1102,7 +1263,7 @@ static int sbridge_get_onedevice(struct pci_dev **prev, struct pci_dev *pdev = NULL; u8 bus = 0; - sbridge_printk(KERN_INFO, + sbridge_printk(KERN_DEBUG, "Seeking for: dev %02x.%d PCI ID %04x:%04x\n", dev_descr->dev, dev_descr->func, PCI_VENDOR_ID_INTEL, dev_descr->dev_id); @@ -1192,11 +1353,21 @@ static int sbridge_get_onedevice(struct pci_dev **prev, return 0; } -static int sbridge_get_all_devices(u8 *num_mc) +/* + * sbridge_get_all_devices - Find and perform 'get' operation on the MCH's + * device/functions we want to reference for this driver. + * Need to 'get' device 16 func 1 and func 2. + * @num_mc: pointer to the memory controllers count, to be incremented in case + * of success. + * @table: model specific table + * + * returns 0 in case of success or error code + */ +static int sbridge_get_all_devices(u8 *num_mc, + const struct pci_id_table *table) { int i, rc; struct pci_dev *pdev = NULL; - const struct pci_id_table *table = pci_dev_descr_sbridge_table; while (table && table->descr) { for (i = 0; i < table->n_devs; i++) { @@ -1220,8 +1391,8 @@ static int sbridge_get_all_devices(u8 *num_mc) return 0; } -static int mci_bind_devs(struct mem_ctl_info *mci, - struct sbridge_dev *sbridge_dev) +static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, + struct sbridge_dev *sbridge_dev) { struct sbridge_pvt *pvt = mci->pvt_info; struct pci_dev *pdev; @@ -1249,7 +1420,7 @@ static int mci_bind_devs(struct mem_ctl_info *mci, case 13: switch (func) { case 6: - pvt->pci_br = pdev; + pvt->pci_br0 = pdev; break; default: goto error; @@ -1303,8 +1474,7 @@ static int mci_bind_devs(struct mem_ctl_info *mci, /* Check if everything were registered */ if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 || - !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta || - !pvt->pci_ddrio) + !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta) goto enodev; for (i = 0; i < NUM_CHANNELS; i++) { @@ -1324,6 +1494,131 @@ error: return -EINVAL; } +static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, + struct sbridge_dev *sbridge_dev) +{ + struct sbridge_pvt *pvt = mci->pvt_info; + struct pci_dev *pdev, *tmp; + int i, func, slot; + bool mode_2ha = false; + + tmp = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, NULL); + if (tmp) { + mode_2ha = true; + pci_dev_put(tmp); + } + + for (i = 0; i < sbridge_dev->n_devs; i++) { + pdev = sbridge_dev->pdev[i]; + if (!pdev) + continue; + slot = PCI_SLOT(pdev->devfn); + func = PCI_FUNC(pdev->devfn); + + switch (slot) { + case 14: + if (func == 0) { + pvt->pci_ha0 = pdev; + break; + } + goto error; + case 15: + switch (func) { + case 0: + pvt->pci_ta = pdev; + break; + case 1: + pvt->pci_ras = pdev; + break; + case 4: + case 5: + /* if we have 2 HAs active, channels 2 and 3 + * are in other device */ + if (mode_2ha) + break; + /* fall through */ + case 2: + case 3: + pvt->pci_tad[func - 2] = pdev; + break; + default: + goto error; + } + break; + case 17: + if (func == 4) { + pvt->pci_ddrio = pdev; + break; + } else if (func == 0) { + if (!mode_2ha) + pvt->pci_ddrio = pdev; + break; + } + goto error; + case 22: + switch (func) { + case 0: + pvt->pci_sad0 = pdev; + break; + case 1: + pvt->pci_br0 = pdev; + break; + case 2: + pvt->pci_br1 = pdev; + break; + default: + goto error; + } + break; + case 28: + if (func == 0) { + pvt->pci_ha1 = pdev; + break; + } + goto error; + case 29: + /* we shouldn't have this device if we have just one + * HA present */ + WARN_ON(!mode_2ha); + if (func == 2 || func == 3) { + pvt->pci_tad[func] = pdev; + break; + } + goto error; + default: + goto error; + } + + edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n", + sbridge_dev->bus, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + pdev); + } + + /* Check if everything were registered */ + if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 || + !pvt->pci_br1 || !pvt->pci_tad || !pvt->pci_ras || + !pvt->pci_ta) + goto enodev; + + for (i = 0; i < NUM_CHANNELS; i++) { + if (!pvt->pci_tad[i]) + goto enodev; + } + return 0; + +enodev: + sbridge_printk(KERN_ERR, "Some needed devices are missing\n"); + return -ENODEV; + +error: + sbridge_printk(KERN_ERR, + "Device %d, function %d is out of the expected range\n", + slot, func); + return -EINVAL; +} + /**************************************************************************** Error check routines ****************************************************************************/ @@ -1344,7 +1639,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); - bool recoverable = GET_BITFIELD(m->status, 56, 56); + bool recoverable; u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); u32 mscod = GET_BITFIELD(m->status, 16, 31); u32 errcode = GET_BITFIELD(m->status, 0, 15); @@ -1355,6 +1650,11 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, int rc, dimm; char *area_type = NULL; + if (pvt->info.type == IVY_BRIDGE) + recoverable = true; + else + recoverable = GET_BITFIELD(m->status, 56, 56); + if (uncorrected_error) { if (ripv) { type = "FATAL"; @@ -1404,6 +1704,10 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, } } + /* Only decode errors with an valid address (ADDRV) */ + if (!GET_BITFIELD(m->status, 58, 58)) + return; + rc = get_memory_error_data(mci, m->addr, &socket, &channel_mask, &rank, &area_type, msg); if (rc < 0) @@ -1524,6 +1828,10 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; struct mem_ctl_info *mci; struct sbridge_pvt *pvt; + char *type; + + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + return NOTIFY_DONE; mci = get_mci_for_node_id(mce->socketid); if (!mci) @@ -1539,17 +1847,23 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, if ((mce->status & 0xefff) >> 7 != 1) return NOTIFY_DONE; - printk("sbridge: HANDLING MCE MEMORY ERROR\n"); + if (mce->mcgstatus & MCG_STATUS_MCIP) + type = "Exception"; + else + type = "Event"; + + sbridge_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n"); - printk("CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", - mce->extcpu, mce->mcgstatus, mce->bank, mce->status); - printk("TSC %llx ", mce->tsc); - printk("ADDR %llx ", mce->addr); - printk("MISC %llx ", mce->misc); + sbridge_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx " + "Bank %d: %016Lx\n", mce->extcpu, type, + mce->mcgstatus, mce->bank, mce->status); + sbridge_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc); + sbridge_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr); + sbridge_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc); - printk("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - mce->cpuvendor, mce->cpuid, mce->time, - mce->socketid, mce->apicid); + sbridge_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET " + "%u APIC %x\n", mce->cpuvendor, mce->cpuid, + mce->time, mce->socketid, mce->apicid); /* Only handle if it is the right mc controller */ if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc) @@ -1609,11 +1923,12 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) sbridge_dev->mci = NULL; } -static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) +static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) { struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; struct sbridge_pvt *pvt; + struct pci_dev *pdev = sbridge_dev->pdev[0]; int rc; /* Check the number of active and not disabled channels */ @@ -1635,7 +1950,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) return -ENOMEM; edac_dbg(0, "MC: mci = %p, dev = %p\n", - mci, &sbridge_dev->pdev[0]->dev); + mci, &pdev->dev); pvt = mci->pvt_info; memset(pvt, 0, sizeof(*pvt)); @@ -1649,24 +1964,52 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = "sbridge_edac.c"; mci->mod_ver = SBRIDGE_REVISION; - mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx); - mci->dev_name = pci_name(sbridge_dev->pdev[0]); + mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; /* Set the function pointer to an actual operation function */ mci->edac_check = sbridge_check_error; - /* Store pci devices at mci for faster access */ - rc = mci_bind_devs(mci, sbridge_dev); - if (unlikely(rc < 0)) - goto fail0; + pvt->info.type = type; + if (type == IVY_BRIDGE) { + pvt->info.rankcfgr = IB_RANK_CFG_A; + pvt->info.get_tolm = ibridge_get_tolm; + pvt->info.get_tohm = ibridge_get_tohm; + pvt->info.dram_rule = ibridge_dram_rule; + pvt->info.max_sad = ARRAY_SIZE(ibridge_dram_rule); + pvt->info.interleave_list = ibridge_interleave_list; + pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list); + pvt->info.interleave_pkg = ibridge_interleave_pkg; + mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx); + + /* Store pci devices at mci for faster access */ + rc = ibridge_mci_bind_devs(mci, sbridge_dev); + if (unlikely(rc < 0)) + goto fail0; + } else { + pvt->info.rankcfgr = SB_RANK_CFG_A; + pvt->info.get_tolm = sbridge_get_tolm; + pvt->info.get_tohm = sbridge_get_tohm; + pvt->info.dram_rule = sbridge_dram_rule; + pvt->info.max_sad = ARRAY_SIZE(sbridge_dram_rule); + pvt->info.interleave_list = sbridge_interleave_list; + pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list); + pvt->info.interleave_pkg = sbridge_interleave_pkg; + mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx); + + /* Store pci devices at mci for faster access */ + rc = sbridge_mci_bind_devs(mci, sbridge_dev); + if (unlikely(rc < 0)) + goto fail0; + } + /* Get dimm basic config and the memory layout */ get_dimm_config(mci); get_memory_layout(mci); /* record ptr to the generic device */ - mci->pdev = &sbridge_dev->pdev[0]->dev; + mci->pdev = &pdev->dev; /* add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { @@ -1697,6 +2040,7 @@ static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) int rc; u8 mc, num_mc = 0; struct sbridge_dev *sbridge_dev; + enum type type; /* get the pci devices we want to reserve for our use */ mutex_lock(&sbridge_edac_lock); @@ -1710,7 +2054,13 @@ static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) } probed++; - rc = sbridge_get_all_devices(&num_mc); + if (pdev->device == PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA) { + rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_ibridge_table); + type = IVY_BRIDGE; + } else { + rc = sbridge_get_all_devices(&num_mc, pci_dev_descr_sbridge_table); + type = SANDY_BRIDGE; + } if (unlikely(rc < 0)) goto fail0; mc = 0; @@ -1719,7 +2069,7 @@ static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) edac_dbg(0, "Registering MC#%d (%d of %d)\n", mc, mc + 1, num_mc); sbridge_dev->mc = mc++; - rc = sbridge_register_mci(sbridge_dev); + rc = sbridge_register_mci(sbridge_dev, type); if (unlikely(rc < 0)) goto fail1; } @@ -1802,9 +2152,10 @@ static int __init sbridge_init(void) opstate_init(); pci_rc = pci_register_driver(&sbridge_driver); - if (pci_rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; } @@ -1832,7 +2183,7 @@ module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); +MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); -MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge memory controllers - " +MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers - " SBRIDGE_REVISION); diff --git a/drivers/edac/tile_edac.c b/drivers/edac/tile_edac.c index a0820536b7d..578f915ee19 100644 --- a/drivers/edac/tile_edac.c +++ b/drivers/edac/tile_edac.c @@ -257,7 +257,6 @@ static void __exit tile_edac_exit(void) if (!pdev) continue; - platform_set_drvdata(pdev, NULL); platform_device_unregister(pdev); } platform_driver_unregister(&tile_edac_mc_driver); diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c index c9db24d95ca..4891b450830 100644 --- a/drivers/edac/x38_edac.c +++ b/drivers/edac/x38_edac.c @@ -248,8 +248,7 @@ static void x38_check(struct mem_ctl_info *mci) x38_process_error_info(mci, &info); } - -void __iomem *x38_map_mchbar(struct pci_dev *pdev) +static void __iomem *x38_map_mchbar(struct pci_dev *pdev) { union { u64 mchbar; @@ -449,7 +448,7 @@ static void x38_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static DEFINE_PCI_DEVICE_TABLE(x38_pci_tbl) = { +static const struct pci_device_id x38_pci_tbl[] = { { PCI_VEND_DEV(INTEL, X38_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, X38}, |
