aboutsummaryrefslogtreecommitdiff
path: root/drivers/edac
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-11-02 16:55:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-11-02 16:55:15 -0700
commit6681ba7ec480bc839584fd0817991d248b4b9e44 (patch)
tree994fb1de40d58ce8dac821cf1fec727e2f902f47 /drivers/edac
parent06ef93e1b8405acac6ec900564e3ad1a8e3a72b2 (diff)
parent4d096ca7e65584dd5845e64c6400f920e694f672 (diff)
Merge branch 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
* 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac: (21 commits) MAINTAINERS: add an entry for Edac Sandy Bridge driver edac: tag sb_edac as EXPERIMENTAL, as it requires more testing EDAC: Fix incorrect edac mode reporting in sb_edac edac: sb_edac: Add it to the building system edac: Add an experimental new driver to support Sandy Bridge CPU's i7300_edac: Fix error cleanup logic i7core_edac: Initialize memory name with cpu, channel, bank i7core_edac: Fix compilation on 32 bits arch i7core_edac: scrubbing fixups EDAC: Correct Kconfig dependencies i7core_edac: return -ENODEV if no MC is found i7core_edac: use edac's own way to print errors MAINTAINERS: remove dropped edac_mce.* from the file i7core_edac: Drop the edac_mce facility x86, MCE: Use notifier chain only for MCE decoding EDAC i7core: Use mce socketid for better compatibility i7core_edac: Don't enable memory scrubbing for Xeon 35xx i7core_edac: Add scrubbing support edac: Move edac main structs to include/linux/edac.h i7core_edac: Fix oops when trying to inject errors ...
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig16
-rw-r--r--drivers/edac/Makefile2
-rw-r--r--drivers/edac/edac_core.h350
-rw-r--r--drivers/edac/edac_mce.c61
-rw-r--r--drivers/edac/i7300_edac.c51
-rw-r--r--drivers/edac/i7core_edac.c415
-rw-r--r--drivers/edac/sb_edac.c1893
7 files changed, 2308 insertions, 480 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index af1a17d42bd..203361e4ef9 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -41,7 +41,7 @@ config EDAC_DEBUG
config EDAC_DECODE_MCE
tristate "Decode MCEs in human-readable form (only on AMD for now)"
- depends on CPU_SUP_AMD && X86_MCE
+ depends on CPU_SUP_AMD && X86_MCE_AMD
default y
---help---
Enable this option if you want to decode Machine Check Exceptions
@@ -71,9 +71,6 @@ config EDAC_MM_EDAC
occurred so that a particular failing memory module can be
replaced. If unsure, select 'Y'.
-config EDAC_MCE
- bool
-
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64) K8, F10h"
depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
@@ -173,8 +170,7 @@ config EDAC_I5400
config EDAC_I7CORE
tristate "Intel i7 Core (Nehalem) processors"
- depends on EDAC_MM_EDAC && PCI && X86
- select EDAC_MCE
+ depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
help
Support for error detection and correction the Intel
i7 Core (Nehalem) Integrated Memory Controller that exists on
@@ -216,6 +212,14 @@ config EDAC_I7300
Support for error detection and correction the Intel
Clarksboro MCH (Intel 7300 chipset).
+config EDAC_SBRIDGE
+ tristate "Intel Sandy-Bridge Integrated MC"
+ depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
+ depends on EXPERIMENTAL
+ help
+ Support for error detection and correction the Intel
+ Sandy Bridge Integrated Memory Controller.
+
config EDAC_MPC85XX
tristate "Freescale MPC83xx / MPC85xx"
depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx)
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 3e239133e29..196a63dd37c 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -8,7 +8,6 @@
obj-$(CONFIG_EDAC) := edac_stub.o
obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o
-obj-$(CONFIG_EDAC_MCE) += edac_mce.o
edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
edac_core-y += edac_module.o edac_device_sysfs.o
@@ -29,6 +28,7 @@ obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
obj-$(CONFIG_EDAC_I5400) += i5400_edac.o
obj-$(CONFIG_EDAC_I7300) += i7300_edac.o
obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o
+obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o
obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 55b8278bb17..fe90cd4a7eb 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -34,11 +34,10 @@
#include <linux/platform_device.h>
#include <linux/sysdev.h>
#include <linux/workqueue.h>
+#include <linux/edac.h>
-#define EDAC_MC_LABEL_LEN 31
#define EDAC_DEVICE_NAME_LEN 31
#define EDAC_ATTRIB_VALUE_LEN 15
-#define MC_PROC_NAME_MAX_LEN 7
#if PAGE_SHIFT < 20
#define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT))
@@ -101,353 +100,6 @@ extern int edac_debug_level;
#define edac_dev_name(dev) (dev)->dev_name
-/* memory devices */
-enum dev_type {
- DEV_UNKNOWN = 0,
- DEV_X1,
- DEV_X2,
- DEV_X4,
- DEV_X8,
- DEV_X16,
- DEV_X32, /* Do these parts exist? */
- DEV_X64 /* Do these parts exist? */
-};
-
-#define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN)
-#define DEV_FLAG_X1 BIT(DEV_X1)
-#define DEV_FLAG_X2 BIT(DEV_X2)
-#define DEV_FLAG_X4 BIT(DEV_X4)
-#define DEV_FLAG_X8 BIT(DEV_X8)
-#define DEV_FLAG_X16 BIT(DEV_X16)
-#define DEV_FLAG_X32 BIT(DEV_X32)
-#define DEV_FLAG_X64 BIT(DEV_X64)
-
-/* memory types */
-enum mem_type {
- MEM_EMPTY = 0, /* Empty csrow */
- MEM_RESERVED, /* Reserved csrow type */
- MEM_UNKNOWN, /* Unknown csrow type */
- MEM_FPM, /* Fast page mode */
- MEM_EDO, /* Extended data out */
- MEM_BEDO, /* Burst Extended data out */
- MEM_SDR, /* Single data rate SDRAM */
- MEM_RDR, /* Registered single data rate SDRAM */
- MEM_DDR, /* Double data rate SDRAM */
- MEM_RDDR, /* Registered Double data rate SDRAM */
- MEM_RMBS, /* Rambus DRAM */
- MEM_DDR2, /* DDR2 RAM */
- MEM_FB_DDR2, /* fully buffered DDR2 */
- MEM_RDDR2, /* Registered DDR2 RAM */
- MEM_XDR, /* Rambus XDR */
- MEM_DDR3, /* DDR3 RAM */
- MEM_RDDR3, /* Registered DDR3 RAM */
-};
-
-#define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
-#define MEM_FLAG_RESERVED BIT(MEM_RESERVED)
-#define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN)
-#define MEM_FLAG_FPM BIT(MEM_FPM)
-#define MEM_FLAG_EDO BIT(MEM_EDO)
-#define MEM_FLAG_BEDO BIT(MEM_BEDO)
-#define MEM_FLAG_SDR BIT(MEM_SDR)
-#define MEM_FLAG_RDR BIT(MEM_RDR)
-#define MEM_FLAG_DDR BIT(MEM_DDR)
-#define MEM_FLAG_RDDR BIT(MEM_RDDR)
-#define MEM_FLAG_RMBS BIT(MEM_RMBS)
-#define MEM_FLAG_DDR2 BIT(MEM_DDR2)
-#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
-#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
-#define MEM_FLAG_XDR BIT(MEM_XDR)
-#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
-#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
-
-/* chipset Error Detection and Correction capabilities and mode */
-enum edac_type {
- EDAC_UNKNOWN = 0, /* Unknown if ECC is available */
- EDAC_NONE, /* Doesn't support ECC */
- EDAC_RESERVED, /* Reserved ECC type */
- EDAC_PARITY, /* Detects parity errors */
- EDAC_EC, /* Error Checking - no correction */
- EDAC_SECDED, /* Single bit error correction, Double detection */
- EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */
- EDAC_S4ECD4ED, /* Chipkill x4 devices */
- EDAC_S8ECD8ED, /* Chipkill x8 devices */
- EDAC_S16ECD16ED, /* Chipkill x16 devices */
-};
-
-#define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN)
-#define EDAC_FLAG_NONE BIT(EDAC_NONE)
-#define EDAC_FLAG_PARITY BIT(EDAC_PARITY)
-#define EDAC_FLAG_EC BIT(EDAC_EC)
-#define EDAC_FLAG_SECDED BIT(EDAC_SECDED)
-#define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED)
-#define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED)
-#define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED)
-#define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED)
-
-/* scrubbing capabilities */
-enum scrub_type {
- SCRUB_UNKNOWN = 0, /* Unknown if scrubber is available */
- SCRUB_NONE, /* No scrubber */
- SCRUB_SW_PROG, /* SW progressive (sequential) scrubbing */
- SCRUB_SW_SRC, /* Software scrub only errors */
- SCRUB_SW_PROG_SRC, /* Progressive software scrub from an error */
- SCRUB_SW_TUNABLE, /* Software scrub frequency is tunable */
- SCRUB_HW_PROG, /* HW progressive (sequential) scrubbing */
- SCRUB_HW_SRC, /* Hardware scrub only errors */
- SCRUB_HW_PROG_SRC, /* Progressive hardware scrub from an error */
- SCRUB_HW_TUNABLE /* Hardware scrub frequency is tunable */
-};
-
-#define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG)
-#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC)
-#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC)
-#define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE)
-#define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG)
-#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC)
-#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC)
-#define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE)
-
-/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
-
-/* EDAC internal operation states */
-#define OP_ALLOC 0x100
-#define OP_RUNNING_POLL 0x201
-#define OP_RUNNING_INTERRUPT 0x202
-#define OP_RUNNING_POLL_INTR 0x203
-#define OP_OFFLINE 0x300
-
-/*
- * There are several things to be aware of that aren't at all obvious:
- *
- *
- * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
- *
- * These are some of the many terms that are thrown about that don't always
- * mean what people think they mean (Inconceivable!). In the interest of
- * creating a common ground for discussion, terms and their definitions
- * will be established.
- *
- * Memory devices: The individual chip on a memory stick. These devices
- * commonly output 4 and 8 bits each. Grouping several
- * of these in parallel provides 64 bits which is common
- * for a memory stick.
- *
- * Memory Stick: A printed circuit board that aggregates multiple
- * memory devices in parallel. This is the atomic
- * memory component that is purchaseable by Joe consumer
- * and loaded into a memory socket.
- *
- * Socket: A physical connector on the motherboard that accepts
- * a single memory stick.
- *
- * Channel: Set of memory devices on a memory stick that must be
- * grouped in parallel with one or more additional
- * channels from other memory sticks. This parallel
- * grouping of the output from multiple channels are
- * necessary for the smallest granularity of memory access.
- * Some memory controllers are capable of single channel -
- * which means that memory sticks can be loaded
- * individually. Other memory controllers are only
- * capable of dual channel - which means that memory
- * sticks must be loaded as pairs (see "socket set").
- *
- * Chip-select row: All of the memory devices that are selected together.
- * for a single, minimum grain of memory access.
- * This selects all of the parallel memory devices across
- * all of the parallel channels. Common chip-select rows
- * for single channel are 64 bits, for dual channel 128
- * bits.
- *
- * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory.
- * Motherboards commonly drive two chip-select pins to
- * a memory stick. A single-ranked stick, will occupy
- * only one of those rows. The other will be unused.
- *
- * Double-Ranked stick: A double-ranked stick has two chip-select rows which
- * access different sets of memory devices. The two
- * rows cannot be accessed concurrently.
- *
- * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
- * A double-sided stick has two chip-select rows which
- * access different sets of memory devices. The two
- * rows cannot be accessed concurrently. "Double-sided"
- * is irrespective of the memory devices being mounted
- * on both sides of the memory stick.
- *
- * Socket set: All of the memory sticks that are required for
- * a single memory access or all of the memory sticks
- * spanned by a chip-select row. A single socket set
- * has two chip-select rows and if double-sided sticks
- * are used these will occupy those chip-select rows.
- *
- * Bank: This term is avoided because it is unclear when
- * needing to distinguish between chip-select rows and
- * socket sets.
- *
- * Controller pages:
- *
- * Physical pages:
- *
- * Virtual pages:
- *
- *
- * STRUCTURE ORGANIZATION AND CHOICES
- *
- *
- *
- * PS - I enjoyed writing all that about as much as you enjoyed reading it.
- */
-
-struct channel_info {
- int chan_idx; /* channel index */
- u32 ce_count; /* Correctable Errors for this CHANNEL */
- char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
- struct csrow_info *csrow; /* the parent */
-};
-
-struct csrow_info {
- unsigned long first_page; /* first page number in dimm */
- unsigned long last_page; /* last page number in dimm */
- unsigned long page_mask; /* used for interleaving -
- * 0UL for non intlv
- */
- u32 nr_pages; /* number of pages in csrow */
- u32 grain; /* granularity of reported error in bytes */
- int csrow_idx; /* the chip-select row */
- enum dev_type dtype; /* memory device type */
- u32 ue_count; /* Uncorrectable Errors for this csrow */
- u32 ce_count; /* Correctable Errors for this csrow */
- enum mem_type mtype; /* memory csrow type */
- enum edac_type edac_mode; /* EDAC mode for this csrow */
- struct mem_ctl_info *mci; /* the parent */
-
- struct kobject kobj; /* sysfs kobject for this csrow */
-
- /* channel information for this csrow */
- u32 nr_channels;
- struct channel_info *channels;
-};
-
-struct mcidev_sysfs_group {
- const char *name; /* group name */
- const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */
-};
-
-struct mcidev_sysfs_group_kobj {
- struct list_head list; /* list for all instances within a mc */
-
- struct kobject kobj; /* kobj for the group */
-
- const struct mcidev_sysfs_group *grp; /* group description table */
- struct mem_ctl_info *mci; /* the parent */
-};
-
-/* mcidev_sysfs_attribute structure
- * used for driver sysfs attributes and in mem_ctl_info
- * sysfs top level entries
- */
-struct mcidev_sysfs_attribute {
- /* It should use either attr or grp */
- struct attribute attr;
- const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */
-
- /* Ops for show/store values at the attribute - not used on group */
- ssize_t (*show)(struct mem_ctl_info *,char *);
- ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
-};
-
-/* MEMORY controller information structure
- */
-struct mem_ctl_info {
- struct list_head link; /* for global list of mem_ctl_info structs */
-
- struct module *owner; /* Module owner of this control struct */
-
- unsigned long mtype_cap; /* memory types supported by mc */
- unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */
- unsigned long edac_cap; /* configuration capabilities - this is
- * closely related to edac_ctl_cap. The
- * difference is that the controller may be
- * capable of s4ecd4ed which would be listed
- * in edac_ctl_cap, but if channels aren't
- * capable of s4ecd4ed then the edac_cap would
- * not have that capability.
- */
- unsigned long scrub_cap; /* chipset scrub capabilities */
- enum scrub_type scrub_mode; /* current scrub mode */
-
- /* Translates sdram memory scrub rate given in bytes/sec to the
- internal representation and configures whatever else needs
- to be configured.
- */
- int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw);
-
- /* Get the current sdram memory scrub rate from the internal
- representation and converts it to the closest matching
- bandwidth in bytes/sec.
- */
- int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci);
-
-
- /* pointer to edac checking routine */
- void (*edac_check) (struct mem_ctl_info * mci);
-
- /*
- * Remaps memory pages: controller pages to physical pages.
- * For most MC's, this will be NULL.
- */
- /* FIXME - why not send the phys page to begin with? */
- unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
- unsigned long page);
- int mc_idx;
- int nr_csrows;
- struct csrow_info *csrows;
- /*
- * FIXME - what about controllers on other busses? - IDs must be
- * unique. dev pointer should be sufficiently unique, but
- * BUS:SLOT.FUNC numbers may not be unique.
- */
- struct device *dev;
- const char *mod_name;
- const char *mod_ver;
- const char *ctl_name;
- const char *dev_name;
- char proc_name[MC_PROC_NAME_MAX_LEN + 1];
- void *pvt_info;
- u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
- u32 ce_noinfo_count; /* Correctable Errors w/o info */
- u32 ue_count; /* Total Uncorrectable Errors for this MC */
- u32 ce_count; /* Total Correctable Errors for this MC */
- unsigned long start_time; /* mci load start time (in jiffies) */
-
- struct completion complete;
-
- /* edac sysfs device control */
- struct kobject edac_mci_kobj;
-
- /* list for all grp instances within a mc */
- struct list_head grp_kobj_list;
-
- /* Additional top controller level attributes, but specified
- * by the low level driver.
- *
- * Set by the low level driver to provide attributes at the
- * controller level, same level as 'ue_count' and 'ce_count' above.
- * An array of structures, NULL terminated
- *
- * If attributes are desired, then set to array of attributes
- * If no attributes are desired, leave NULL
- */
- const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;
-
- /* work struct for this MC */
- struct delayed_work work;
-
- /* the internal state of this controller instance */
- int op_state;
-};
-
/*
* The following are the structures to provide for a generic
* or abstract 'edac_device'. This set of structures and the
diff --git a/drivers/edac/edac_mce.c b/drivers/edac/edac_mce.c
deleted file mode 100644
index 9ccdc5b140e..00000000000
--- a/drivers/edac/edac_mce.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Provides edac interface to mcelog events
- *
- * This file may be distributed under the terms of the
- * GNU General Public License version 2.
- *
- * Copyright (c) 2009 by:
- * Mauro Carvalho Chehab <mchehab@redhat.com>
- *
- * Red Hat Inc. http://www.redhat.com
- */
-
-#include <linux/module.h>
-#include <linux/edac_mce.h>
-#include <asm/mce.h>
-
-int edac_mce_enabled;
-EXPORT_SYMBOL_GPL(edac_mce_enabled);
-
-
-/*
- * Extension interface
- */
-
-static LIST_HEAD(edac_mce_list);
-static DEFINE_MUTEX(edac_mce_lock);
-
-int edac_mce_register(struct edac_mce *edac_mce)
-{
- mutex_lock(&edac_mce_lock);
- list_add_tail(&edac_mce->list, &edac_mce_list);
- mutex_unlock(&edac_mce_lock);
- return 0;
-}
-EXPORT_SYMBOL(edac_mce_register);
-
-void edac_mce_unregister(struct edac_mce *edac_mce)
-{
- mutex_lock(&edac_mce_lock);
- list_del(&edac_mce->list);
- mutex_unlock(&edac_mce_lock);
-}
-EXPORT_SYMBOL(edac_mce_unregister);
-
-int edac_mce_parse(struct mce *mce)
-{
- struct edac_mce *edac_mce;
-
- list_for_each_entry(edac_mce, &edac_mce_list, list) {
- if (edac_mce->check_error(edac_mce->priv, mce))
- return 1;
- }
-
- /* Nobody queued the error */
- return 0;
-}
-EXPORT_SYMBOL_GPL(edac_mce_parse);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
-MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
-MODULE_DESCRIPTION("EDAC Driver for mcelog captured errors");
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index a76fe8366b6..6104dba380b 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -372,7 +372,7 @@ static const char *get_err_from_table(const char *table[], int size, int pos)
static void i7300_process_error_global(struct mem_ctl_info *mci)
{
struct i7300_pvt *pvt;
- u32 errnum, value;
+ u32 errnum, error_reg;
unsigned long errors;
const char *specific;
bool is_fatal;
@@ -381,9 +381,9 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* read in the 1st FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_HI, &value);
- if (unlikely(value)) {
- errors = value;
+ FERR_GLOBAL_HI, &error_reg);
+ if (unlikely(error_reg)) {
+ errors = error_reg;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_global_hi_name));
specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum);
@@ -391,15 +391,15 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* Clear the error bit */
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_HI, value);
+ FERR_GLOBAL_HI, error_reg);
goto error_global;
}
pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_LO, &value);
- if (unlikely(value)) {
- errors = value;
+ FERR_GLOBAL_LO, &error_reg);
+ if (unlikely(error_reg)) {
+ errors = error_reg;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_global_lo_name));
specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum);
@@ -407,7 +407,7 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* Clear the error bit */
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_LO, value);
+ FERR_GLOBAL_LO, error_reg);
goto error_global;
}
@@ -427,7 +427,7 @@ error_global:
static void i7300_process_fbd_error(struct mem_ctl_info *mci)
{
struct i7300_pvt *pvt;
- u32 errnum, value;
+ u32 errnum, value, error_reg;
u16 val16;
unsigned branch, channel, bank, rank, cas, ras;
u32 syndrome;
@@ -440,14 +440,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
/* read in the 1st FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
- FERR_FAT_FBD, &value);
- if (unlikely(value & FERR_FAT_FBD_ERR_MASK)) {
- errors = value & FERR_FAT_FBD_ERR_MASK ;
+ FERR_FAT_FBD, &error_reg);
+ if (unlikely(error_reg & FERR_FAT_FBD_ERR_MASK)) {
+ errors = error_reg & FERR_FAT_FBD_ERR_MASK ;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_fat_fbd_name));
specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum);
+ branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
- branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
NRECMEMA, &val16);
bank = NRECMEMA_BANK(val16);
@@ -455,11 +455,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
NRECMEMB, &value);
-
is_wr = NRECMEMB_IS_WR(value);
cas = NRECMEMB_CAS(value);
ras = NRECMEMB_RAS(value);
+ /* Clean the error register */
+ pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
+ FERR_FAT_FBD, error_reg);
+
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
"FATAL (Branch=%d DRAM-Bank=%d %s "
"RAS=%d CAS=%d Err=0x%lx (%s))",
@@ -476,21 +479,17 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
/* read in the 1st NON-FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
- FERR_NF_FBD, &value);
- if (unlikely(value & FERR_NF_FBD_ERR_MASK)) {
- errors = value & FERR_NF_FBD_ERR_MASK;
+ FERR_NF_FBD, &error_reg);
+ if (unlikely(error_reg & FERR_NF_FBD_ERR_MASK)) {
+ errors = error_reg & FERR_NF_FBD_ERR_MASK;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_nf_fbd_name));
specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum);
-
- /* Clear the error bit */
- pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_LO, value);
+ branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
REDMEMA, &syndrome);
- branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
RECMEMA, &val16);
bank = RECMEMA_BANK(val16);
@@ -498,18 +497,20 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
RECMEMB, &value);
-
is_wr = RECMEMB_IS_WR(value);
cas = RECMEMB_CAS(value);
ras = RECMEMB_RAS(value);
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
REDMEMB, &value);
-
channel = (branch << 1);
if (IS_SECOND_CH(value))
channel++;
+ /* Clear the error bit */
+ pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
+ FERR_NF_FBD, error_reg);
+
/* Form out message */
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
"Corrected error (Branch=%d, Channel %d), "
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index f6cf448d69b..70ad8923f1d 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -31,11 +31,13 @@
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
+#include <linux/dmi.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
-#include <linux/edac_mce.h>
#include <linux/smp.h>
+#include <asm/mce.h>
#include <asm/processor.h>
+#include <asm/div64.h>
#include "edac_core.h"
@@ -78,6 +80,8 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/* OFFSETS for Device 0 Function 0 */
#define MC_CFG_CONTROL 0x90
+ #define MC_CFG_UNLOCK 0x02
+ #define MC_CFG_LOCK 0x00
/* OFFSETS for Device 3 Function 0 */
@@ -98,6 +102,15 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
#define DIMM0_COR_ERR(r) ((r) & 0x7fff)
/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */
+#define MC_SSRCONTROL 0x48
+ #define SSR_MODE_DISABLE 0x00
+ #define SSR_MODE_ENABLE 0x01
+ #define SSR_MODE_MASK 0x03
+
+#define MC_SCRUB_CONTROL 0x4c
+ #define STARTSCRUB (1 << 24)
+ #define SCRUBINTERVAL_MASK 0xffffff
+
#define MC_COR_ECC_CNT_0 0x80
#define MC_COR_ECC_CNT_1 0x84
#define MC_COR_ECC_CNT_2 0x88
@@ -253,10 +266,7 @@ struct i7core_pvt {
unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
- unsigned int is_registered;
-
- /* mcelog glue */
- struct edac_mce edac_mce;
+ bool is_registered, enable_scrub;
/* Fifo double buffers */
struct mce mce_entry[MCE_LOG_LEN];
@@ -268,6 +278,9 @@ struct i7core_pvt {
/* Count indicator to show errors not got */
unsigned mce_overrun;
+ /* DCLK Frequency used for computing scrub rate */
+ int dclk_freq;
+
/* Struct to control EDAC polling */
struct edac_pci_ctl_info *i7core_pci;
};
@@ -281,8 +294,7 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
/* Memory controller */
{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
-
- /* Exists only for RDIMM */
+ /* Exists only for RDIMM */
{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
@@ -303,6 +315,16 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
+
+ /* Generic Non-core registers */
+ /*
+ * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
+ * On Xeon 55xx, however, it has a different id (8086:2c40). So,
+ * the probing code needs to test for the other address in case of
+ * failure of this one
+ */
+ { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
+
};
static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
@@ -319,6 +341,12 @@ static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
+
+ /*
+ * This is the PCI device has an alternate address on some
+ * processors like Core i7 860
+ */
+ { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) },
};
static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
@@ -346,6 +374,10 @@ static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) },
+
+ /* Generic Non-core registers */
+ { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) },
+
};
#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
@@ -714,6 +746,10 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
csr->edac_mode = mode;
csr->mtype = mtype;
+ snprintf(csr->channels[0].label,
+ sizeof(csr->channels[0].label),
+ "CPU#%uChannel#%u_DIMM#%u",
+ pvt->i7core_dev->socket, i, j);
csrow++;
}
@@ -731,7 +767,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
debugf1("\t\t%#x\t%#x\t%#x\n",
(value[j] >> 27) & 0x1,
(value[j] >> 24) & 0x7,
- (value[j] && ((1 << 24) - 1)));
+ (value[j] & ((1 << 24) - 1)));
}
return 0;
@@ -1324,6 +1360,20 @@ static int i7core_get_onedevice(struct pci_dev **prev,
pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
dev_descr->dev_id, *prev);
+ /*
+ * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
+ * is at addr 8086:2c40, instead of 8086:2c41. So, we need
+ * to probe for the alternate address in case of failure
+ */
+ if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
+ pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
+
+ if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
+ pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
+ *prev);
+
if (!pdev) {
if (*prev) {
*prev = pdev;
@@ -1444,8 +1494,10 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
struct i7core_pvt *pvt = mci->pvt_info;
struct pci_dev *pdev;
int i, func, slot;
+ char *family;
- pvt->is_registered = 0;
+ pvt->is_registered = false;
+ pvt->enable_scrub = false;
for (i = 0; i < i7core_dev->n_devs; i++) {
pdev = i7core_dev->pdev[i];
if (!pdev)
@@ -1461,9 +1513,37 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
if (unlikely(func > MAX_CHAN_FUNC))
goto error;
pvt->pci_ch[slot - 4][func] = pdev;
- } else if (!slot && !func)
+ } else if (!slot && !func) {
pvt->pci_noncore = pdev;
- else
+
+ /* Detect the processor family */
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_I7_NONCORE:
+ family = "Xeon 35xx/ i7core";
+ pvt->enable_scrub = false;
+ break;
+ case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
+ family = "i7-800/i5-700";
+ pvt->enable_scrub = false;
+ break;
+ case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
+ family = "Xeon 34xx";
+ pvt->enable_scrub = false;
+ break;
+ case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
+ family = "Xeon 55xx";
+ pvt->enable_scrub = true;
+ break;
+ case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
+ family = "Xeon 56xx / i7-900";
+ pvt->enable_scrub = true;
+ break;
+ default:
+ family = "unknown";
+ pvt->enable_scrub = false;
+ }
+ debugf0("Detected a processor type %s\n", family);
+ } else
goto error;
debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
@@ -1472,7 +1552,7 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
if (PCI_SLOT(pdev->devfn) == 3 &&
PCI_FUNC(pdev->devfn) == 2)
- pvt->is_registered = 1;
+ pvt->is_registered = true;
}
return 0;
@@ -1826,33 +1906,43 @@ check_ce_error:
* WARNING: As this routine should be called at NMI time, extra care should
* be taken to avoid deadlocks, and to be as fast as possible.
*/
-static int i7core_mce_check_error(void *priv, struct mce *mce)
+static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
+ void *data)
{
- struct mem_ctl_info *mci = priv;
- struct i7core_pvt *pvt = mci->pvt_info;
+ struct mce *mce = (struct mce *)data;
+ struct i7core_dev *i7_dev;
+ struct mem_ctl_info *mci;
+ struct i7core_pvt *pvt;
+
+ i7_dev = get_i7core_dev(mce->socketid);
+ if (!i7_dev)
+ return NOTIFY_BAD;
+
+ mci = i7_dev->mci;
+ pvt = mci->pvt_info;
/*
* Just let mcelog handle it if the error is
* outside the memory controller
*/
if (((mce->status & 0xffff) >> 7) != 1)
- return 0;
+ return NOTIFY_DONE;
/* Bank 8 registers are the only ones that we know how to handle */
if (mce->bank != 8)
- return 0;
+ return NOTIFY_DONE;
#ifdef CONFIG_SMP
/* Only handle if it is the right mc controller */
- if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
- return 0;
+ if (mce->socketid != pvt->i7core_dev->socket)
+ return NOTIFY_DONE;
#endif
smp_rmb();
if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
smp_wmb();
pvt->mce_overrun++;
- return 0;
+ return NOTIFY_DONE;
}
/* Copy memory error at the ringbuffer */
@@ -1865,7 +1955,240 @@ static int i7core_mce_check_error(void *priv, struct mce *mce)
i7core_check_error(mci);
/* Advise mcelog that the errors were handled */
- return 1;
+ return NOTIFY_STOP;
+}
+
+static struct notifier_block i7_mce_dec = {
+ .notifier_call = i7core_mce_check_error,
+};
+
+struct memdev_dmi_entry {
+ u8 type;
+ u8 length;
+ u16 handle;
+ u16 phys_mem_array_handle;
+ u16 mem_err_info_handle;
+ u16 total_width;
+ u16 data_width;
+ u16 size;
+ u8 form;
+ u8 device_set;
+ u8 device_locator;
+ u8 bank_locator;
+ u8 memory_type;
+ u16 type_detail;
+ u16 speed;
+ u8 manufacturer;
+ u8 serial_number;
+ u8 asset_tag;
+ u8 part_number;
+ u8 attributes;
+ u32 extended_size;
+ u16 conf_mem_clk_speed;
+} __attribute__((__packed__));
+
+
+/*
+ * Decode the DRAM Clock Frequency, be paranoid, make sure that all
+ * memory devices show the same speed, and if they don't then consider
+ * all speeds to be invalid.
+ */
+static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
+{
+ int *dclk_freq = _dclk_freq;