diff options
-rw-r--r-- | Documentation/edac.txt | 152 | ||||
-rw-r--r-- | arch/x86/include/asm/pci_x86.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 10 | ||||
-rw-r--r-- | arch/x86/pci/legacy.c | 42 | ||||
-rw-r--r-- | drivers/edac/Kconfig | 13 | ||||
-rw-r--r-- | drivers/edac/Makefile | 2 | ||||
-rw-r--r-- | drivers/edac/edac_core.h | 23 | ||||
-rw-r--r-- | drivers/edac/edac_mc_sysfs.c | 175 | ||||
-rw-r--r-- | drivers/edac/edac_mce.c | 61 | ||||
-rw-r--r-- | drivers/edac/i7core_edac.c | 2078 | ||||
-rw-r--r-- | include/linux/edac_mce.h | 31 | ||||
-rw-r--r-- | include/linux/pci.h | 1 | ||||
-rw-r--r-- | include/linux/pci_ids.h | 52 |
13 files changed, 2598 insertions, 44 deletions
diff --git a/Documentation/edac.txt b/Documentation/edac.txt index 79c53322376..0b875e8da96 100644 --- a/Documentation/edac.txt +++ b/Documentation/edac.txt @@ -6,6 +6,8 @@ Written by Doug Thompson <dougthompson@xmission.com> 7 Dec 2005 17 Jul 2007 Updated +(c) Mauro Carvalho Chehab <mchehab@redhat.com> +05 Aug 2009 Nehalem interface EDAC is maintained and written by: @@ -717,3 +719,153 @@ unique drivers for their hardware systems. The 'test_device_edac' sample driver is located at the bluesmoke.sourceforge.net project site for EDAC. +======================================================================= +NEHALEM USAGE OF EDAC APIs + +This chapter documents some EXPERIMENTAL mappings for EDAC API to handle +Nehalem EDAC driver. They will likely be changed on future versions +of the driver. + +Due to the way Nehalem exports Memory Controller data, some adjustments +were done at i7core_edac driver. This chapter will cover those differences + +1) On Nehalem, there are one Memory Controller per Quick Patch Interconnect + (QPI). At the driver, the term "socket" means one QPI. This is + associated with a physical CPU socket. + + Each MC have 3 physical read channels, 3 physical write channels and + 3 logic channels. The driver currenty sees it as just 3 channels. + Each channel can have up to 3 DIMMs. + + The minimum known unity is DIMMs. There are no information about csrows. + As EDAC API maps the minimum unity is csrows, the driver sequencially + maps channel/dimm into different csrows. + + For example, suposing the following layout: + Ch0 phy rd0, wr0 (0x063f4031): 2 ranks, UDIMMs + dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400 + dimm 1 1024 Mb offset: 4, bank: 8, rank: 1, row: 0x4000, col: 0x400 + Ch1 phy rd1, wr1 (0x063f4031): 2 ranks, UDIMMs + dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400 + Ch2 phy rd3, wr3 (0x063f4031): 2 ranks, UDIMMs + dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400 + The driver will map it as: + csrow0: channel 0, dimm0 + csrow1: channel 0, dimm1 + csrow2: channel 1, dimm0 + csrow3: channel 2, dimm0 + +exports one + DIMM per csrow. + + Each QPI is exported as a different memory controller. + +2) Nehalem MC has the hability to generate errors. The driver implements this + functionality via some error injection nodes: + + For injecting a memory error, there are some sysfs nodes, under + /sys/devices/system/edac/mc/mc?/: + + inject_addrmatch/*: + Controls the error injection mask register. It is possible to specify + several characteristics of the address to match an error code: + dimm = the affected dimm. Numbers are relative to a channel; + rank = the memory rank; + channel = the channel that will generate an error; + bank = the affected bank; + page = the page address; + column (or col) = the address column. + each of the above values can be set to "any" to match any valid value. + + At driver init, all values are set to any. + + For example, to generate an error at rank 1 of dimm 2, for any channel, + any bank, any page, any column: + echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm + echo 1 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank + + To return to the default behaviour of matching any, you can do: + echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm + echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank + + inject_eccmask: + specifies what bits will have troubles, + + inject_section: + specifies what ECC cache section will get the error: + 3 for both + 2 for the highest + 1 for the lowest + + inject_type: + specifies the type of error, being a combination of the following bits: + bit 0 - repeat + bit 1 - ecc + bit 2 - parity + + inject_enable starts the error generation when something different + than 0 is written. + + All inject vars can be read. root permission is needed for write. + + Datasheet states that the error will only be generated after a write on an + address that matches inject_addrmatch. It seems, however, that reading will + also produce an error. + + For example, the following code will generate an error for any write access + at socket 0, on any DIMM/address on channel 2: + + echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/channel + echo 2 >/sys/devices/system/edac/mc/mc0/inject_type + echo 64 >/sys/devices/system/edac/mc/mc0/inject_eccmask + echo 3 >/sys/devices/system/edac/mc/mc0/inject_section + echo 1 >/sys/devices/system/edac/mc/mc0/inject_enable + dd if=/dev/mem of=/dev/null seek=16k bs=4k count=1 >& /dev/null + + For socket 1, it is needed to replace "mc0" by "mc1" at the above + commands. + + The generated error message will look like: + + EDAC MC0: UE row 0, channel-a= 0 channel-b= 0 labels "-": NON_FATAL (addr = 0x0075b980, socket=0, Dimm=0, Channel=2, syndrome=0x00000040, count=1, Err=8c0000400001009f:4000080482 (read error: read ECC error)) + +3) Nehalem specific Corrected Error memory counters + + Nehalem have some registers to count memory errors. The driver uses those + registers to report Corrected Errors on devices with Registered Dimms. + + However, those counters don't work with Unregistered Dimms. As the chipset + offers some counters that also work with UDIMMS (but with a worse level of + granularity than the default ones), the driver exposes those registers for + UDIMM memories. + + They can be read by looking at the contents of all_channel_counts/ + + $ for i in /sys/devices/system/edac/mc/mc0/all_channel_counts/*; do echo $i; cat $i; done + /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm0 + 0 + /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm1 + 0 + /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm2 + 0 + + What happens here is that errors on different csrows, but at the same + dimm number will increment the same counter. + So, in this memory mapping: + csrow0: channel 0, dimm0 + csrow1: channel 0, dimm1 + csrow2: channel 1, dimm0 + csrow3: channel 2, dimm0 + The hardware will increment udimm0 for an error at the first dimm at either + csrow0, csrow2 or csrow3; + The hardware will increment udimm1 for an error at the second dimm at either + csrow0, csrow2 or csrow3; + The hardware will increment udimm2 for an error at the third dimm at either + csrow0, csrow2 or csrow3; + +4) Standard error counters + + The standard error counters are generated when an mcelog error is received + by the driver. Since, with udimm, this is counted by software, it is + possible that some errors could be lost. With rdimm's, they displays the + contents of the registers diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 8d8797eae5d..cd2a31dc5fb 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -53,6 +53,8 @@ extern int pcibios_last_bus; extern struct pci_bus *pci_root_bus; extern struct pci_ops pci_root_ops; +void pcibios_scan_specific_bus(int busn); + /* pci-irq.c */ struct irq_info { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 707165dbc20..18cc4256225 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -36,6 +36,7 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/debugfs.h> +#include <linux/edac_mce.h> #include <asm/processor.h> #include <asm/hw_irq.h> @@ -169,6 +170,15 @@ void mce_log(struct mce *mce) entry = rcu_dereference_check_mce(mcelog.next); for (;;) { /* + * If edac_mce is enabled, it will check the error type + * and will process it, if it is a known error. + * Otherwise, the error will be sent through mcelog + * interface + */ + if (edac_mce_parse(mce)) + return; + + /* * When the buffer fills up discard new entries. * Assume that the earlier errors are the more * interesting ones: diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 0db5eaf5456..8d460eaf524 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -11,28 +11,14 @@ */ static void __devinit pcibios_fixup_peer_bridges(void) { - int n, devfn; - long node; + int n; if (pcibios_last_bus <= 0 || pcibios_last_bus > 0xff) return; DBG("PCI: Peer bridge fixup\n"); - for (n=0; n <= pcibios_last_bus; n++) { - u32 l; - if (pci_find_bus(0, n)) - continue; - node = get_mp_bus_to_node(n); - for (devfn = 0; devfn < 256; devfn += 8) { - if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && - l != 0x0000 && l != 0xffff) { - DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); - printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); - pci_scan_bus_on_node(n, &pci_root_ops, node); - break; - } - } - } + for (n=0; n <= pcibios_last_bus; n++) + pcibios_scan_specific_bus(n); } int __init pci_legacy_init(void) @@ -50,6 +36,28 @@ int __init pci_legacy_init(void) return 0; } +void pcibios_scan_specific_bus(int busn) +{ + int devfn; + long node; + u32 l; + + if (pci_find_bus(0, busn)) + return; + + node = get_mp_bus_to_node(busn); + for (devfn = 0; devfn < 256; devfn += 8) { + if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) && + l != 0x0000 && l != 0xffff) { + DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l); + printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn); + pci_scan_bus_on_node(busn, &pci_root_ops, node); + return; + } + } +} +EXPORT_SYMBOL_GPL(pcibios_scan_specific_bus); + int __init pci_subsys_init(void) { /* diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 55c9c59b3f7..aedef7941b2 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -69,6 +69,9 @@ config EDAC_MM_EDAC occurred so that a particular failing memory module can be replaced. If unsure, select 'Y'. +config EDAC_MCE + bool + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && EDAC_DECODE_MCE @@ -166,6 +169,16 @@ config EDAC_I5400 Support for error detection and correction the Intel i5400 MCH chipset (Seaburg). +config EDAC_I7CORE + tristate "Intel i7 Core (Nehalem) processors" + depends on EDAC_MM_EDAC && PCI && X86 + select EDAC_MCE + help + Support for error detection and correction the Intel + i7 Core (Nehalem) Integrated Memory Controller that exists on + newer processors like i7 Core, i7 Core Extreme, Xeon 35xx + and Xeon 55xx processors. + config EDAC_I82860 tristate "Intel 82860" depends on EDAC_MM_EDAC && PCI && X86_32 diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index bc5dc232a0f..ca6b1bb24cc 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o +obj-$(CONFIG_EDAC_MCE) += edac_mce.o edac_core-objs := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o edac_core-objs += edac_module.o edac_device_sysfs.o @@ -23,6 +24,7 @@ obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o obj-$(CONFIG_EDAC_I5000) += i5000_edac.o obj-$(CONFIG_EDAC_I5100) += i5100_edac.o obj-$(CONFIG_EDAC_I5400) += i5400_edac.o +obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 001b2e797fb..efca9343d26 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -341,12 +341,30 @@ struct csrow_info { struct channel_info *channels; }; +struct mcidev_sysfs_group { + const char *name; /* group name */ + struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */ +}; + +struct mcidev_sysfs_group_kobj { + struct list_head list; /* list for all instances within a mc */ + + struct kobject kobj; /* kobj for the group */ + + struct mcidev_sysfs_group *grp; /* group description table */ + struct mem_ctl_info *mci; /* the parent */ +}; + /* mcidev_sysfs_attribute structure * used for driver sysfs attributes and in mem_ctl_info * sysfs top level entries */ struct mcidev_sysfs_attribute { - struct attribute attr; + /* It should use either attr or grp */ + struct attribute attr; + struct mcidev_sysfs_group *grp; /* Points to a group of attributes */ + + /* Ops for show/store values at the attribute - not used on group */ ssize_t (*show)(struct mem_ctl_info *,char *); ssize_t (*store)(struct mem_ctl_info *, const char *,size_t); }; @@ -424,6 +442,9 @@ struct mem_ctl_info { /* edac sysfs device control */ struct kobject edac_mci_kobj; + /* list for all grp instances within a mc */ + struct list_head grp_kobj_list; + /* Additional top controller level attributes, but specified * by the low level driver. * diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 418b65f1a1d..c200c2fd43e 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -557,6 +557,8 @@ static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr, struct mem_ctl_info *mem_ctl_info = to_mci(kobj); struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); + debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info); + if (mcidev_attr->show) return mcidev_attr->show(mem_ctl_info, buffer); @@ -569,6 +571,8 @@ static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr, struct mem_ctl_info *mem_ctl_info = to_mci(kobj); struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); + debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info); + if (mcidev_attr->store) return mcidev_attr->store(mem_ctl_info, buffer, count); @@ -726,28 +730,118 @@ void edac_mc_unregister_sysfs_main_kobj(struct mem_ctl_info *mci) #define EDAC_DEVICE_SYMLINK "device" +#define grp_to_mci(k) (container_of(k, struct mcidev_sysfs_group_kobj, kobj)->mci) + +/* MCI show/store functions for top most object */ +static ssize_t inst_grp_show(struct kobject *kobj, struct attribute *attr, + char *buffer) +{ + struct mem_ctl_info *mem_ctl_info = grp_to_mci(kobj); + struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); + + debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info); + + if (mcidev_attr->show) + return mcidev_attr->show(mem_ctl_info, buffer); + + return -EIO; +} + +static ssize_t inst_grp_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct mem_ctl_info *mem_ctl_info = grp_to_mci(kobj); + struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); + + debugf1("%s() mem_ctl_info %p\n", __func__, mem_ctl_info); + + if (mcidev_attr->store) + return mcidev_attr->store(mem_ctl_info, buffer, count); + + return -EIO; +} + +/* No memory to release for this kobj */ +static void edac_inst_grp_release(struct kobject *kobj) +{ + struct mcidev_sysfs_group_kobj *grp; + struct mem_ctl_info *mci; + + debugf1("%s()\n", __func__); + + grp = container_of(kobj, struct mcidev_sysfs_group_kobj, kobj); + mci = grp->mci; + + kobject_put(&mci->edac_mci_kobj); +} + +/* Intermediate show/store table */ +static struct sysfs_ops inst_grp_ops = { + .show = inst_grp_show, + .store = inst_grp_store +}; + +/* the kobj_type instance for a instance group */ +static struct kobj_type ktype_inst_grp = { + .release = edac_inst_grp_release, + .sysfs_ops = &inst_grp_ops, +}; + + /* * edac_create_mci_instance_attributes - * create MC driver specific attributes at the topmost level - * directory of this mci instance. + * create MC driver specific attributes bellow an specified kobj + * This routine calls itself recursively, in order to create an entire + * object tree. */ -static int edac_create_mci_instance_attributes(struct mem_ctl_info *mci) +static int edac_create_mci_instance_attributes(struct mem_ctl_info *mci, + struct mcidev_sysfs_attribute *sysfs_attrib, + struct kobject *kobj) { int err; - struct mcidev_sysfs_attribute *sysfs_attrib; - /* point to the start of the array and iterate over it - * adding each attribute listed to this mci instance's kobject - */ - sysfs_attrib = mci->mc_driver_sysfs_attributes; + debugf1("%s()\n", __func__); + + while (sysfs_attrib) { + if (sysfs_attrib->grp) { + struct mcidev_sysfs_group_kobj *grp_kobj; + + grp_kobj = kzalloc(sizeof(*grp_kobj), GFP_KERNEL); + if (!grp_kobj) + return -ENOMEM; + + list_add_tail(&grp_kobj->list, &mci->grp_kobj_list); + + grp_kobj->grp = sysfs_attrib->grp; + grp_kobj->mci = mci; + + debugf0("%s() grp %s, mci %p\n", __func__, + sysfs_attrib->grp->name, mci); + + err = kobject_init_and_add(&grp_kobj->kobj, + &ktype_inst_grp, + &mci->edac_mci_kobj, + sysfs_attrib->grp->name); + if (err) + return err; + + err = edac_create_mci_instance_attributes(mci, + grp_kobj->grp->mcidev_attr, + &grp_kobj->kobj); + + if (err) + return err; + } else if (sysfs_attrib->attr.name) { + debugf0("%s() file %s\n", __func__, + sysfs_attrib->attr.name); + + err = sysfs_create_file(kobj, &sysfs_attrib->attr); + } else + break; - while (sysfs_attrib && sysfs_attrib->attr.name) { - err = sysfs_create_file(&mci->edac_mci_kobj, - (struct attribute*) sysfs_attrib); if (err) { return err; } - sysfs_attrib++; } @@ -759,21 +853,44 @@ static int edac_create_mci_instance_attributes(struct mem_ctl_info *mci) * remove MC driver specific attributes at the topmost level * directory of this mci instance. */ -static void edac_remove_mci_instance_attributes(struct mem_ctl_info *mci) +static void edac_remove_mci_instance_attributes(struct mem_ctl_info *mci, + struct mcidev_sysfs_attribute *sysfs_attrib, + struct kobject *kobj, int count) { - struct mcidev_sysfs_attribute *sysfs_attrib; + struct mcidev_sysfs_group_kobj *grp_kobj, *tmp; - /* point to the start of the array and iterate over it - * adding each attribute listed to this mci instance's kobject - */ - sysfs_attrib = mci->mc_driver_sysfs_attributes; + debugf1("%s()\n", __func__); - /* loop if there are attributes and until we hit a NULL entry */ - while (sysfs_attrib && sysfs_attrib->attr.name) { - sysfs_remove_file(&mci->edac_mci_kobj, - (struct attribute *) sysfs_attrib); + /* + * loop if there are attributes and until we hit a NULL entry + * Remove first all the atributes + */ + while (sysfs_attrib) { + if (sysfs_attrib->grp) { + list_for_each_entry(grp_kobj, &mci->grp_kobj_list, + list) + if (grp_kobj->grp == sysfs_attrib->grp) + edac_remove_mci_instance_attributes(mci, + grp_kobj->grp->mcidev_attr, + &grp_kobj->kobj, count + 1); + } else if (sysfs_attrib->attr.name) { + debugf0("%s() file %s\n", __func__, + sysfs_attrib->attr.name); + sysfs_remove_file(kobj, &sysfs_attrib->attr); + } else + break; sysfs_attrib++; } + + /* + * Now that all attributes got removed, it is save to remove all groups + */ + if (!count) + list_for_each_entry_safe(grp_kobj, tmp, &mci->grp_kobj_list, + list) { + debugf0("%s() grp %s\n", __func__, grp_kobj->grp->name); + kobject_put(&grp_kobj->kobj); + } } @@ -794,6 +911,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) debugf0("%s() idx=%d\n", __func__, mci->mc_idx); + INIT_LIST_HEAD(&mci->grp_kobj_list); + /* create a symlink for the device */ err = sysfs_create_link(kobj_mci, &mci->dev->kobj, EDAC_DEVICE_SYMLINK); @@ -806,7 +925,9 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) * then create them now for the driver. */ if (mci->mc_driver_sysfs_attributes) { - err = edac_create_mci_instance_attributes(mci); + err = edac_create_mci_instance_attributes(mci, + mci->mc_driver_sysfs_attributes, + &mci->edac_mci_kobj); if (err) { debugf1("%s() failure to create mci attributes\n", __func__); @@ -841,7 +962,8 @@ fail1: } /* remove the mci instance's attributes, if any */ - edac_remove_mci_instance_attributes(mci); + edac_remove_mci_instance_attributes(mci, + mci->mc_driver_sysfs_attributes, &mci->edac_mci_kobj, 0); /* remove the symlink */ sysfs_remove_link(kobj_mci, EDAC_DEVICE_SYMLINK); @@ -875,8 +997,9 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) debugf0("%s() remove_mci_instance\n", __func__); /* remove this mci instance's attribtes */ - edac_remove_mci_instance_attributes(mci); - + edac_remove_mci_instance_attributes(mci, + mci->mc_driver_sysfs_attributes, + &mci->edac_mci_kobj, 0); debugf0("%s() unregister this mci kobj\n", __func__); /* unregister this instance's kobject */ diff --git a/drivers/edac/edac_mce.c b/drivers/edac/edac_mce.c new file mode 100644 index 00000000000..9ccdc5b140e --- /dev/null +++ b/drivers/edac/edac_mce.c @@ -0,0 +1,61 @@ +/* Provides edac interface to mcelog events + * + * This file may be distributed under the terms of the + * GNU General Public License version 2. + * + * Copyright (c) 2009 by: + * Mauro Carvalho Chehab <mchehab@redhat.com> + * + * Red Hat Inc. http://www.redhat.com + */ + +#include <linux/module.h> +#include <linux/edac_mce.h> +#include <asm/mce.h> + +int edac_mce_enabled; +EXPORT_SYMBOL_GPL(edac_mce_enabled); + + +/* + * Extension interface + */ + +static LIST_HEAD(edac_mce_list); +static DEFINE_MUTEX(edac_mce_lock); + +int edac_mce_register(struct edac_mce *edac_mce) +{ + mutex_lock(&edac_mce_lock); + list_add_tail(&edac_mce->list, &edac_mce_list); + mutex_unlock(&edac_mce_lock); + return 0; +} +EXPORT_SYMBOL(edac_mce_register); + +void edac_mce_unregister(struct edac_mce *edac_mce) +{ + mutex_lock(&edac_mce_lock); + list_del(&edac_mce->list); + mutex_unlock(&edac_mce_lock); +} +EXPORT_SYMBOL(edac_mce_unregister); + +int edac_mce_parse(struct mce *mce) +{ + struct edac_mce *edac_mce; + + list_for_each_entry(edac_mce, &edac_mce_list, list) { + if (edac_mce->check_error(edac_mce->priv, mce)) + return 1; + } + + /* Nobody queued the error */ + return 0; +} +EXPORT_SYMBOL_GPL(edac_mce_parse); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); +MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); +MODULE_DESCRIPTION("EDAC Driver for mcelog captured errors"); diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c new file mode 100644 index 00000000000..6b8b7b41ec5 --- /dev/null +++ b/drivers/edac/i7core_edac.c @@ -0,0 +1,2078 @@ +/* Intel i7 core/Nehalem Memory Controller kernel module + * + * This driver supports yhe memory controllers found on the Intel + * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx, + * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield + * and Westmere-EP. + * + * This file may be distributed under the terms of the + * GNU General Public License version 2 only. + * + * Copyright (c) 2009-2010 by: + * Mauro Carvalho Chehab <mchehab@redhat.com> + * + * Red Hat Inc. http://www.redhat.com + * + * Forked and adapted from the i5400_edac driver + * + * Based on the following public Intel datasheets: + * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor + * Datasheet, Volume 2: + * http://download.intel.com/design/processor/datashts/320835.pdf + * Intel Xeon Processor 5500 Series Datasheet Volume 2 + * http://www.intel.com/Assets/PDF/datasheet/321322.pdf + * also available at: + * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/edac.h> +#include <linux/mmzone.h> +#include <linux/edac_mce.h> +#include <linux/smp.h> +#include <asm/processor.h> + +#include "edac_core.h" + +/* + * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core + * registers start at bus 255, and are not reported by BIOS. + * We currently find devices with only 2 sockets. In order to support more QPI + * Quick Path Interconnect, just increment this number. + */ +#define MAX_SOCKET_BUSES 2 + + +/* + * Alter this version for the module when modifications are made + */ +#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__ +#define EDAC_MOD_STR "i7core_edac" + +/* + * Debug macros + */ +#define i7core_printk(level, fmt, arg...) \ + edac_printk(level, "i7core", fmt, ##arg) + +#define i7core_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg) + +/* + * i7core Memory Controller Registers + */ + + /* OFFSETS for Device 0 Function 0 */ + +#define MC_CFG_CONTROL 0x90 + + /* OFFSETS for Device 3 Function 0 */ + +#define MC_CONTROL 0x48 +#define MC_STATUS 0x4c +#define MC_MAX_DOD 0x64 + +/* + * OFFSETS for Device 3 Function 4, as inicated on Xeon 5500 datasheet: + * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf + */ + +#define MC_TEST_ERR_RCV1 0x60 + #define DIMM2_COR_ERR(r) ((r) & 0x7fff) + +#define MC_TEST_ERR_RCV0 0x64 + #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff) + #define DIMM0_COR_ERR(r) ((r) & 0x7fff) + +/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */ +#define MC_COR_ECC_CNT_0 0x80 +#define MC_COR_ECC_CNT_1 0x84 +#define MC_COR_ECC_CNT_2 0x88 +#define MC_COR_ECC_CNT_3 0x8c +#define MC_COR_ECC_CNT_4 0x90 +#define MC_COR_ECC_CNT_5 0x94 + +#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff) +#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff) + + + /* OFFSETS for Devices 4,5 and 6 Function 0 */ + +#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58 + #define THREE_DIMMS_PRESENT (1 << 24) + #define SINGLE_QUAD_RANK_PRESENT (1 << 23) + #define QUAD_RANK_PRESENT (1 << 22) + #define REGISTERED_DIMM (1 << 15) + +#define MC_CHANNEL_MAPPER 0x60 + #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1) + #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1) + +#define MC_CHANNEL_RANK_PRESENT 0x7c + #define RANK_PRESENT_MASK 0xffff + +#define MC_CHANNEL_ADDR_MATCH 0xf0 +#define MC_CHANNEL_ERROR_MASK 0xf8 +#define MC_CHANNEL_ERROR_INJECT 0xfc + #define INJECT_ADDR_PARITY 0x10 + #define INJECT_ECC 0x08 + #define MASK_CACHELINE 0x06 + #define MASK_FULL_CACHELINE 0x06 + #define MASK_MSB32_CACHELINE 0x04 + #define MASK_LSB32_CACHELINE 0x02 + #define NO_MASK_CACHELINE 0x00 + #define REPEAT_EN 0x01 + + /* OFFSETS for Devices 4,5 and 6 Function 1 */ + +#define MC_DOD_CH_DIMM0 0x48 +#define MC_DOD_CH_DIMM1 0x4c +#define MC_DOD_CH_DIMM2 0x50 + #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10)) + #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10) + #define DIMM_PRESENT_MASK (1 << 9) + #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9) + #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7)) + #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7) + #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5)) + #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5) + #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2)) + #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2) + #define MC_DOD_NUMCOL_MASK 3 + #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK) + +#define MC_RANK_PRESENT 0x7c + +#define MC_SAG_CH_0 0x80 +#define MC_SAG_CH_1 0x84 +#define MC_SAG_CH_2 0x88 +#define MC_SAG_CH_3 0x8c +#define MC_SAG_CH_4 0x90 +#define MC_SAG_CH_5 0x94 +#define MC_SAG_CH_6 0x98 +#define MC_SAG_CH_7 0x9c + +#define MC_RIR_LIMIT_CH_0 0x40 +#define MC_RIR_LIMIT_CH_1 0x44 +#define MC_RIR_LIMIT_CH_2 0x48 +#define MC_RIR_LIMIT_CH_3 0x4C +#define MC_RIR_LIMIT_CH_4 0x50 +#define MC_RIR_LIMIT_CH_5 0x54 +#define MC_RIR_LIMIT_CH_6 0x58 +#define MC_RIR_LIMIT_CH_7 0x5C +#define MC_RIR_LIMIT_MASK ((1 << 10) - 1) + +#define MC_RIR_WAY_CH 0x80 + #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7) + #define MC_RIR_WAY_RANK_MASK 0x7 + +/* + * i7core structs + */ + +#define NUM_CHANS 3 +#define MAX_DIMMS 3 /* Max DIMMS per channel */ +#define MAX_MCR_FUNC 4 +#define MAX_CHAN_FUNC 3 + +struct i7core_info { + u32 mc_control; + u32 mc_status; + u32 max_dod; + u32 ch_map; +}; + + +struct i7core_inject { + int enable; + + u32 section; + u32 type; + u32 eccmask; + + /* Error address mask */ + int channel, dimm, rank, bank, page, col; +}; + +struct i7core_channel { + u32 ranks; + u32 dimms; +}; + +struct pci_id_descr { + int dev; + int func; + int dev_id; + int optional; +}; + +struct pci_id_table { + struct pci_id_descr *descr; + int n_devs; +}; + +struct i7core_dev { + struct list_head list; + u8 socket; + struct pci_dev **pdev; + int n_devs; + struct mem_ctl_info *mci; +}; + +struct i7core_pvt { + struct pci_dev *pci_noncore; + struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1]; + struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1]; + + struct i7core_dev *i7core_dev; + + struct i7core_info info; + struct i7core_inject inject; + struct i7core_channel channel[NUM_CHANS]; + + int channels; /* Number of active channels */ + + int ce_count_available; + int csrow_map[NUM_CHANS][MAX_DIMMS]; + + /* ECC corrected errors counts per udimm */ + unsigned long udimm_ce_count[MAX_DIMMS]; + int udimm_last_ce_count[MAX_DIMMS]; + /* ECC corrected errors counts per rdimm */ + unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS]; + int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS]; + + unsigned int is_registered; + + /* mcelog glue */ + struct edac_mce edac_mce; + + /* Fifo double buffers */ + struct mce mce_entry[MCE_LOG_LEN]; + struct mce mce_outentry[MCE_LOG_LEN]; + + /* Fifo in/out counters */ + unsigned mce_in, mce_out; + + /* Count indicator to show errors not got */ + unsigned mce_overrun; +}; + +/* Static vars */ +static LIST_HEAD(i7core_edac_list); +static DEFINE_MUTEX(i7core_edac_lock); + +#define PCI_DESCR(device, function, device_id) \ + .dev = (device), \ + .func = (function), \ + .dev_id = (device_id) + +struct pci_id_descr pci_dev_descr_i7core_nehalem[] = { + /* Memory controller */ + { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) }, + { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) }, + /* Exists only for RDIMM */ + { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 }, + { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) }, + + /* Channel 0 */ + { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) }, + { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) }, + { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) }, + { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) }, + + /* Channel 1 */ + { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) }, + { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) }, + { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) }, + { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) }, + + /* Channel 2 */ + { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) }, + { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) }, + { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) }, + { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) }, + + /* Generic Non-core registers */ + /* + * This is the PCI device on i7core and on Xeon 35xx (8086:2c41) + * On Xeon 55xx, however, it has a different id (8086:2c40). So, + * the probing code needs to test for the other address in case of + * failure of this one + */ + { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) }, + +}; + +struct pci_id_descr pci_dev_des |