diff options
Diffstat (limited to 'drivers/edac/mce_amd.c')
| -rw-r--r-- | drivers/edac/mce_amd.c | 568 |
1 files changed, 273 insertions, 295 deletions
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index f6cf73d9335..5f43620d580 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -6,10 +6,9 @@ static struct amd_decoder_ops *fam_ops; static u8 xec_mask = 0xf; -static u8 nb_err_cpumask = 0xf; static bool report_gart_errors; -static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg); +static void (*nb_bus_decoder)(int node_id, struct mce *m); void amd_report_gart_errors(bool v) { @@ -17,13 +16,13 @@ void amd_report_gart_errors(bool v) } EXPORT_SYMBOL_GPL(amd_report_gart_errors); -void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)) +void amd_register_ecc_decoder(void (*f)(int, struct mce *)) { nb_bus_decoder = f; } EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); -void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)) +void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)) { if (nb_bus_decoder) { WARN_ON(nb_bus_decoder != f); @@ -39,43 +38,30 @@ EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); */ /* transaction type */ -const char *tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; -EXPORT_SYMBOL_GPL(tt_msgs); +static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; /* cache level */ -const char *ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; -EXPORT_SYMBOL_GPL(ll_msgs); +static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; /* memory transaction type */ -const char *rrrr_msgs[] = { +static const char * const rrrr_msgs[] = { "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP" }; -EXPORT_SYMBOL_GPL(rrrr_msgs); /* participating processor */ -const char *pp_msgs[] = { "SRC", "RES", "OBS", "GEN" }; +const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" }; EXPORT_SYMBOL_GPL(pp_msgs); /* request timeout */ -const char *to_msgs[] = { "no timeout", "timed out" }; -EXPORT_SYMBOL_GPL(to_msgs); +static const char * const to_msgs[] = { "no timeout", "timed out" }; /* memory or i/o */ -const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; -EXPORT_SYMBOL_GPL(ii_msgs); - -static const char *f10h_nb_mce_desc[] = { - "HT link data error", - "Protocol error (link, L3, probe filter, etc.)", - "Parity error in NB-internal arrays", - "Link Retry due to IO link transmission error", - "L3 ECC data cache error", - "ECC error in L3 cache tag", - "L3 LRU parity bits error", - "ECC Error in the Probe Filter directory" -}; +static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; -static const char * const f15h_ic_mce_desc[] = { +/* internal error type */ +static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" }; + +static const char * const f15h_mc1_mce_desc[] = { "UC during a demand linefill from L2", "Parity error during data load from IC", "Parity error for IC valid bit", @@ -88,14 +74,14 @@ static const char * const f15h_ic_mce_desc[] = { "Parity error for IC probe tag valid bit", "PFB non-cacheable bit parity error", "PFB valid bit parity error", /* xec = 0xd */ - "patch RAM", /* xec = 010 */ + "Microcode Patch Buffer", /* xec = 010 */ "uop queue", "insn buffer", "predecode buffer", "fetch address FIFO" }; -static const char * const f15h_cu_mce_desc[] = { +static const char * const f15h_mc2_mce_desc[] = { "Fill ECC error on data fills", /* xec = 0x4 */ "Fill parity error on insn fills", "Prefetcher request FIFO parity error", @@ -104,7 +90,7 @@ static const char * const f15h_cu_mce_desc[] = { "WCC Tag ECC error", "WCC Data ECC error", "WCB Data parity error", - "VB Data/ECC error", + "VB Data ECC or parity error", "L2 Tag ECC error", /* xec = 0x10 */ "Hard L2 Tag ECC error", "Multiple hits on L2 tag", @@ -112,7 +98,29 @@ static const char * const f15h_cu_mce_desc[] = { "PRB address parity error" }; -static const char * const fr_ex_mce_desc[] = { +static const char * const mc4_mce_desc[] = { + "DRAM ECC error detected on the NB", + "CRC error detected on HT link", + "Link-defined sync error packets detected on HT link", + "HT Master abort", + "HT Target abort", + "Invalid GART PTE entry during GART table walk", + "Unsupported atomic RMW received from an IO link", + "Watchdog timeout due to lack of progress", + "DRAM ECC error detected on the NB", + "SVM DMA Exclusion Vector error", + "HT data error detected on link", + "Protocol error (link, L3, probe filter)", + "NB internal arrays parity error", + "DRAM addr/ctl signals parity error", + "IO link transmission error", + "L3 data cache ECC error", /* xec = 0x1c */ + "L3 cache tag error", + "L3 LRU parity bits error", + "ECC Error in the Probe Filter directory" +}; + +static const char * const mc5_mce_desc[] = { "CPU Watchdog timer expire", "Wakeup array dest tag", "AG payload array", @@ -125,10 +133,11 @@ static const char * const fr_ex_mce_desc[] = { "Physical register file AG0 port", "Physical register file AG1 port", "Flag register file", - "DE correctable error could not be corrected" + "DE error occurred", + "Retire status queue" }; -static bool f12h_dc_mce(u16 ec, u8 xec) +static bool f12h_mc0_mce(u16 ec, u8 xec) { bool ret = false; @@ -146,26 +155,26 @@ static bool f12h_dc_mce(u16 ec, u8 xec) return ret; } -static bool f10h_dc_mce(u16 ec, u8 xec) +static bool f10h_mc0_mce(u16 ec, u8 xec) { if (R4(ec) == R4_GEN && LL(ec) == LL_L1) { pr_cont("during data scrub.\n"); return true; } - return f12h_dc_mce(ec, xec); + return f12h_mc0_mce(ec, xec); } -static bool k8_dc_mce(u16 ec, u8 xec) +static bool k8_mc0_mce(u16 ec, u8 xec) { if (BUS_ERROR(ec)) { pr_cont("during system linefill.\n"); return true; } - return f10h_dc_mce(ec, xec); + return f10h_mc0_mce(ec, xec); } -static bool f14h_dc_mce(u16 ec, u8 xec) +static bool cat_mc0_mce(u16 ec, u8 xec) { u8 r4 = R4(ec); bool ret = true; @@ -217,7 +226,7 @@ static bool f14h_dc_mce(u16 ec, u8 xec) return ret; } -static bool f15h_dc_mce(u16 ec, u8 xec) +static bool f15h_mc0_mce(u16 ec, u8 xec) { bool ret = true; @@ -255,22 +264,21 @@ static bool f15h_dc_mce(u16 ec, u8 xec) } else if (BUS_ERROR(ec)) { if (!xec) - pr_cont("during system linefill.\n"); + pr_cont("System Read Data Error.\n"); else - pr_cont(" Internal %s condition.\n", - ((xec == 1) ? "livelock" : "deadlock")); + pr_cont(" Internal error condition type %d.\n", xec); } else ret = false; return ret; } -static void amd_decode_dc_mce(struct mce *m) +static void decode_mc0_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Data Cache Error: "); + pr_emerg(HW_ERR "MC0 Error: "); /* TLB error signatures are the same across families */ if (TLB_ERROR(ec)) { @@ -280,13 +288,13 @@ static void amd_decode_dc_mce(struct mce *m) : (xec ? "multimatch" : "parity"))); return; } - } else if (fam_ops->dc_mce(ec, xec)) + } else if (fam_ops->mc0_mce(ec, xec)) ; else - pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); + pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n"); } -static bool k8_ic_mce(u16 ec, u8 xec) +static bool k8_mc1_mce(u16 ec, u8 xec) { u8 ll = LL(ec); bool ret = true; @@ -320,26 +328,32 @@ static bool k8_ic_mce(u16 ec, u8 xec) return ret; } -static bool f14h_ic_mce(u16 ec, u8 xec) +static bool cat_mc1_mce(u16 ec, u8 xec) { u8 r4 = R4(ec); bool ret = true; - if (MEM_ERROR(ec)) { - if (TT(ec) != 0 || LL(ec) != 1) - ret = false; + if (!MEM_ERROR(ec)) + return false; + + if (TT(ec) != TT_INSTR) + return false; + + if (r4 == R4_IRD) + pr_cont("Data/tag array parity error for a tag hit.\n"); + else if (r4 == R4_SNOOP) + pr_cont("Tag error during snoop/victimization.\n"); + else if (xec == 0x0) + pr_cont("Tag parity error from victim castout.\n"); + else if (xec == 0x2) + pr_cont("Microcode patch RAM parity error.\n"); + else + ret = false; - if (r4 == R4_IRD) - pr_cont("Data/tag array parity error for a tag hit.\n"); - else if (r4 == R4_SNOOP) - pr_cont("Tag error during snoop/victimization.\n"); - else - ret = false; - } return ret; } -static bool f15h_ic_mce(u16 ec, u8 xec) +static bool f15h_mc1_mce(u16 ec, u8 xec) { bool ret = true; @@ -348,15 +362,19 @@ static bool f15h_ic_mce(u16 ec, u8 xec) switch (xec) { case 0x0 ... 0xa: - pr_cont("%s.\n", f15h_ic_mce_desc[xec]); + pr_cont("%s.\n", f15h_mc1_mce_desc[xec]); break; case 0xd: - pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]); + pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]); + break; + + case 0x10: + pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]); break; - case 0x10 ... 0x14: - pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]); + case 0x11 ... 0x14: + pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]); break; default: @@ -365,12 +383,12 @@ static bool f15h_ic_mce(u16 ec, u8 xec) return ret; } -static void amd_decode_ic_mce(struct mce *m) +static void decode_mc1_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Instruction Cache Error: "); + pr_emerg(HW_ERR "MC1 Error: "); if (TLB_ERROR(ec)) pr_cont("%s TLB %s.\n", LL_MSG(ec), @@ -379,18 +397,15 @@ static void amd_decode_ic_mce(struct mce *m) bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); - } else if (fam_ops->ic_mce(ec, xec)) + } else if (fam_ops->mc1_mce(ec, xec)) ; else - pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); + pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n"); } -static void amd_decode_bu_mce(struct mce *m) +static bool k8_mc2_mce(u16 ec, u8 xec) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, xec_mask); - - pr_emerg(HW_ERR "Bus Unit Error"); + bool ret = true; if (xec == 0x1) pr_cont(" in the write data buffers.\n"); @@ -415,24 +430,18 @@ static void amd_decode_bu_mce(struct mce *m) pr_cont(": %s parity/ECC error during data " "access from L2.\n", R4_MSG(ec)); else - goto wrong_bu_mce; + ret = false; } else - goto wrong_bu_mce; + ret = false; } else - goto wrong_bu_mce; - - return; + ret = false; -wrong_bu_mce: - pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); + return ret; } -static void amd_decode_cu_mce(struct mce *m) +static bool f15h_mc2_mce(u16 ec, u8 xec) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, xec_mask); - - pr_emerg(HW_ERR "Combined Unit Error: "); + bool ret = true; if (TLB_ERROR(ec)) { if (xec == 0x0) @@ -440,114 +449,136 @@ static void amd_decode_cu_mce(struct mce *m) else if (xec == 0x1) pr_cont("Poison data provided for TLB fill.\n"); else - goto wrong_cu_mce; + ret = false; } else if (BUS_ERROR(ec)) { if (xec > 2) - goto wrong_cu_mce; + ret = false; pr_cont("Error during attempted NB data read.\n"); } else if (MEM_ERROR(ec)) { switch (xec) { case 0x4 ... 0xc: - pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]); + pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]); break; case 0x10 ... 0x14: - pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]); + pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]); break; default: - goto wrong_cu_mce; + ret = false; } } - return; + return ret; +} -wrong_cu_mce: - pr_emerg(HW_ERR "Corrupted CU MCE info?\n"); +static bool f16h_mc2_mce(u16 ec, u8 xec) +{ + u8 r4 = R4(ec); + + if (!MEM_ERROR(ec)) + return false; + + switch (xec) { + case 0x04 ... 0x05: + pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O'); + break; + + case 0x09 ... 0x0b: + case 0x0d ... 0x0f: + pr_cont("ECC error in L2 tag (%s).\n", + ((r4 == R4_GEN) ? "BankReq" : + ((r4 == R4_SNOOP) ? "Prb" : "Fill"))); + break; + + case 0x10 ... 0x19: + case 0x1b: + pr_cont("ECC error in L2 data array (%s).\n", + (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" : + ((r4 == R4_GEN) ? "Attr" : + ((r4 == R4_EVICT) ? "Vict" : "Fill")))); + break; + + case 0x1c ... 0x1d: + case 0x1f: + pr_cont("Parity error in L2 attribute bits (%s).\n", + ((r4 == R4_RD) ? "Hit" : + ((r4 == R4_GEN) ? "Attr" : "Fill"))); + break; + + default: + return false; + } + + return true; } -static void amd_decode_ls_mce(struct mce *m) +static void decode_mc2_mce(struct mce *m) +{ + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); + + pr_emerg(HW_ERR "MC2 Error: "); + + if (!fam_ops->mc2_mce(ec, xec)) + pr_cont(HW_ERR "Corrupted MC2 MCE info?\n"); +} + +static void decode_mc3_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); if (boot_cpu_data.x86 >= 0x14) { - pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," + pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family," " please report on LKML.\n"); return; } - pr_emerg(HW_ERR "Load Store Error"); + pr_emerg(HW_ERR "MC3 Error"); if (xec == 0x0) { u8 r4 = R4(ec); if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) - goto wrong_ls_mce; + goto wrong_mc3_mce; pr_cont(" during %s.\n", R4_MSG(ec)); } else - goto wrong_ls_mce; + goto wrong_mc3_mce; return; -wrong_ls_mce: - pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); + wrong_mc3_mce: + pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n"); } -static bool k8_nb_mce(u16 ec, u8 xec) +static void decode_mc4_mce(struct mce *m) { - bool ret = true; - - switch (xec) { - case 0x1: - pr_cont("CRC error detected on HT link.\n"); - break; - - case 0x5: - pr_cont("Invalid GART PTE entry during GART table walk.\n"); - break; - - case 0x6: - pr_cont("Unsupported atomic RMW received from an IO link.\n"); - break; - - case 0x0: - case 0x8: - if (boot_cpu_data.x86 == 0x11) - return false; - - pr_cont("DRAM ECC error detected on the NB.\n"); - break; - - case 0xd: - pr_cont("Parity error on the DRAM addr/ctl signals.\n"); - break; - - default: - ret = false; - break; - } + struct cpuinfo_x86 *c = &boot_cpu_data; + int node_id = amd_get_nb_id(m->extcpu); + u16 ec = EC(m->status); + u8 xec = XEC(m->status, 0x1f); + u8 offset = 0; - return ret; -} + pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id); -static bool f10h_nb_mce(u16 ec, u8 xec) -{ - bool ret = true; - u8 offset = 0; + switch (xec) { + case 0x0 ... 0xe: - if (k8_nb_mce(ec, xec)) - return true; + /* special handling for DRAM ECCs */ + if (xec == 0x0 || xec == 0x8) { + /* no ECCs on F11h */ + if (c->x86 == 0x11) + goto wrong_mc4_mce; - switch(xec) { - case 0xa ... 0xc: - offset = 10; - break; + pr_cont("%s.\n", mc4_mce_desc[xec]); - case 0xe: - offset = 11; + if (nb_bus_decoder) + nb_bus_decoder(node_id, m); + return; + } break; case 0xf: @@ -556,139 +587,59 @@ static bool f10h_nb_mce(u16 ec, u8 xec) else if (BUS_ERROR(ec)) pr_cont("DMA Exclusion Vector Table Walk error.\n"); else - ret = false; - - goto out; - break; + goto wrong_mc4_mce; + return; case 0x19: - if (boot_cpu_data.x86 == 0x15) + if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16) pr_cont("Compute Unit Data Error.\n"); else - ret = false; - - goto out; - break; + goto wrong_mc4_mce; + return; case 0x1c ... 0x1f: - offset = 24; + offset = 13; break; default: - ret = false; - - goto out; - break; + goto wrong_mc4_mce; } - pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]); - -out: - return ret; -} - -static bool nb_noop_mce(u16 ec, u8 xec) -{ - return false; -} - -void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg) -{ - u16 ec = EC(m->status); - u8 xec = XEC(m->status, 0x1f); - u32 nbsh = (u32)(m->status >> 32); - int core = -1; - - pr_emerg(HW_ERR "Northbridge Error (node %d", node_id); - - /* F10h, revD can disable ErrCpu[3:0] through ErrCpuVal */ - if ((boot_cpu_data.x86 == 0x10) && - (boot_cpu_data.x86_model > 7)) { - if (nbsh & K8_NBSH_ERR_CPU_VAL) - core = nbsh & nb_err_cpumask; - } else { - u8 assoc_cpus = nbsh & nb_err_cpumask; - - if (assoc_cpus > 0) - core = fls(assoc_cpus) - 1; - } - - if (core >= 0) - pr_cont(", core %d): ", core); - else - pr_cont("): "); - - switch (xec) { - case 0x2: - pr_cont("Sync error (sync packets on HT link detected).\n"); - return; - - case 0x3: - pr_cont("HT Master abort.\n"); - return; - - case 0x4: - pr_cont("HT Target abort.\n"); - return; - - case 0x7: - pr_cont("NB Watchdog timeout.\n"); - return; - - case 0x9: - pr_cont("SVM DMA Exclusion Vector error.\n"); - return; - - default: - break; - } - - if (!fam_ops->nb_mce(ec, xec)) - goto wrong_nb_mce; - - if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10) - if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder) - nb_bus_decoder(node_id, m, nbcfg); - + pr_cont("%s.\n", mc4_mce_desc[xec - offset]); return; -wrong_nb_mce: - pr_emerg(HW_ERR "Corrupted NB MCE info?\n"); + wrong_mc4_mce: + pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n"); } -EXPORT_SYMBOL_GPL(amd_decode_nb_mce); -static void amd_decode_fr_mce(struct mce *m) +static void decode_mc5_mce(struct mce *m) { struct cpuinfo_x86 *c = &boot_cpu_data; u8 xec = XEC(m->status, xec_mask); if (c->x86 == 0xf || c->x86 == 0x11) - goto wrong_fr_mce; - - if (c->x86 != 0x15 && xec != 0x0) - goto wrong_fr_mce; + goto wrong_mc5_mce; - pr_emerg(HW_ERR "%s Error: ", - (c->x86 == 0x15 ? "Execution Unit" : "FIROB")); + pr_emerg(HW_ERR "MC5 Error: "); if (xec == 0x0 || xec == 0xc) - pr_cont("%s.\n", fr_ex_mce_desc[xec]); - else if (xec < 0xd) - pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]); + pr_cont("%s.\n", mc5_mce_desc[xec]); + else if (xec <= 0xd) + pr_cont("%s parity error.\n", mc5_mce_desc[xec]); else - goto wrong_fr_mce; + goto wrong_mc5_mce; return; -wrong_fr_mce: - pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); + wrong_mc5_mce: + pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n"); } -static void amd_decode_fp_mce(struct mce *m) +static void decode_mc6_mce(struct mce *m) { u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Floating Point Unit Error: "); + pr_emerg(HW_ERR "MC6 Error: "); switch (xec) { case 0x1: @@ -712,7 +663,7 @@ static void amd_decode_fp_mce(struct mce *m) break; default: - goto wrong_fp_mce; + goto wrong_mc6_mce; break; } @@ -720,12 +671,16 @@ static void amd_decode_fp_mce(struct mce *m) return; -wrong_fp_mce: - pr_emerg(HW_ERR "Corrupted FP MCE info?\n"); + wrong_mc6_mce: + pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n"); } static inline void amd_decode_err_code(u16 ec) { + if (INT_ERROR(ec)) { + pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec)); + return; + } pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec)); @@ -760,16 +715,36 @@ static bool amd_filter_mce(struct mce *m) return false; } +static const char *decode_error_status(struct mce *m) +{ + if (m->status & MCI_STATUS_UC) { + if (m->status & MCI_STATUS_PCC) + return "System Fatal error."; + if (m->mcgstatus & MCG_STATUS_RIPV) + return "Uncorrected, software restartable error."; + return "Uncorrected, software containable error."; + } + + if (m->status & MCI_STATUS_DEFERRED) + return "Deferred error."; + + return "Corrected error, no action required."; +} + int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) { struct mce *m = (struct mce *)data; - struct cpuinfo_x86 *c = &boot_cpu_data; - int node, ecc; + struct cpuinfo_x86 *c = &cpu_data(m->extcpu); + int ecc; if (amd_filter_mce(m)) return NOTIFY_STOP; - pr_emerg(HW_ERR "MC%d_STATUS[%s|%s|%s|%s|%s", + pr_emerg(HW_ERR "%s\n", decode_error_status(m)); + + pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", + m->extcpu, + c->x86, c->x86_model, c->x86_mask, m->bank, ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), @@ -777,10 +752,10 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); - if (c->x86 == 0x15) + if (c->x86 == 0x15 || c->x86 == 0x16) pr_cont("|%s|%s", - ((m->status & BIT_64(44)) ? "Deferred" : "-"), - ((m->status & BIT_64(43)) ? "Poison" : "-")); + ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"), + ((m->status & MCI_STATUS_POISON) ? "Poison" : "-")); /* do the two bits[14:13] together */ ecc = (m->status >> 45) & 0x3; @@ -789,44 +764,46 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) pr_cont("]: 0x%016llx\n", m->status); + if (m->status & MCI_STATUS_ADDRV) + pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr); + + if (!fam_ops) + goto err_code; switch (m->bank) { case 0: - amd_decode_dc_mce(m); + decode_mc0_mce(m); break; case 1: - amd_decode_ic_mce(m); + decode_mc1_mce(m); break; case 2: - if (c->x86 == 0x15) - amd_decode_cu_mce(m); - else - amd_decode_bu_mce(m); + decode_mc2_mce(m); break; case 3: - amd_decode_ls_mce(m); + decode_mc3_mce(m); break; case 4: - node = amd_get_nb_id(m->extcpu); - amd_decode_nb_mce(node, m, 0); + decode_mc4_mce(m); break; case 5: - amd_decode_fr_mce(m); + decode_mc5_mce(m); break; case 6: - amd_decode_fp_mce(m); + decode_mc6_mce(m); break; default: break; } + err_code: amd_decode_err_code(m->status & 0xffff); return NOTIFY_STOP; @@ -842,12 +819,7 @@ static int __init mce_amd_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor != X86_VENDOR_AMD) - return 0; - - if ((c->x86 < 0xf || c->x86 > 0x12) && - (c->x86 != 0x14 || c->x86_model > 0xf) && - (c->x86 != 0x15 || c->x86_model > 0xf)) - return 0; + return -ENODEV; fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); if (!fam_ops) @@ -855,52 +827,58 @@ static int __init mce_amd_init(void) switch (c->x86) { case 0xf: - fam_ops->dc_mce = k8_dc_mce; - fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = k8_nb_mce; + fam_ops->mc0_mce = k8_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x10: - fam_ops->dc_mce = f10h_dc_mce; - fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = f10h_nb_mce; + fam_ops->mc0_mce = f10h_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x11: - fam_ops->dc_mce = k8_dc_mce; - fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = f10h_nb_mce; + fam_ops->mc0_mce = k8_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x12: - fam_ops->dc_mce = f12h_dc_mce; - fam_ops->ic_mce = k8_ic_mce; - fam_ops->nb_mce = nb_noop_mce; + fam_ops->mc0_mce = f12h_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x14: - nb_err_cpumask = 0x3; - fam_ops->dc_mce = f14h_dc_mce; - fam_ops->ic_mce = f14h_ic_mce; - fam_ops->nb_mce = nb_noop_mce; + fam_ops->mc0_mce = cat_mc0_mce; + fam_ops->mc1_mce = cat_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x15: xec_mask = 0x1f; - fam_ops->dc_mce = f15h_dc_mce; - fam_ops->ic_mce = f15h_ic_mce; - fam_ops->nb_mce = f10h_nb_mce; + fam_ops->mc0_mce = f15h_mc0_mce; + fam_ops->mc1_mce = f15h_mc1_mce; + fam_ops->mc2_mce = f15h_mc2_mce; + break; + + case 0x16: + xec_mask = 0x1f; + fam_ops->mc0_mce = cat_mc0_mce; + fam_ops->mc1_mce = cat_mc1_mce; + fam_ops->mc2_mce = f16h_mc2_mce; break; default: - printk(KERN_WARNING "Huh? What family is that: %d?!\n", c->x86); + printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86); kfree(fam_ops); - return -EINVAL; + fam_ops = NULL; } pr_info("MCE: In-kernel MCE decoding enabled.\n"); - atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); + mce_register_decode_chain(&amd_mce_dec_nb); return 0; } @@ -909,7 +887,7 @@ early_initcall(mce_amd_init); #ifdef MODULE static void __exit mce_amd_exit(void) { - atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); + mce_unregister_decode_chain(&amd_mce_dec_nb); kfree(fam_ops); } |
