diff options
author | Mauro Carvalho Chehab <mchehab@redhat.com> | 2012-07-29 21:11:05 -0300 |
---|---|---|
committer | Mauro Carvalho Chehab <mchehab@redhat.com> | 2012-07-29 21:11:05 -0300 |
commit | c2078e4c9120e7b38b1a02cd9fc6dd4f792110bf (patch) | |
tree | a30b29c0bf8cf2288a32ceaeb75013cb0b5d5865 /drivers/edac/edac_mc.c | |
parent | 73bcc49959e4e40911dd0dd634bf1b353827df66 (diff) | |
parent | f58d0dee07fe6328f775669eb6aa3a123efad6c2 (diff) |
Merge branch 'devel'
* devel: (33 commits)
edac i5000, i5400: fix pointer math in i5000_get_mc_regs()
edac: allow specifying the error count with fake_inject
edac: add support for Calxeda highbank L2 cache ecc
edac: add support for Calxeda highbank memory controller
edac: create top-level debugfs directory
sb_edac: properly handle error count
i7core_edac: properly handle error count
edac: edac_mc_handle_error(): add an error_count parameter
edac: remove arch-specific parameter for the error handler
amd64_edac: Don't pass driver name as an error parameter
edac_mc: check for allocation failure in edac_mc_alloc()
edac: Increase version to 3.0.0
edac_mc: Cleanup per-dimm_info debug messages
edac: Convert debugfX to edac_dbg(X,
edac: Use more normal debugging macro style
edac: Don't add __func__ or __FILE__ for debugf[0-9] msgs
Edac: Add ABI Documentation for the new device nodes
edac: move documentation ABI to ABI/testing/sysfs-devices-edac
i7core_edac: change the mem allocation scheme to make Documentation/kobject.txt happy
edac: change the mem allocation scheme to make Documentation/kobject.txt happy
...
Diffstat (limited to 'drivers/edac/edac_mc.c')
-rw-r--r-- | drivers/edac/edac_mc.c | 395 |
1 files changed, 241 insertions, 154 deletions
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index de5ba86e8b8..616d90bcb3a 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -27,70 +27,95 @@ #include <linux/list.h> #include <linux/ctype.h> #include <linux/edac.h> +#include <linux/bitops.h> #include <asm/uaccess.h> #include <asm/page.h> #include <asm/edac.h> #include "edac_core.h" #include "edac_module.h" +#define CREATE_TRACE_POINTS +#define TRACE_INCLUDE_PATH ../../include/ras +#include <ras/ras_event.h> + /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); +unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, + unsigned len) +{ + struct mem_ctl_info *mci = dimm->mci; + int i, n, count = 0; + char *p = buf; + + for (i = 0; i < mci->n_layers; i++) { + n = snprintf(p, len, "%s %d ", + edac_layer_name[mci->layers[i].type], + dimm->location[i]); + p += n; + len -= n; + count += n; + if (!len) + break; + } + + return count; +} + #ifdef CONFIG_EDAC_DEBUG static void edac_mc_dump_channel(struct rank_info *chan) { - debugf4("\tchannel = %p\n", chan); - debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); - debugf4("\tchannel->csrow = %p\n\n", chan->csrow); - debugf4("\tchannel->dimm = %p\n", chan->dimm); + edac_dbg(4, " channel->chan_idx = %d\n", chan->chan_idx); + edac_dbg(4, " channel = %p\n", chan); + edac_dbg(4, " channel->csrow = %p\n", chan->csrow); + edac_dbg(4, " channel->dimm = %p\n", chan->dimm); } -static void edac_mc_dump_dimm(struct dimm_info *dimm) +static void edac_mc_dump_dimm(struct dimm_info *dimm, int number) { - int i; - - debugf4("\tdimm = %p\n", dimm); - debugf4("\tdimm->label = '%s'\n", dimm->label); - debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages); - debugf4("\tdimm location "); - for (i = 0; i < dimm->mci->n_layers; i++) { - printk(KERN_CONT "%d", dimm->location[i]); - if (i < dimm->mci->n_layers - 1) - printk(KERN_CONT "."); - } - printk(KERN_CONT "\n"); - debugf4("\tdimm->grain = %d\n", dimm->grain); - debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages); + char location[80]; + + edac_dimm_info_location(dimm, location, sizeof(location)); + + edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n", + dimm->mci->mem_is_per_rank ? "rank" : "dimm", + number, location, dimm->csrow, dimm->cschannel); + edac_dbg(4, " dimm = %p\n", dimm); + edac_dbg(4, " dimm->label = '%s'\n", dimm->label); + edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); + edac_dbg(4, " dimm->grain = %d\n", dimm->grain); + edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); } static void edac_mc_dump_csrow(struct csrow_info *csrow) { - debugf4("\tcsrow = %p\n", csrow); - debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx); - debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page); - debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page); - debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask); - debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels); - debugf4("\tcsrow->channels = %p\n", csrow->channels); - debugf4("\tcsrow->mci = %p\n\n", csrow->mci); + edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx); + edac_dbg(4, " csrow = %p\n", csrow); + edac_dbg(4, " csrow->first_page = 0x%lx\n", csrow->first_page); + edac_dbg(4, " csrow->last_page = 0x%lx\n", csrow->last_page); + edac_dbg(4, " csrow->page_mask = 0x%lx\n", csrow->page_mask); + edac_dbg(4, " csrow->nr_channels = %d\n", csrow->nr_channels); + edac_dbg(4, " csrow->channels = %p\n", csrow->channels); + edac_dbg(4, " csrow->mci = %p\n", csrow->mci); } static void edac_mc_dump_mci(struct mem_ctl_info *mci) { - debugf3("\tmci = %p\n", mci); - debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap); - debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap); - debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap); - debugf4("\tmci->edac_check = %p\n", mci->edac_check); - debugf3("\tmci->nr_csrows = %d, csrows = %p\n", - mci->nr_csrows, mci->csrows); - debugf3("\tmci->nr_dimms = %d, dimms = %p\n", - mci->tot_dimms, mci->dimms); - debugf3("\tdev = %p\n", mci->dev); - debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name); - debugf3("\tpvt_info = %p\n\n", mci->pvt_info); + edac_dbg(3, "\tmci = %p\n", mci); + edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap); + edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap); + edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap); + edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check); + edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n", + mci->nr_csrows, mci->csrows); + edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n", + mci->tot_dimms, mci->dimms); + edac_dbg(3, "\tdev = %p\n", mci->pdev); + edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n", + mci->mod_name, mci->ctl_name); + edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info); } #endif /* CONFIG_EDAC_DEBUG */ @@ -205,15 +230,15 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, { struct mem_ctl_info *mci; struct edac_mc_layer *layer; - struct csrow_info *csi, *csr; - struct rank_info *chi, *chp, *chan; + struct csrow_info *csr; + struct rank_info *chan; struct dimm_info *dimm; u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; unsigned pos[EDAC_MAX_LAYERS]; unsigned size, tot_dimms = 1, count = 1; unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0; void *pvt, *p, *ptr = NULL; - int i, j, err, row, chn, n, len; + int i, j, row, chn, n, len, off; bool per_rank = false; BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0); @@ -239,26 +264,24 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, */ mci = edac_align_ptr(&ptr, sizeof(*mci), 1); layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers); - csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows); - chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels); - dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms); for (i = 0; i < n_layers; i++) { count *= layers[i].size; - debugf4("%s: errcount layer %d size %d\n", __func__, i, count); + edac_dbg(4, "errcount layer %d size %d\n", i, count); ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); tot_errcount += 2 * count; } - debugf4("%s: allocating %d error counters\n", __func__, tot_errcount); + edac_dbg(4, "allocating %d error counters\n", tot_errcount); pvt = edac_align_ptr(&ptr, sz_pvt, 1); size = ((unsigned long)pvt) + sz_pvt; - debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n", - __func__, size, - tot_dimms, - per_rank ? "ranks" : "dimms", - tot_csrows * tot_channels); + edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n", + size, + tot_dimms, + per_rank ? "ranks" : "dimms", + tot_csrows * tot_channels); + mci = kzalloc(size, GFP_KERNEL); if (mci == NULL) return NULL; @@ -267,9 +290,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, * rather than an imaginary chunk of memory located at address 0. */ layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer)); - csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi)); - chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi)); - dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm)); for (i = 0; i < n_layers; i++) { mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i])); mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i])); @@ -278,8 +298,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, /* setup index and various internal pointers */ mci->mc_idx = mc_num; - mci->csrows = csi; - mci->dimms = dimm; mci->tot_dimms = tot_dimms; mci->pvt_info = pvt; mci->n_layers = n_layers; @@ -290,40 +308,57 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, mci->mem_is_per_rank = per_rank; /* - * Fill the csrow struct + * Alocate and fill the csrow/channels structs */ + mci->csrows = kcalloc(sizeof(*mci->csrows), tot_csrows, GFP_KERNEL); + if (!mci->csrows) + goto error; for (row = 0; row < tot_csrows; row++) { - csr = &csi[row]; + csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL); + if (!csr) + goto error; + mci->csrows[row] = csr; csr->csrow_idx = row; csr->mci = mci; csr->nr_channels = tot_channels; - chp = &chi[row * tot_channels]; - csr->channels = chp; + csr->channels = kcalloc(sizeof(*csr->channels), tot_channels, + GFP_KERNEL); + if (!csr->channels) + goto error; for (chn = 0; chn < tot_channels; chn++) { - chan = &chp[chn]; + chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL); + if (!chan) + goto error; + csr->channels[chn] = chan; chan->chan_idx = chn; chan->csrow = csr; } } /* - * Fill the dimm struct + * Allocate and fill the dimm structs */ + mci->dimms = kcalloc(sizeof(*mci->dimms), tot_dimms, GFP_KERNEL); + if (!mci->dimms) + goto error; + memset(&pos, 0, sizeof(pos)); row = 0; chn = 0; - debugf4("%s: initializing %d %s\n", __func__, tot_dimms, - per_rank ? "ranks" : "dimms"); for (i = 0; i < tot_dimms; i++) { - chan = &csi[row].channels[chn]; - dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers, - pos[0], pos[1], pos[2]); - dimm->mci = mci; + chan = mci->csrows[row]->channels[chn]; + off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]); + if (off < 0 || off >= tot_dimms) { + edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n"); + goto error; + } - debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__, - i, per_rank ? "rank" : "dimm", (dimm - mci->dimms), - pos[0], pos[1], pos[2], row, chn); + dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL); + if (!dimm) + goto error; + mci->dimms[off] = dimm; + dimm->mci = mci; /* * Copy DIMM location and initialize it. @@ -367,16 +402,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, } mci->op_state = OP_ALLOC; - INIT_LIST_HEAD(&mci->grp_kobj_list); - - /* - * Initialize the 'root' kobj for the edac_mc controller - */ - err = edac_mc_register_sysfs_main_kobj(mci); - if (err) { - kfree(mci); - return NULL; - } /* at this point, the root kobj is valid, and in order to * 'free' the object, then the function: @@ -384,7 +409,30 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, * which will perform kobj unregistration and the actual free * will occur during the kobject callback operation */ + return mci; + +error: + if (mci->dimms) { + for (i = 0; i < tot_dimms; i++) + kfree(mci->dimms[i]); + kfree(mci->dimms); + } + if (mci->csrows) { + for (chn = 0; chn < tot_channels; chn++) { + csr = mci->csrows[chn]; + if (csr) { + for (chn = 0; chn < tot_channels; chn++) + kfree(csr->channels[chn]); + kfree(csr); + } + kfree(mci->csrows[i]); + } + kfree(mci->csrows); + } + kfree(mci); + + return NULL; } EXPORT_SYMBOL_GPL(edac_mc_alloc); @@ -395,12 +443,10 @@ EXPORT_SYMBOL_GPL(edac_mc_alloc); */ void edac_mc_free(struct mem_ctl_info *mci) { - debugf1("%s()\n", __func__); + edac_dbg(1, "\n"); - edac_mc_unregister_sysfs_main_kobj(mci); - - /* free the mci instance memory here */ - kfree(mci); + /* the mci instance is freed here, when the sysfs object is dropped */ + edac_unregister_sysfs(mci); } EXPORT_SYMBOL_GPL(edac_mc_free); @@ -417,12 +463,12 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev) struct mem_ctl_info *mci; struct list_head *item; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - if (mci->dev == dev) + if (mci->pdev == dev) return mci; } @@ -485,7 +531,7 @@ static void edac_mc_workq_function(struct work_struct *work_req) */ static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); /* if this instance is not in the POLL state, then simply return */ if (mci->op_state != OP_RUNNING_POLL) @@ -512,8 +558,7 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci) status = cancel_delayed_work(&mci->work); if (status == 0) { - debugf0("%s() not canceled, flush the queue\n", - __func__); + edac_dbg(0, "not canceled, flush the queue\n"); /* workq instance might be running, wait for it */ flush_workqueue(edac_workqueue); @@ -574,7 +619,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) insert_before = &mc_devices; - p = find_mci_by_dev(mci->dev); + p = find_mci_by_dev(mci->pdev); if (unlikely(p != NULL)) goto fail0; @@ -596,7 +641,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) fail0: edac_printk(KERN_WARNING, EDAC_MC, - "%s (%s) %s %s already assigned %d\n", dev_name(p->dev), + "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev), edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); return 1; @@ -660,7 +705,7 @@ EXPORT_SYMBOL(edac_mc_find); /* FIXME - should a warning be printed if no error detection? correction? */ int edac_mc_add_mc(struct mem_ctl_info *mci) { - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) @@ -670,15 +715,22 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) int i; for (i = 0; i < mci->nr_csrows; i++) { + struct csrow_info *csrow = mci->csrows[i]; + u32 nr_pages = 0; int j; - edac_mc_dump_csrow(&mci->csrows[i]); - for (j = 0; j < mci->csrows[i].nr_channels; j++) - edac_mc_dump_channel(&mci->csrows[i]. - channels[j]); + for (j = 0; j < csrow->nr_channels; j++) + nr_pages += csrow->channels[j]->dimm->nr_pages; + if (!nr_pages) + continue; + edac_mc_dump_csrow(csrow); + for (j = 0; j < csrow->nr_channels; j++) + if (csrow->channels[j]->dimm->nr_pages) + edac_mc_dump_channel(csrow->channels[j]); } for (i = 0; i < mci->tot_dimms; i++) - edac_mc_dump_dimm(&mci->dimms[i]); + if (mci->dimms[i]->nr_pages) + edac_mc_dump_dimm(mci->dimms[i], i); } #endif mutex_lock(&mem_ctls_mutex); @@ -732,7 +784,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) { struct mem_ctl_info *mci; - debugf0("%s()\n", __func__); + edac_dbg(0, "\n"); mutex_lock(&mem_ctls_mutex); @@ -770,7 +822,7 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset, void *virt_addr; unsigned long flags = 0; - debugf3("%s()\n", __func__); + edac_dbg(3, "\n"); /* ECC error page was not in our memory. Ignore it. */ if (!pfn_valid(page)) @@ -797,26 +849,26 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset, /* FIXME - should return -1 */ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) { - struct csrow_info *csrows = mci->csrows; + struct csrow_info **csrows = mci->csrows; int row, i, j, n; - debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page); + edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page); row = -1; for (i = 0; i < mci->nr_csrows; i++) { - struct csrow_info *csrow = &csrows[i]; + struct csrow_info *csrow = csrows[i]; n = 0; for (j = 0; j < csrow->nr_channels; j++) { - struct dimm_info *dimm = csrow->channels[j].dimm; + struct dimm_info *dimm = csrow->channels[j]->dimm; n += dimm->nr_pages; } if (n == 0) continue; - debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) " - "mask(0x%lx)\n", mci->mc_idx, __func__, - csrow->first_page, page, csrow->last_page, - csrow->page_mask); + edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n", + mci->mc_idx, + csrow->first_page, page, csrow->last_page, + csrow->page_mask); if ((page >= csrow->first_page) && (page <= csrow->last_page) && @@ -845,15 +897,16 @@ const char *edac_layer_name[] = { EXPORT_SYMBOL_GPL(edac_layer_name); static void edac_inc_ce_error(struct mem_ctl_info *mci, - bool enable_per_layer_report, - const int pos[EDAC_MAX_LAYERS]) + bool enable_per_layer_report, + const int pos[EDAC_MAX_LAYERS], + const u16 count) { int i, index = 0; - mci->ce_mc++; + mci->ce_mc += count; if (!enable_per_layer_report) { - mci->ce_noinfo_count++; + mci->ce_noinfo_count += count; return; } @@ -861,7 +914,7 @@ static void edac_inc_ce_error(struct mem_ctl_info *mci, if (pos[i] < 0) break; index += pos[i]; - mci->ce_per_layer[i][index]++; + mci->ce_per_layer[i][index] += count; if (i < mci->n_layers - 1) index *= mci->layers[i + 1].size; @@ -870,14 +923,15 @@ static void edac_inc_ce_error(struct mem_ctl_info *mci, static void edac_inc_ue_error(struct mem_ctl_info *mci, bool enable_per_layer_report, - const int pos[EDAC_MAX_LAYERS]) + const int pos[EDAC_MAX_LAYERS], + const u16 count) { int i, index = 0; - mci->ue_mc++; + mci->ue_mc += count; if (!enable_per_layer_report) { - mci->ce_noinfo_count++; + mci->ce_noinfo_count += count; return; } @@ -885,7 +939,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci, if (pos[i] < 0) break; index += pos[i]; - mci->ue_per_layer[i][index]++; + mci->ue_per_layer[i][index] += count; if (i < mci->n_layers - 1) index *= mci->layers[i + 1].size; @@ -893,6 +947,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci, } static void edac_ce_error(struct mem_ctl_info *mci, + const u16 error_count, const int pos[EDAC_MAX_LAYERS], const char *msg, const char *location, @@ -902,23 +957,25 @@ static void edac_ce_error(struct mem_ctl_info *mci, const bool enable_per_layer_report, const unsigned long page_frame_number, const unsigned long offset_in_page, - u32 grain) + long grain) { unsigned long remapped_page; if (edac_mc_get_log_ce()) { if (other_detail && *other_detail) edac_mc_printk(mci, KERN_WARNING, - "CE %s on %s (%s%s - %s)\n", + "%d CE %s on %s (%s %s - %s)\n", + error_count, msg, label, location, detail, other_detail); else edac_mc_printk(mci, KERN_WARNING, - "CE %s on %s (%s%s)\n", + "%d CE %s on %s (%s %s)\n", + error_count, msg, label, location, detail); } - edac_inc_ce_error(mci, enable_per_layer_report, pos); + edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count); if (mci->scrub_mode & SCRUB_SW_SRC) { /* @@ -942,6 +999,7 @@ static void edac_ce_error(struct mem_ctl_info *mci, } static void edac_ue_error(struct mem_ctl_info *mci, + const u16 error_count, const int pos[EDAC_MAX_LAYERS], const char *msg, const char *location, @@ -953,12 +1011,14 @@ static void edac_ue_error(struct mem_ctl_info *mci, if (edac_mc_get_log_ue()) { if (other_detail && *other_detail) edac_mc_printk(mci, KERN_WARNING, - "UE %s on %s (%s%s - %s)\n", + "%d UE %s on %s (%s %s - %s)\n", + error_count, msg, label, location, detail, other_detail); else edac_mc_printk(mci, KERN_WARNING, - "UE %s on %s (%s%s)\n", + "%d UE %s on %s (%s %s)\n", + error_count, msg, label, location, detail); } @@ -971,33 +1031,53 @@ static void edac_ue_error(struct mem_ctl_info *mci, msg, label, location, detail); } - edac_inc_ue_error(mci, enable_per_layer_report, pos); + edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } #define OTHER_LABEL " or " + +/** + * edac_mc_handle_error - reports a memory event to userspace + * + * @type: severity of the error (CE/UE/Fatal) + * @mci: a struct mem_ctl_info pointer + * @error_count: Number of errors of the same type + * @page_frame_number: mem page where the error occurred + * @offset_in_page: offset of the error inside the page + * @syndrome: ECC syndrome + * @top_layer: Memory layer[0] position + * @mid_layer: Memory layer[1] position + * @low_layer: Memory layer[2] position + * @msg: Message meaningful to the end users that + * explains the event + * @other_detail: Technical details about the event that + * may help hardware manufacturers and + * EDAC developers to analyse the event + */ void edac_mc_handle_error(const enum hw_event_mc_err_type type, struct mem_ctl_info *mci, + const u16 error_count, const unsigned long page_frame_number, const unsigned long offset_in_page, const unsigned long syndrome, - const int layer0, - const int layer1, - const int layer2, + const int top_layer, + const int mid_layer, + const int low_layer, const char *msg, - const char *other_detail, - const void *mcelog) + const char *other_detail) { /* FIXME: too much for stack: move it to some pre-alocated area */ char detail[80], location[80]; char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; char *p; int row = -1, chan = -1; - int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 }; + int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; int i; - u32 grain; + long grain; bool enable_per_layer_report = false; + u8 grain_bits; - debugf3("MC%d: %s()\n", mci->mc_idx, __func__); + edac_dbg(3, "MC%d\n", mci->mc_idx); /* * Check if the event report is consistent and if the memory @@ -1043,13 +1123,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, p = label; *p = '\0'; for (i = 0; i < mci->tot_dimms; i++) { - struct dimm_info *dimm = &mci->dimms[i]; + struct dimm_info *dimm = mci->dimms[i]; - if (layer0 >= 0 && layer0 != dimm->location[0]) + if (top_layer >= 0 && top_layer != dimm->location[0]) continue; - if (layer1 >= 0 && layer1 != dimm->location[1]) + if (mid_layer >= 0 && mid_layer != dimm->location[1]) continue; - if (layer2 >= 0 && layer2 != dimm->location[2]) + if (low_layer >= 0 && low_layer != dimm->location[2]) continue; /* get the max grain, over the error match range */ @@ -1075,11 +1155,9 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * get csrow/channel of the DIMM, in order to allow * incrementing the compat API counters */ - debugf4("%s: %s csrows map: (%d,%d)\n", - __func__, - mci->mem_is_per_rank ? "rank" : "dimm", - dimm->csrow, dimm->cschannel); - + edac_dbg(4, "%s csrows map: (%d,%d)\n", + mci->mem_is_per_rank ? "rank" : "dimm", + dimm->csrow, dimm->cschannel); if (row == -1) row = dimm->csrow; else if (row >= 0 && row != dimm->csrow) @@ -1095,19 +1173,18 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, if (!enable_per_layer_report) { strcpy(label, "any memory"); } else { - debugf4("%s: csrow/channel to increment: (%d,%d)\n", - __func__, row, chan); + edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); if (p == label) strcpy(label, "unknown memory"); if (type == HW_EVENT_ERR_CORRECTED) { if (row >= 0) { - mci->csrows[row].ce_count++; + mci->csrows[row]->ce_count += error_count; if (chan >= 0) - mci->csrows[row].channels[chan].ce_count++; + mci->csrows[row]->channels[chan]->ce_count += error_count; } } else if (row >= 0) - mci->csrows[row].ue_count++; + mci->csrows[row]->ue_count += error_count; } /* Fill the RAM location data */ @@ -1120,23 +1197,33 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, edac_layer_name[mci->layers[i].type], pos[i]); } + if (p > location) + *(p - 1) = '\0'; + + /* Report the error via the trace interface */ + + grain_bits = fls_long(grain) + 1; + trace_mc_event(type, msg, label, error_count, + mci->mc_idx, top_layer, mid_layer, low_layer, + PAGES_TO_MiB(page_frame_number) | offset_in_page, + grain_bits, syndrome, other_detail); /* Memory type dependent details about the error */ if (type == HW_EVENT_ERR_CORRECTED) { snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx", + "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", page_frame_number, offset_in_page, grain, syndrome); - edac_ce_error(mci, pos, msg, location, label, detail, - other_detail, enable_per_layer_report, + edac_ce_error(mci, error_count, pos, msg, location, label, + detail, other_detail, enable_per_layer_report, page_frame_number, offset_in_page, grain); } else { snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%d", + "page:0x%lx offset:0x%lx grain:%ld", page_frame_number, offset_in_page, grain); - edac_ue_error(mci, pos, msg, location, label, detail, - other_detail, enable_per_layer_report); + edac_ue_error(mci, error_count, pos, msg, location, label, + detail, other_detail, enable_per_layer_report); } } EXPORT_SYMBOL_GPL(edac_mc_handle_error); |