aboutsummaryrefslogtreecommitdiff
path: root/drivers/acpi/apei
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r--drivers/acpi/apei/Kconfig23
-rw-r--r--drivers/acpi/apei/Makefile2
-rw-r--r--drivers/acpi/apei/apei-base.c227
-rw-r--r--drivers/acpi/apei/apei-internal.h39
-rw-r--r--drivers/acpi/apei/cper.c84
-rw-r--r--drivers/acpi/apei/einj.c395
-rw-r--r--drivers/acpi/apei/erst-dbg.c41
-rw-r--r--drivers/acpi/apei/erst.c533
-rw-r--r--drivers/acpi/apei/ghes.c960
-rw-r--r--drivers/acpi/apei/hest.c100
10 files changed, 1970 insertions, 434 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index fca34ccfd29..c4dac715096 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -1,5 +1,8 @@
config ACPI_APEI
bool "ACPI Platform Error Interface (APEI)"
+ select MISC_FILESYSTEMS
+ select PSTORE
+ select UEFI_CPER
depends on X86
help
APEI allows to report errors (for example from the chipset)
@@ -8,9 +11,11 @@ config ACPI_APEI
error injection.
config ACPI_APEI_GHES
- tristate "APEI Generic Hardware Error Source"
- depends on ACPI_APEI && X86
+ bool "APEI Generic Hardware Error Source"
+ depends on ACPI_APEI
select ACPI_HED
+ select IRQ_WORK
+ select GENERIC_ALLOCATOR
help
Generic Hardware Error Source provides a way to report
platform hardware errors (such as that from chipset). It
@@ -21,6 +26,20 @@ config ACPI_APEI_GHES
by firmware to produce more valuable hardware error
information for Linux.
+config ACPI_APEI_PCIEAER
+ bool "APEI PCIe AER logging/recovering support"
+ depends on ACPI_APEI && PCIEAER
+ help
+ PCIe AER errors may be reported via APEI firmware first mode.
+ Turn on this option to enable the corresponding support.
+
+config ACPI_APEI_MEMORY_FAILURE
+ bool "APEI memory error recovering support"
+ depends on ACPI_APEI && MEMORY_FAILURE
+ help
+ Memory errors may be reported via APEI firmware first mode.
+ Turn on this option to enable the memory recovering support.
+
config ACPI_APEI_EINJ
tristate "APEI Error INJection (EINJ)"
depends on ACPI_APEI && DEBUG_FS
diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile
index d1d1bc0a4ee..5d575a95594 100644
--- a/drivers/acpi/apei/Makefile
+++ b/drivers/acpi/apei/Makefile
@@ -3,4 +3,4 @@ obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o
obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o
obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o
-apei-y := apei-base.o hest.o cper.o erst.o
+apei-y := apei-base.o hest.o erst.o
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 4a904a4bf05..8678dfe5366 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -40,7 +40,7 @@
#include <linux/rculist.h>
#include <linux/interrupt.h>
#include <linux/debugfs.h>
-#include <acpi/atomicio.h>
+#include <asm/unaligned.h>
#include "apei-internal.h"
@@ -70,7 +70,7 @@ int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val)
{
int rc;
- rc = acpi_atomic_read(val, &entry->register_region);
+ rc = apei_read(val, &entry->register_region);
if (rc)
return rc;
*val >>= entry->register_region.bit_offset;
@@ -116,13 +116,13 @@ int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val)
val <<= entry->register_region.bit_offset;
if (entry->flags & APEI_EXEC_PRESERVE_REGISTER) {
u64 valr = 0;
- rc = acpi_atomic_read(&valr, &entry->register_region);
+ rc = apei_read(&valr, &entry->register_region);
if (rc)
return rc;
valr &= ~(entry->mask << entry->register_region.bit_offset);
val |= valr;
}
- rc = acpi_atomic_write(val, &entry->register_region);
+ rc = apei_write(val, &entry->register_region);
return rc;
}
@@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop);
* Interpret the specified action. Go through whole action table,
* execute all instructions belong to the action.
*/
-int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
+ bool optional)
{
- int rc;
+ int rc = -ENOENT;
u32 i, ip;
struct acpi_whea_header *entry;
apei_exec_ins_func_t run;
@@ -198,9 +199,9 @@ rewind:
goto rewind;
}
- return 0;
+ return !optional && rc < 0 ? rc : 0;
}
-EXPORT_SYMBOL_GPL(apei_exec_run);
+EXPORT_SYMBOL_GPL(__apei_exec_run);
typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
struct acpi_whea_header *entry,
@@ -242,7 +243,7 @@ static int pre_map_gar_callback(struct apei_exec_context *ctx,
u8 ins = entry->instruction;
if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
- return acpi_pre_map_gar(&entry->register_region);
+ return apei_map_generic_address(&entry->register_region);
return 0;
}
@@ -275,7 +276,7 @@ static int post_unmap_gar_callback(struct apei_exec_context *ctx,
u8 ins = entry->instruction;
if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
- acpi_post_unmap_gar(&entry->register_region);
+ apei_unmap_generic_address(&entry->register_region);
return 0;
}
@@ -420,6 +421,17 @@ static int apei_resources_merge(struct apei_resources *resources1,
return 0;
}
+int apei_resources_add(struct apei_resources *resources,
+ unsigned long start, unsigned long size,
+ bool iomem)
+{
+ if (iomem)
+ return apei_res_add(&resources->iomem, start, size);
+ else
+ return apei_res_add(&resources->ioport, start, size);
+}
+EXPORT_SYMBOL_GPL(apei_resources_add);
+
/*
* EINJ has two groups of GARs (EINJ table entry and trigger table
* entry), so common resources are subtracted from the trigger table
@@ -437,8 +449,19 @@ int apei_resources_sub(struct apei_resources *resources1,
}
EXPORT_SYMBOL_GPL(apei_resources_sub);
+static int apei_get_nvs_callback(__u64 start, __u64 size, void *data)
+{
+ struct apei_resources *resources = data;
+ return apei_res_add(&resources->iomem, start, size);
+}
+
+static int apei_get_nvs_resources(struct apei_resources *resources)
+{
+ return acpi_nvs_for_each_region(apei_get_nvs_callback, resources);
+}
+
/*
- * IO memory/port rersource management mechanism is used to check
+ * IO memory/port resource management mechanism is used to check
* whether memory/port area used by GARs conflicts with normal memory
* or IO memory/port of devices.
*/
@@ -447,21 +470,35 @@ int apei_resources_request(struct apei_resources *resources,
{
struct apei_res *res, *res_bak = NULL;
struct resource *r;
+ struct apei_resources nvs_resources;
int rc;
rc = apei_resources_sub(resources, &apei_resources_all);
if (rc)
return rc;
+ /*
+ * Some firmware uses ACPI NVS region, that has been marked as
+ * busy, so exclude it from APEI resources to avoid false
+ * conflict.
+ */
+ apei_resources_init(&nvs_resources);
+ rc = apei_get_nvs_resources(&nvs_resources);
+ if (rc)
+ goto res_fini;
+ rc = apei_resources_sub(resources, &nvs_resources);
+ if (rc)
+ goto res_fini;
+
rc = -EINVAL;
list_for_each_entry(res, &resources->iomem, list) {
r = request_mem_region(res->start, res->end - res->start,
desc);
if (!r) {
pr_err(APEI_PFX
- "Can not request iomem region <%016llx-%016llx> for GARs.\n",
+ "Can not request [mem %#010llx-%#010llx] for %s registers\n",
(unsigned long long)res->start,
- (unsigned long long)res->end);
+ (unsigned long long)res->end - 1, desc);
res_bak = res;
goto err_unmap_iomem;
}
@@ -471,9 +508,9 @@ int apei_resources_request(struct apei_resources *resources,
r = request_region(res->start, res->end - res->start, desc);
if (!r) {
pr_err(APEI_PFX
- "Can not request ioport region <%016llx-%016llx> for GARs.\n",
+ "Can not request [io %#06llx-%#06llx] for %s registers\n",
(unsigned long long)res->start,
- (unsigned long long)res->end);
+ (unsigned long long)res->end - 1, desc);
res_bak = res;
goto err_unmap_ioport;
}
@@ -499,6 +536,8 @@ err_unmap_iomem:
break;
release_mem_region(res->start, res->end - res->start);
}
+res_fini:
+ apei_resources_fini(&nvs_resources);
return rc;
}
EXPORT_SYMBOL_GPL(apei_resources_request);
@@ -519,38 +558,139 @@ void apei_resources_release(struct apei_resources *resources)
}
EXPORT_SYMBOL_GPL(apei_resources_release);
-static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr)
+static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr,
+ u32 *access_bit_width)
{
- u32 width, space_id;
+ u32 bit_width, bit_offset, access_size_code, space_id;
- width = reg->bit_width;
+ bit_width = reg->bit_width;
+ bit_offset = reg->bit_offset;
+ access_size_code = reg->access_width;
space_id = reg->space_id;
- /* Handle possible alignment issues */
- memcpy(paddr, &reg->address, sizeof(*paddr));
+ *paddr = get_unaligned(&reg->address);
if (!*paddr) {
pr_warning(FW_BUG APEI_PFX
- "Invalid physical address in GAR [0x%llx/%u/%u]\n",
- *paddr, width, space_id);
+ "Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
return -EINVAL;
}
- if ((width != 8) && (width != 16) && (width != 32) && (width != 64)) {
+ if (access_size_code < 1 || access_size_code > 4) {
pr_warning(FW_BUG APEI_PFX
- "Invalid bit width in GAR [0x%llx/%u/%u]\n",
- *paddr, width, space_id);
+ "Invalid access size code in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
+ return -EINVAL;
+ }
+ *access_bit_width = 1UL << (access_size_code + 2);
+
+ /* Fixup common BIOS bug */
+ if (bit_width == 32 && bit_offset == 0 && (*paddr & 0x03) == 0 &&
+ *access_bit_width < 32)
+ *access_bit_width = 32;
+ else if (bit_width == 64 && bit_offset == 0 && (*paddr & 0x07) == 0 &&
+ *access_bit_width < 64)
+ *access_bit_width = 64;
+
+ if ((bit_width + bit_offset) > *access_bit_width) {
+ pr_warning(FW_BUG APEI_PFX
+ "Invalid bit width + offset in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
return -EINVAL;
}
if (space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY &&
space_id != ACPI_ADR_SPACE_SYSTEM_IO) {
pr_warning(FW_BUG APEI_PFX
- "Invalid address space type in GAR [0x%llx/%u/%u]\n",
- *paddr, width, space_id);
+ "Invalid address space type in GAR [0x%llx/%u/%u/%u/%u]\n",
+ *paddr, bit_width, bit_offset, access_size_code,
+ space_id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int apei_map_generic_address(struct acpi_generic_address *reg)
+{
+ int rc;
+ u32 access_bit_width;
+ u64 address;
+
+ rc = apei_check_gar(reg, &address, &access_bit_width);
+ if (rc)
+ return rc;
+ return acpi_os_map_generic_address(reg);
+}
+EXPORT_SYMBOL_GPL(apei_map_generic_address);
+
+/* read GAR in interrupt (including NMI) or process context */
+int apei_read(u64 *val, struct acpi_generic_address *reg)
+{
+ int rc;
+ u32 access_bit_width;
+ u64 address;
+ acpi_status status;
+
+ rc = apei_check_gar(reg, &address, &access_bit_width);
+ if (rc)
+ return rc;
+
+ *val = 0;
+ switch(reg->space_id) {
+ case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+ status = acpi_os_read_memory((acpi_physical_address) address,
+ val, access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ status = acpi_os_read_port(address, (u32 *)val,
+ access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ default:
return -EINVAL;
}
return 0;
}
+EXPORT_SYMBOL_GPL(apei_read);
+
+/* write GAR in interrupt (including NMI) or process context */
+int apei_write(u64 val, struct acpi_generic_address *reg)
+{
+ int rc;
+ u32 access_bit_width;
+ u64 address;
+ acpi_status status;
+
+ rc = apei_check_gar(reg, &address, &access_bit_width);
+ if (rc)
+ return rc;
+
+ switch (reg->space_id) {
+ case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+ status = acpi_os_write_memory((acpi_physical_address) address,
+ val, access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ status = acpi_os_write_port(address, val, access_bit_width);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(apei_write);
static int collect_res_callback(struct apei_exec_context *ctx,
struct acpi_whea_header *entry,
@@ -559,23 +699,24 @@ static int collect_res_callback(struct apei_exec_context *ctx,
struct apei_resources *resources = data;
struct acpi_generic_address *reg = &entry->register_region;
u8 ins = entry->instruction;
+ u32 access_bit_width;
u64 paddr;
int rc;
if (!(ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER))
return 0;
- rc = apei_check_gar(reg, &paddr);
+ rc = apei_check_gar(reg, &paddr, &access_bit_width);
if (rc)
return rc;
switch (reg->space_id) {
case ACPI_ADR_SPACE_SYSTEM_MEMORY:
return apei_res_add(&resources->iomem, paddr,
- reg->bit_width / 8);
+ access_bit_width / 8);
case ACPI_ADR_SPACE_SYSTEM_IO:
return apei_res_add(&resources->ioport, paddr,
- reg->bit_width / 8);
+ access_bit_width / 8);
default:
return -EINVAL;
}
@@ -603,3 +744,29 @@ struct dentry *apei_get_debugfs_dir(void)
return dapei;
}
EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
+
+int apei_osc_setup(void)
+{
+ static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
+ acpi_handle handle;
+ u32 capbuf[3];
+ struct acpi_osc_context context = {
+ .uuid_str = whea_uuid_str,
+ .rev = 1,
+ .cap.length = sizeof(capbuf),
+ .cap.pointer = capbuf,
+ };
+
+ capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE;
+ capbuf[OSC_SUPPORT_DWORD] = 1;
+ capbuf[OSC_CONTROL_DWORD] = 0;
+
+ if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
+ || ACPI_FAILURE(acpi_run_osc(handle, &context)))
+ return -EIO;
+ else {
+ kfree(context.ret.pointer);
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(apei_osc_setup);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index 18df1e94027..e5bcd919d4e 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -7,6 +7,7 @@
#define APEI_INTERNAL_H
#include <linux/cper.h>
+#include <linux/acpi.h>
struct apei_exec_context;
@@ -50,13 +51,34 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
return ctx->value;
}
-int apei_exec_run(struct apei_exec_context *ctx, u8 action);
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional);
+
+static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+{
+ return __apei_exec_run(ctx, action, 0);
+}
+
+/* It is optional whether the firmware provides the action */
+static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
+{
+ return __apei_exec_run(ctx, action, 1);
+}
/* Common instruction implementation */
/* IP has been set in instruction function */
#define APEI_EXEC_SET_IP 1
+int apei_map_generic_address(struct acpi_generic_address *reg);
+
+static inline void apei_unmap_generic_address(struct acpi_generic_address *reg)
+{
+ acpi_os_unmap_generic_address(reg);
+}
+
+int apei_read(u64 *val, struct acpi_generic_address *reg);
+int apei_write(u64 val, struct acpi_generic_address *reg);
+
int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val);
int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val);
int apei_exec_read_register(struct apei_exec_context *ctx,
@@ -84,6 +106,9 @@ static inline void apei_resources_init(struct apei_resources *resources)
}
void apei_resources_fini(struct apei_resources *resources);
+int apei_resources_add(struct apei_resources *resources,
+ unsigned long start, unsigned long size,
+ bool iomem);
int apei_resources_sub(struct apei_resources *resources1,
struct apei_resources *resources2);
int apei_resources_request(struct apei_resources *resources,
@@ -96,11 +121,11 @@ struct dentry;
struct dentry *apei_get_debugfs_dir(void);
#define apei_estatus_for_each_section(estatus, section) \
- for (section = (struct acpi_hest_generic_data *)(estatus + 1); \
+ for (section = (struct acpi_generic_data *)(estatus + 1); \
(void *)section - (void *)estatus < estatus->data_length; \
section = (void *)(section+1) + section->error_data_length)
-static inline u32 apei_estatus_len(struct acpi_hest_generic_status *estatus)
+static inline u32 cper_estatus_len(struct acpi_generic_status *estatus)
{
if (estatus->raw_data_length)
return estatus->raw_data_offset + \
@@ -109,6 +134,10 @@ static inline u32 apei_estatus_len(struct acpi_hest_generic_status *estatus)
return sizeof(*estatus) + estatus->data_length;
}
-int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
-int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
+void cper_estatus_print(const char *pfx,
+ const struct acpi_generic_status *estatus);
+int cper_estatus_check_header(const struct acpi_generic_status *estatus);
+int cper_estatus_check(const struct acpi_generic_status *estatus);
+
+int apei_osc_setup(void);
#endif
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
deleted file mode 100644
index f4cf2fc4c8c..00000000000
--- a/drivers/acpi/apei/cper.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * UEFI Common Platform Error Record (CPER) support
- *
- * Copyright (C) 2010, Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- *
- * CPER is the format used to describe platform hardware error by
- * various APEI tables, such as ERST, BERT and HEST etc.
- *
- * For more information about CPER, please refer to Appendix N of UEFI
- * Specification version 2.3.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/time.h>
-#include <linux/cper.h>
-#include <linux/acpi.h>
-
-/*
- * CPER record ID need to be unique even after reboot, because record
- * ID is used as index for ERST storage, while CPER records from
- * multiple boot may co-exist in ERST.
- */
-u64 cper_next_record_id(void)
-{
- static atomic64_t seq;
-
- if (!atomic64_read(&seq))
- atomic64_set(&seq, ((u64)get_seconds()) << 32);
-
- return atomic64_inc_return(&seq);
-}
-EXPORT_SYMBOL_GPL(cper_next_record_id);
-
-int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
-{
- if (estatus->data_length &&
- estatus->data_length < sizeof(struct acpi_hest_generic_data))
- return -EINVAL;
- if (estatus->raw_data_length &&
- estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
- return -EINVAL;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(apei_estatus_check_header);
-
-int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
-{
- struct acpi_hest_generic_data *gdata;
- unsigned int data_len, gedata_len;
- int rc;
-
- rc = apei_estatus_check_header(estatus);
- if (rc)
- return rc;
- data_len = estatus->data_length;
- gdata = (struct acpi_hest_generic_data *)(estatus + 1);
- while (data_len > sizeof(*gdata)) {
- gedata_len = gdata->error_data_length;
- if (gedata_len > data_len - sizeof(*gdata))
- return -EINVAL;
- data_len -= gedata_len + sizeof(*gdata);
- }
- if (data_len)
- return -EINVAL;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(apei_estatus_check);
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index cf29df69380..a095d4f858d 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -32,21 +32,65 @@
#include <linux/seq_file.h>
#include <linux/nmi.h>
#include <linux/delay.h>
-#include <acpi/acpi.h>
+#include <linux/mm.h>
+#include <asm/unaligned.h>
#include "apei-internal.h"
#define EINJ_PFX "EINJ: "
#define SPIN_UNIT 100 /* 100ns */
-/* Firmware should respond within 1 miliseconds */
+/* Firmware should respond within 1 milliseconds */
#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC)
+#define ACPI5_VENDOR_BIT BIT(31)
+#define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \
+ ACPI_EINJ_MEMORY_UNCORRECTABLE | \
+ ACPI_EINJ_MEMORY_FATAL)
+
+/*
+ * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
+ */
+static int acpi5;
+
+struct set_error_type_with_address {
+ u32 type;
+ u32 vendor_extension;
+ u32 flags;
+ u32 apicid;
+ u64 memory_address;
+ u64 memory_address_range;
+ u32 pcie_sbdf;
+};
+enum {
+ SETWA_FLAGS_APICID = 1,
+ SETWA_FLAGS_MEM = 2,
+ SETWA_FLAGS_PCIE_SBDF = 4,
+};
+
+/*
+ * Vendor extensions for platform specific operations
+ */
+struct vendor_error_type_extension {
+ u32 length;
+ u32 pcie_sbdf;
+ u16 vendor_id;
+ u16 device_id;
+ u8 rev_id;
+ u8 reserved[3];
+};
+
+static u32 notrigger;
+
+static u32 vendor_flags;
+static struct debugfs_blob_wrapper vendor_blob;
+static char vendor_dev[64];
/*
* Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
* EINJ table through an unpublished extension. Use with caution as
* most will ignore the parameter and make their own choice of address
- * for error injection.
+ * for error injection. This extension is used only if
+ * param_extension module parameter is specified.
*/
struct einj_parameter {
u64 type;
@@ -65,6 +109,9 @@ struct einj_parameter {
((struct acpi_whea_header *)((char *)(tab) + \
sizeof(struct acpi_table_einj)))
+static bool param_extension;
+module_param(param_extension, bool, 0);
+
static struct acpi_table_einj *einj_tab;
static struct apei_resources einj_resources;
@@ -99,7 +146,7 @@ static struct apei_exec_ins_type einj_ins_type[] = {
*/
static DEFINE_MUTEX(einj_mutex);
-static struct einj_parameter *einj_param;
+static void *einj_param;
static void einj_exec_ctx_init(struct apei_exec_context *ctx)
{
@@ -146,10 +193,30 @@ static int einj_timedout(u64 *t)
return 0;
}
-static u64 einj_get_parameter_address(void)
+static void check_vendor_extension(u64 paddr,
+ struct set_error_type_with_address *v5param)
+{
+ int offset = v5param->vendor_extension;
+ struct vendor_error_type_extension *v;
+ u32 sbdf;
+
+ if (!offset)
+ return;
+ v = acpi_os_map_iomem(paddr + offset, sizeof(*v));
+ if (!v)
+ return;
+ sbdf = v->pcie_sbdf;
+ sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n",
+ sbdf >> 24, (sbdf >> 16) & 0xff,
+ (sbdf >> 11) & 0x1f, (sbdf >> 8) & 0x7,
+ v->vendor_id, v->device_id, v->rev_id);
+ acpi_os_unmap_iomem(v, sizeof(*v));
+}
+
+static void *einj_get_parameter_address(void)
{
int i;
- u64 paddr = 0;
+ u64 pa_v4 = 0, pa_v5 = 0;
struct acpi_whea_header *entry;
entry = EINJ_TAB_ENTRY(einj_tab);
@@ -158,12 +225,38 @@ static u64 einj_get_parameter_address(void)
entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
entry->register_region.space_id ==
ACPI_ADR_SPACE_SYSTEM_MEMORY)
- memcpy(&paddr, &entry->register_region.address,
- sizeof(paddr));
+ pa_v4 = get_unaligned(&entry->register_region.address);
+ if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
+ entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
+ entry->register_region.space_id ==
+ ACPI_ADR_SPACE_SYSTEM_MEMORY)
+ pa_v5 = get_unaligned(&entry->register_region.address);
entry++;
}
+ if (pa_v5) {
+ struct set_error_type_with_address *v5param;
+
+ v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
+ if (v5param) {
+ acpi5 = 1;
+ check_vendor_extension(pa_v5, v5param);
+ return v5param;
+ }
+ }
+ if (param_extension && pa_v4) {
+ struct einj_parameter *v4param;
+
+ v4param = acpi_os_map_iomem(pa_v4, sizeof(*v4param));
+ if (!v4param)
+ return NULL;
+ if (v4param->reserved1 || v4param->reserved2) {
+ acpi_os_unmap_iomem(v4param, sizeof(*v4param));
+ return NULL;
+ }
+ return v4param;
+ }
- return paddr;
+ return NULL;
}
/* do sanity check to trigger table */
@@ -172,7 +265,7 @@ static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab)
if (trigger_tab->header_size != sizeof(struct acpi_einj_trigger))
return -EINVAL;
if (trigger_tab->table_size > PAGE_SIZE ||
- trigger_tab->table_size <= trigger_tab->header_size)
+ trigger_tab->table_size < trigger_tab->header_size)
return -EINVAL;
if (trigger_tab->entry_count !=
(trigger_tab->table_size - trigger_tab->header_size) /
@@ -182,8 +275,29 @@ static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab)
return 0;
}
+static struct acpi_generic_address *einj_get_trigger_parameter_region(
+ struct acpi_einj_trigger *trigger_tab, u64 param1, u64 param2)
+{
+ int i;
+ struct acpi_whea_header *entry;
+
+ entry = (struct acpi_whea_header *)
+ ((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
+ for (i = 0; i < trigger_tab->entry_count; i++) {
+ if (entry->action == ACPI_EINJ_TRIGGER_ERROR &&
+ entry->instruction == ACPI_EINJ_WRITE_REGISTER_VALUE &&
+ entry->register_region.space_id ==
+ ACPI_ADR_SPACE_SYSTEM_MEMORY &&
+ (entry->register_region.address & param2) == (param1 & param2))
+ return &entry->register_region;
+ entry++;
+ }
+
+ return NULL;
+}
/* Execute instructions in trigger error action table */
-static int __einj_error_trigger(u64 trigger_paddr)
+static int __einj_error_trigger(u64 trigger_paddr, u32 type,
+ u64 param1, u64 param2)
{
struct acpi_einj_trigger *trigger_tab = NULL;
struct apei_exec_context trigger_ctx;
@@ -192,14 +306,16 @@ static int __einj_error_trigger(u64 trigger_paddr)
struct resource *r;
u32 table_size;
int rc = -EIO;
+ struct acpi_generic_address *trigger_param_region = NULL;
r = request_mem_region(trigger_paddr, sizeof(*trigger_tab),
"APEI EINJ Trigger Table");
if (!r) {
pr_err(EINJ_PFX
- "Can not request iomem region <%016llx-%016llx> for Trigger table.\n",
+ "Can not request [mem %#010llx-%#010llx] for Trigger table\n",
(unsigned long long)trigger_paddr,
- (unsigned long long)trigger_paddr+sizeof(*trigger_tab));
+ (unsigned long long)trigger_paddr +
+ sizeof(*trigger_tab) - 1);
goto out;
}
trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab));
@@ -213,6 +329,11 @@ static int __einj_error_trigger(u64 trigger_paddr)
"The trigger error action table is invalid\n");
goto out_rel_header;
}
+
+ /* No action structures in the TRIGGER_ERROR table, nothing to do */
+ if (!trigger_tab->entry_count)
+ goto out_rel_header;
+
rc = -EIO;
table_size = trigger_tab->table_size;
r = request_mem_region(trigger_paddr + sizeof(*trigger_tab),
@@ -220,9 +341,9 @@ static int __einj_error_trigger(u64 trigger_paddr)
"APEI EINJ Trigger Table");
if (!r) {
pr_err(EINJ_PFX
-"Can not request iomem region <%016llx-%016llx> for Trigger Table Entry.\n",
- (unsigned long long)trigger_paddr+sizeof(*trigger_tab),
- (unsigned long long)trigger_paddr + table_size);
+"Can not request [mem %#010llx-%#010llx] for Trigger Table Entry\n",
+ (unsigned long long)trigger_paddr + sizeof(*trigger_tab),
+ (unsigned long long)trigger_paddr + table_size - 1);
goto out_rel_header;
}
iounmap(trigger_tab);
@@ -243,6 +364,30 @@ static int __einj_error_trigger(u64 trigger_paddr)
rc = apei_resources_sub(&trigger_resources, &einj_resources);
if (rc)
goto out_fini;
+ /*
+ * Some firmware will access target address specified in
+ * param1 to trigger the error when injecting memory error.
+ * This will cause resource conflict with regular memory. So
+ * remove it from trigger table resources.
+ */
+ if ((param_extension || acpi5) && (type & MEM_ERROR_MASK) && param2) {
+ struct apei_resources addr_resources;
+ apei_resources_init(&addr_resources);
+ trigger_param_region = einj_get_trigger_parameter_region(
+ trigger_tab, param1, param2);
+ if (trigger_param_region) {
+ rc = apei_resources_add(&addr_resources,
+ trigger_param_region->address,
+ trigger_param_region->bit_width/8, true);
+ if (rc)
+ goto out_fini;
+ rc = apei_resources_sub(&trigger_resources,
+ &addr_resources);
+ }
+ apei_resources_fini(&addr_resources);
+ if (rc)
+ goto out_fini;
+ }
rc = apei_resources_request(&trigger_resources, "APEI EINJ Trigger");
if (rc)
goto out_fini;
@@ -269,7 +414,8 @@ out:
return rc;
}
-static int __einj_error_inject(u32 type, u64 param1, u64 param2)
+static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4)
{
struct apei_exec_context ctx;
u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
@@ -277,16 +423,66 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
einj_exec_ctx_init(&ctx);
- rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION);
+ rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, type);
- rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
- if (rc)
- return rc;
- if (einj_param) {
- writeq(param1, &einj_param->param1);
- writeq(param2, &einj_param->param2);
+ if (acpi5) {
+ struct set_error_type_with_address *v5param = einj_param;
+
+ v5param->type = type;
+ if (type & ACPI5_VENDOR_BIT) {
+ switch (vendor_flags) {
+ case SETWA_FLAGS_APICID:
+ v5param->apicid = param1;
+ break;
+ case SETWA_FLAGS_MEM:
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ break;
+ case SETWA_FLAGS_PCIE_SBDF:
+ v5param->pcie_sbdf = param1;
+ break;
+ }
+ v5param->flags = vendor_flags;
+ } else if (flags) {
+ v5param->flags = flags;
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ v5param->apicid = param3;
+ v5param->pcie_sbdf = param4;
+ } else {
+ switch (type) {
+ case ACPI_EINJ_PROCESSOR_CORRECTABLE:
+ case ACPI_EINJ_PROCESSOR_UNCORRECTABLE:
+ case ACPI_EINJ_PROCESSOR_FATAL:
+ v5param->apicid = param1;
+ v5param->flags = SETWA_FLAGS_APICID;
+ break;
+ case ACPI_EINJ_MEMORY_CORRECTABLE:
+ case ACPI_EINJ_MEMORY_UNCORRECTABLE:
+ case ACPI_EINJ_MEMORY_FATAL:
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ v5param->flags = SETWA_FLAGS_MEM;
+ break;
+ case ACPI_EINJ_PCIX_CORRECTABLE:
+ case ACPI_EINJ_PCIX_UNCORRECTABLE:
+ case ACPI_EINJ_PCIX_FATAL:
+ v5param->pcie_sbdf = param1;
+ v5param->flags = SETWA_FLAGS_PCIE_SBDF;
+ break;
+ }
+ }
+ } else {
+ rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
+ if (rc)
+ return rc;
+ if (einj_param) {
+ struct einj_parameter *v4param = einj_param;
+ v4param->param1 = param1;
+ v4param->param2 = param2;
+ }
}
rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION);
if (rc)
@@ -312,29 +508,67 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
if (rc)
return rc;
trigger_paddr = apei_exec_ctx_get_output(&ctx);
- rc = __einj_error_trigger(trigger_paddr);
- if (rc)
- return rc;
- rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION);
+ if (notrigger == 0) {
+ rc = __einj_error_trigger(trigger_paddr, type, param1, param2);
+ if (rc)
+ return rc;
+ }
+ rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
return rc;
}
/* Inject the specified hardware error */
-static int einj_error_inject(u32 type, u64 param1, u64 param2)
+static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+ u64 param3, u64 param4)
{
int rc;
+ unsigned long pfn;
+ /* If user manually set "flags", make sure it is legal */
+ if (flags && (flags &
+ ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
+ return -EINVAL;
+
+ /*
+ * We need extra sanity checks for memory errors.
+ * Other types leap directly to injection.
+ */
+
+ /* ensure param1/param2 existed */
+ if (!(param_extension || acpi5))
+ goto inject;
+
+ /* ensure injection is memory related */
+ if (type & ACPI5_VENDOR_BIT) {
+ if (vendor_flags != SETWA_FLAGS_MEM)
+ goto inject;
+ } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
+ goto inject;
+
+ /*
+ * Disallow crazy address masks that give BIOS leeway to pick
+ * injection address almost anywhere. Insist on page or
+ * better granularity and that target address is normal RAM.
+ */
+ pfn = PFN_DOWN(param1 & param2);
+ if (!page_is_ram(pfn) || ((param2 & PAGE_MASK) != PAGE_MASK))
+ return -EINVAL;
+
+inject:
mutex_lock(&einj_mutex);
- rc = __einj_error_inject(type, param1, param2);
+ rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
mutex_unlock(&einj_mutex);
return rc;
}
static u32 error_type;
+static u32 error_flags;
static u64 error_param1;
static u64 error_param2;
+static u64 error_param3;
+static u64 error_param4;
static struct dentry *einj_debug_dir;
static int available_error_type_show(struct seq_file *m, void *v)
@@ -396,15 +630,25 @@ static int error_type_set(void *data, u64 val)
{
int rc;
u32 available_error_type = 0;
+ u32 tval, vendor;
+
+ /*
+ * Vendor defined types have 0x80000000 bit set, and
+ * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
+ */
+ vendor = val & ACPI5_VENDOR_BIT;
+ tval = val & 0x7fffffff;
/* Only one error type can be specified */
- if (val & (val - 1))
- return -EINVAL;
- rc = einj_get_available_error_type(&available_error_type);
- if (rc)
- return rc;
- if (!(val & available_error_type))
+ if (tval & (tval - 1))
return -EINVAL;
+ if (!vendor) {
+ rc = einj_get_available_error_type(&available_error_type);
+ if (rc)
+ return rc;
+ if (!(val & available_error_type))
+ return -EINVAL;
+ }
error_type = val;
return 0;
@@ -418,7 +662,8 @@ static int error_inject_set(void *data, u64 val)
if (!error_type)
return -EINVAL;
- return einj_error_inject(error_type, error_param1, error_param2);
+ return einj_error_inject(error_type, error_flags, error_param1, error_param2,
+ error_param3, error_param4);
}
DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL,
@@ -443,7 +688,6 @@ static int einj_check_table(struct acpi_table_einj *einj_tab)
static int __init einj_init(void)
{
int rc;
- u64 param_paddr;
acpi_status status;
struct dentry *fentry;
struct apei_exec_context ctx;
@@ -453,10 +697,9 @@ static int __init einj_init(void)
status = acpi_get_table(ACPI_SIG_EINJ, 0,
(struct acpi_table_header **)&einj_tab);
- if (status == AE_NOT_FOUND) {
- pr_info(EINJ_PFX "Table is not found!\n");
+ if (status == AE_NOT_FOUND)
return -ENODEV;
- } else if (ACPI_FAILURE(status)) {
+ else if (ACPI_FAILURE(status)) {
const char *msg = acpi_format_exception(status);
pr_err(EINJ_PFX "Failed to get table, %s\n", msg);
return -EINVAL;
@@ -481,14 +724,6 @@ static int __init einj_init(void)
einj_debug_dir, NULL, &error_type_fops);
if (!fentry)
goto err_cleanup;
- fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
- einj_debug_dir, &error_param1);
- if (!fentry)
- goto err_cleanup;
- fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
- einj_debug_dir, &error_param2);
- if (!fentry)
- goto err_cleanup;
fentry = debugfs_create_file("error_inject", S_IWUSR,
einj_debug_dir, NULL, &error_inject_fops);
if (!fentry)
@@ -505,11 +740,47 @@ static int __init einj_init(void)
rc = apei_exec_pre_map_gars(&ctx);
if (rc)
goto err_release;
- param_paddr = einj_get_parameter_address();
- if (param_paddr) {
- einj_param = ioremap(param_paddr, sizeof(*einj_param));
- rc = -ENOMEM;
- if (!einj_param)
+
+ rc = -ENOMEM;
+ einj_param = einj_get_parameter_address();
+ if ((param_extension || acpi5) && einj_param) {
+ fentry = debugfs_create_x32("flags", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_flags);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param1);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param2);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param3", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param3);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x64("param4", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &error_param4);
+ if (!fentry)
+ goto err_unmap;
+
+ fentry = debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &notrigger);
+ if (!fentry)
+ goto err_unmap;
+ }
+
+ if (vendor_dev[0]) {
+ vendor_blob.data = vendor_dev;
+ vendor_blob.size = strlen(vendor_dev);
+ fentry = debugfs_create_blob("vendor", S_IRUSR,
+ einj_debug_dir, &vendor_blob);
+ if (!fentry)
+ goto err_unmap;
+ fentry = debugfs_create_x32("vendor_flags", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &vendor_flags);
+ if (!fentry)
goto err_unmap;
}
@@ -518,6 +789,13 @@ static int __init einj_init(void)
return 0;
err_unmap:
+ if (einj_param) {
+ acpi_size size = (acpi5) ?
+ sizeof(struct set_error_type_with_address) :
+ sizeof(struct einj_parameter);
+
+ acpi_os_unmap_iomem(einj_param, size);
+ }
apei_exec_post_unmap_gars(&ctx);
err_release:
apei_resources_release(&einj_resources);
@@ -533,8 +811,13 @@ static void __exit einj_exit(void)
{
struct apei_exec_context ctx;
- if (einj_param)
- iounmap(einj_param);
+ if (einj_param) {
+ acpi_size size = (acpi5) ?
+ sizeof(struct set_error_type_with_address) :
+ sizeof(struct einj_parameter);
+
+ acpi_os_unmap_iomem(einj_param, size);
+ }
einj_exec_ctx_init(&ctx);
apei_exec_post_unmap_gars(&ctx);
apei_resources_release(&einj_resources);
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index de73caf3ceb..04ab5c9d3ce 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -33,7 +33,7 @@
#define ERST_DBG_PFX "ERST DBG: "
-#define ERST_DBG_RECORD_LEN_MAX 4096
+#define ERST_DBG_RECORD_LEN_MAX 0x4000
static void *erst_dbg_buf;
static unsigned int erst_dbg_buf_len;
@@ -43,12 +43,27 @@ static DEFINE_MUTEX(erst_dbg_mutex);
static int erst_dbg_open(struct inode *inode, struct file *file)
{
+ int rc, *pos;
+
if (erst_disable)
return -ENODEV;
+ pos = (int *)&file->private_data;
+
+ rc = erst_get_record_id_begin(pos);
+ if (rc)
+ return rc;
+
return nonseekable_open(inode, file);
}
+static int erst_dbg_release(struct inode *inode, struct file *file)
+{
+ erst_get_record_id_end();
+
+ return 0;
+}
+
static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
int rc;
@@ -79,23 +94,34 @@ static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
size_t usize, loff_t *off)
{
- int rc;
+ int rc, *pos;
ssize_t len = 0;
u64 id;
- if (*off != 0)
+ if (*off)
return -EINVAL;
if (mutex_lock_interruptible(&erst_dbg_mutex) != 0)
return -EINTR;
+ pos = (int *)&filp->private_data;
+
retry_next:
- rc = erst_get_next_record_id(&id);
+ rc = erst_get_record_id_next(pos, &id);
if (rc)
goto out;
/* no more record */
- if (id == APEI_ERST_INVALID_RECORD_ID)
+ if (id == APEI_ERST_INVALID_RECORD_ID) {
+ /*
+ * If the persistent store is empty initially, the function
+ * 'erst_read' below will return "-ENOENT" value. This causes
+ * 'retry_next' label is entered again. The returned value
+ * should be zero indicating the read operation is EOF.
+ */
+ len = 0;
+
goto out;
+ }
retry:
rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
/* The record may be cleared by others, try read next record */
@@ -181,6 +207,7 @@ out:
static const struct file_operations erst_dbg_ops = {
.owner = THIS_MODULE,
.open = erst_dbg_open,
+ .release = erst_dbg_release,
.read = erst_dbg_read,
.write = erst_dbg_write,
.unlocked_ioctl = erst_dbg_ioctl,
@@ -195,6 +222,10 @@ static struct miscdevice erst_dbg_dev = {
static __init int erst_dbg_init(void)
{
+ if (erst_disable) {
+ pr_info(ERST_DBG_PFX "ERST support is disabled.\n");
+ return -ENODEV;
+ }
return misc_register(&erst_dbg_dev);
}
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 1211c03149e..ed65e9c4b5b 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -34,11 +34,13 @@
#include <linux/cper.h>
#include <linux/nmi.h>
#include <linux/hardirq.h>
+#include <linux/pstore.h>
#include <acpi/apei.h>
#include "apei-internal.h"
-#define ERST_PFX "ERST: "
+#undef pr_fmt
+#define pr_fmt(fmt) "ERST: " fmt
/* ERST command status */
#define ERST_STATUS_SUCCESS 0x0
@@ -53,7 +55,7 @@
sizeof(struct acpi_table_erst)))
#define SPIN_UNIT 100 /* 100ns */
-/* Firmware should respond within 1 miliseconds */
+/* Firmware should respond within 1 milliseconds */
#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC)
#define FIRMWARE_MAX_STALL 50 /* 50us */
@@ -86,7 +88,7 @@ static struct erst_erange {
* It is used to provide exclusive accessing for ERST Error Log
* Address Range too.
*/
-static DEFINE_SPINLOCK(erst_lock);
+static DEFINE_RAW_SPINLOCK(erst_lock);
static inline int erst_errno(int command_status)
{
@@ -108,8 +110,7 @@ static inline int erst_errno(int command_status)
static int erst_timedout(u64 *t, u64 spin_unit)
{
if ((s64)*t < spin_unit) {
- pr_warning(FW_WARN ERST_PFX
- "Firmware does not respond in time\n");
+ pr_warn(FW_WARN "Firmware does not respond in time.\n");
return 1;
}
*t -= spin_unit;
@@ -185,8 +186,8 @@ static int erst_exec_stall(struct apei_exec_context *ctx,
if (ctx->value > FIRMWARE_MAX_STALL) {
if (!in_nmi())
- pr_warning(FW_WARN ERST_PFX
- "Too long stall time for stall instruction: %llx.\n",
+ pr_warn(FW_WARN
+ "Too long stall time for stall instruction: 0x%llx.\n",
ctx->value);
stall_time = FIRMWARE_MAX_STALL;
} else
@@ -205,8 +206,8 @@ static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
if (ctx->var1 > FIRMWARE_MAX_STALL) {
if (!in_nmi())
- pr_warning(FW_WARN ERST_PFX
- "Too long stall time for stall while true instruction: %llx.\n",
+ pr_warn(FW_WARN
+ "Too long stall time for stall while true instruction: 0x%llx.\n",
ctx->var1);
stall_time = FIRMWARE_MAX_STALL;
} else
@@ -270,8 +271,7 @@ static int erst_exec_move_data(struct apei_exec_context *ctx,
/* ioremap does not work in interrupt context */
if (in_interrupt()) {
- pr_warning(ERST_PFX
- "MOVE_DATA can not be used in interrupt context");
+ pr_warn("MOVE_DATA can not be used in interrupt context.\n");
return -EBUSY;
}
@@ -283,8 +283,10 @@ static int erst_exec_move_data(struct apei_exec_context *ctx,
if (!src)
return -ENOMEM;
dst = ioremap(ctx->dst_base + offset, ctx->var2);
- if (!dst)
+ if (!dst) {
+ iounmap(src);
return -ENOMEM;
+ }
memmove(dst, src, ctx->var2);
@@ -421,14 +423,30 @@ ssize_t erst_get_record_count(void)
if (erst_disable)
return -ENODEV;
- spin_lock_irqsave(&erst_lock, flags);
+ raw_spin_lock_irqsave(&erst_lock, flags);
count = __erst_get_record_count();
- spin_unlock_irqrestore(&erst_lock, flags);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
return count;
}
EXPORT_SYMBOL_GPL(erst_get_record_count);
+#define ERST_RECORD_ID_CACHE_SIZE_MIN 16
+#define ERST_RECORD_ID_CACHE_SIZE_MAX 1024
+
+struct erst_record_id_cache {
+ struct mutex lock;
+ u64 *entries;
+ int len;
+ int size;
+ int refcount;
+};
+
+static struct erst_record_id_cache erst_record_id_cache = {
+ .lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
+ .refcount = 0,
+};
+
static int __erst_get_next_record_id(u64 *record_id)
{
struct apei_exec_context ctx;
@@ -443,26 +461,178 @@ static int __erst_get_next_record_id(u64 *record_id)
return 0;
}
+int erst_get_record_id_begin(int *pos)
+{
+ int rc;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+ if (rc)
+ return rc;
+ erst_record_id_cache.refcount++;
+ mutex_unlock(&erst_record_id_cache.lock);
+
+ *pos = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
+
+/* erst_record_id_cache.lock must be held by caller */
+static int __erst_record_id_cache_add_one(void)
+{
+ u64 id, prev_id, first_id;
+ int i, rc;
+ u64 *entries;
+ unsigned long flags;
+
+ id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
+retry:
+ raw_spin_lock_irqsave(&erst_lock, flags);
+ rc = __erst_get_next_record_id(&id);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+ if (rc == -ENOENT)
+ return 0;
+ if (rc)
+ return rc;
+ if (id == APEI_ERST_INVALID_RECORD_ID)
+ return 0;
+ /* can not skip current ID, or loop back to first ID */
+ if (id == prev_id || id == first_id)
+ return 0;
+ if (first_id == APEI_ERST_INVALID_RECORD_ID)
+ first_id = id;
+ prev_id = id;
+
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == id)
+ break;
+ }
+ /* record id already in cache, try next */
+ if (i < erst_record_id_cache.len)
+ goto retry;
+ if (erst_record_id_cache.len >= erst_record_id_cache.size) {
+ int new_size, alloc_size;
+ u64 *new_entries;
+
+ new_size = erst_record_id_cache.size * 2;
+ new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
+ ERST_RECORD_ID_CACHE_SIZE_MAX);
+ if (new_size <= erst_record_id_cache.size) {
+ if (printk_ratelimit())
+ pr_warn(FW_WARN "too many record IDs!\n");
+ return 0;
+ }
+ alloc_size = new_size * sizeof(entries[0]);
+ if (alloc_size < PAGE_SIZE)
+ new_entries = kmalloc(alloc_size, GFP_KERNEL);
+ else
+ new_entries = vmalloc(alloc_size);
+ if (!new_entries)
+ return -ENOMEM;
+ memcpy(new_entries, entries,
+ erst_record_id_cache.len * sizeof(entries[0]));
+ if (erst_record_id_cache.size < PAGE_SIZE)
+ kfree(entries);
+ else
+ vfree(entries);
+ erst_record_id_cache.entries = entries = new_entries;
+ erst_record_id_cache.size = new_size;
+ }
+ entries[i] = id;
+ erst_record_id_cache.len++;
+
+ return 1;
+}
+
/*
* Get the record ID of an existing error record on the persistent
* storage. If there is no error record on the persistent storage, the
* returned record_id is APEI_ERST_INVALID_RECORD_ID.
*/
-int erst_get_next_record_id(u64 *record_id)
+int erst_get_record_id_next(int *pos, u64 *record_id)
{
- int rc;
- unsigned long flags;
+ int rc = 0;
+ u64 *entries;
if (erst_disable)
return -ENODEV;
- spin_lock_irqsave(&erst_lock, flags);
- rc = __erst_get_next_record_id(record_id);
- spin_unlock_irqrestore(&erst_lock, flags);
+ /* must be enclosed by erst_get_record_id_begin/end */
+ BUG_ON(!erst_record_id_cache.refcount);
+ BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
+
+ mutex_lock(&erst_record_id_cache.lock);
+ entries = erst_record_id_cache.entries;
+ for (; *pos < erst_record_id_cache.len; (*pos)++)
+ if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
+ break;
+ /* found next record id in cache */
+ if (*pos < erst_record_id_cache.len) {
+ *record_id = entries[*pos];
+ (*pos)++;
+ goto out_unlock;
+ }
+
+ /* Try to add one more record ID to cache */
+ rc = __erst_record_id_cache_add_one();
+ if (rc < 0)
+ goto out_unlock;
+ /* successfully add one new ID */
+ if (rc == 1) {
+ *record_id = erst_record_id_cache.entries[*pos];
+ (*pos)++;
+ rc = 0;
+ } else {
+ *pos = -1;
+ *record_id = APEI_ERST_INVALID_RECORD_ID;
+ }
+out_unlock:
+ mutex_unlock(&erst_record_id_cache.lock);
return rc;
}
-EXPORT_SYMBOL_GPL(erst_get_next_record_id);
+EXPORT_SYMBOL_GPL(erst_get_record_id_next);
+
+/* erst_record_id_cache.lock must be held by caller */
+static void __erst_record_id_cache_compact(void)
+{
+ int i, wpos = 0;
+ u64 *entries;
+
+ if (erst_record_id_cache.refcount)
+ return;
+
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
+ continue;
+ if (wpos != i)
+ entries[wpos] = entries[i];
+ wpos++;
+ }
+ erst_record_id_cache.len = wpos;
+}
+
+void erst_get_record_id_end(void)
+{
+ /*
+ * erst_disable != 0 should be detected by invoker via the
+ * return value of erst_get_record_id_begin/next, so this
+ * function should not be called for erst_disable != 0.
+ */
+ BUG_ON(erst_disable);
+
+ mutex_lock(&erst_record_id_cache.lock);
+ erst_record_id_cache.refcount--;
+ BUG_ON(erst_record_id_cache.refcount < 0);
+ __erst_record_id_cache_compact();
+ mutex_unlock(&erst_record_id_cache.lock);
+}
+EXPORT_SYMBOL_GPL(erst_get_record_id_end);
static int __erst_write_to_storage(u64 offset)
{
@@ -472,7 +642,7 @@ static int __erst_write_to_storage(u64 offset)
int rc;
erst_exec_ctx_init(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, offset);
@@ -496,7 +666,7 @@ static int __erst_write_to_storage(u64 offset)
if (rc)
return rc;
val = apei_exec_ctx_get_output(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_END);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
if (rc)
return rc;
@@ -511,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
int rc;
erst_exec_ctx_init(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, offset);
@@ -539,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
if (rc)
return rc;
val = apei_exec_ctx_get_output(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_END);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
if (rc)
return rc;
@@ -554,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id)
int rc;
erst_exec_ctx_init(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
if (rc)
return rc;
apei_exec_ctx_set_input(&ctx, record_id);
@@ -578,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id)
if (rc)
return rc;
val = apei_exec_ctx_get_output(&ctx);
- rc = apei_exec_run(&ctx, ACPI_ERST_END);
+ rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
if (rc)
return rc;
@@ -589,8 +759,7 @@ static int __erst_clear_from_storage(u64 record_id)
static void pr_unimpl_nvram(void)
{
if (printk_ratelimit())
- pr_warning(ERST_PFX
- "NVRAM ERST Log Address Range is not implemented yet\n");
+ pr_warn("NVRAM ERST Log Address Range not implemented yet.\n");
}
static int __erst_write_to_nvram(const struct cper_record_header *record)
@@ -624,17 +793,17 @@ int erst_write(const struct cper_record_header *record)
return -EINVAL;
if (erst_erange.attr & ERST_RANGE_NVRAM) {
- if (!spin_trylock_irqsave(&erst_lock, flags))
+ if (!raw_spin_trylock_irqsave(&erst_lock, flags))
return -EBUSY;
rc = __erst_write_to_nvram(record);
- spin_unlock_irqrestore(&erst_lock, flags);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
return rc;
}
if (record->record_length > erst_erange.size)
return -EINVAL;
- if (!spin_trylock_irqsave(&erst_lock, flags))
+ if (!raw_spin_trylock_irqsave(&erst_lock, flags))
return -EBUSY;
memcpy(erst_erange.vaddr, record, record->record_length);
rcd_erange = erst_erange.vaddr;
@@ -642,7 +811,7 @@ int erst_write(const struct cper_record_header *record)
memcpy(&rcd_erange->persistence_information, "ER", 2);
rc = __erst_write_to_storage(0);
- spin_unlock_irqrestore(&erst_lock, flags);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
return rc;
}
@@ -696,63 +865,41 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
if (erst_disable)
return -ENODEV;
- spin_lock_irqsave(&erst_lock, flags);
+ raw_spin_lock_irqsave(&erst_lock, flags);
len = __erst_read(record_id, record, buflen);
- spin_unlock_irqrestore(&erst_lock, flags);
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
return len;
}
EXPORT_SYMBOL_GPL(erst_read);
-/*
- * If return value > buflen, the buffer size is not big enough,
- * else if return value = 0, there is no more record to read,
- * else if return value < 0, something goes wrong,
- * else everything is OK, and return value is record length
- */
-ssize_t erst_read_next(struct cper_record_header *record, size_t buflen)
-{
- int rc;
- ssize_t len;
- unsigned long flags;
- u64 record_id;
-
- if (erst_disable)
- return -ENODEV;
-
- spin_lock_irqsave(&erst_lock, flags);
- rc = __erst_get_next_record_id(&record_id);
- if (rc) {
- spin_unlock_irqrestore(&erst_lock, flags);
- return rc;
- }
- /* no more record */
- if (record_id == APEI_ERST_INVALID_RECORD_ID) {
- spin_unlock_irqrestore(&erst_lock, flags);
- return 0;
- }
-
- len = __erst_read(record_id, record, buflen);
- spin_unlock_irqrestore(&erst_lock, flags);
-
- return len;
-}
-EXPORT_SYMBOL_GPL(erst_read_next);
-
int erst_clear(u64 record_id)
{
- int rc;
+ int rc, i;
unsigned long flags;
+ u64 *entries;
if (erst_disable)
return -ENODEV;
- spin_lock_irqsave(&erst_lock, flags);
+ rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
+ if (rc)
+ return rc;
+ raw_spin_lock_irqsave(&erst_lock, flags);
if (erst_erange.attr & ERST_RANGE_NVRAM)
rc = __erst_clear_from_nvram(record_id);
else
rc = __erst_clear_from_storage(record_id);
- spin_unlock_irqrestore(&erst_lock, flags);
-
+ raw_spin_unlock_irqrestore(&erst_lock, flags);
+ if (rc)
+ goto out;
+ entries = erst_record_id_cache.entries;
+ for (i = 0; i < erst_record_id_cache.len; i++) {
+ if (entries[i] == record_id)
+ entries[i] = APEI_ERST_INVALID_RECORD_ID;
+ }
+ __erst_record_id_cache_compact();
+out:
+ mutex_unlock(&erst_record_id_cache.lock);
return rc;
}
EXPORT_SYMBOL_GPL(erst_clear);
@@ -769,7 +916,7 @@ static int erst_check_table(struct acpi_table_erst *erst_tab)
{
if ((erst_tab->header_length !=
(sizeof(struct acpi_table_erst) - sizeof(erst_tab->header)))
- && (erst_tab->header_length != sizeof(struct acpi_table_einj)))
+ && (erst_tab->header_length != sizeof(struct acpi_table_erst)))
return -EINVAL;
if (erst_tab->header.length < sizeof(struct acpi_table_erst))
return -EINVAL;
@@ -781,6 +928,196 @@ static int erst_check_table(struct acpi_table_erst *erst_tab)
return 0;
}
+static int erst_open_pstore(struct pstore_info *psi);
+static int erst_close_pstore(struct pstore_info *psi);
+static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
+ struct timespec *time, char **buf,
+ bool *compressed, struct pstore_info *psi);
+static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
+ u64 *id, unsigned int part, int count, bool compressed,
+ size_t size, struct pstore_info *psi);
+static int erst_clearer(enum pstore_type_id type, u64 id, int count,
+ struct timespec time, struct pstore_info *psi);
+
+static struct pstore_info erst_info = {
+ .owner = THIS_MODULE,
+ .name = "erst",
+ .flags = PSTORE_FLAGS_FRAGILE,
+ .open = erst_open_pstore,
+ .close = erst_close_pstore,
+ .read = erst_reader,
+ .write = erst_writer,
+ .erase = erst_clearer
+};
+
+#define CPER_CREATOR_PSTORE \
+ UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
+ 0x64, 0x90, 0xb8, 0x9d)
+#define CPER_SECTION_TYPE_DMESG \
+ UUID_LE(0xc197e04e, 0xd545, 0x4a70, 0x9c, 0x17, 0xa5, 0x54, \
+ 0x94, 0x19, 0xeb, 0x12)
+#define CPER_SECTION_TYPE_DMESG_Z \
+ UUID_LE(0x4f118707, 0x04dd, 0x4055, 0xb5, 0xdd, 0x95, 0x6d, \
+ 0x34, 0xdd, 0xfa, 0xc6)
+#define CPER_SECTION_TYPE_MCE \
+ UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
+ 0x04, 0x4a, 0x38, 0xfc)
+
+struct cper_pstore_record {
+ struct cper_record_header hdr;
+ struct cper_section_descriptor sec_hdr;
+ char data[];
+} __packed;
+
+static int reader_pos;
+
+static int erst_open_pstore(struct pstore_info *psi)
+{
+ int rc;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rc = erst_get_record_id_begin(&reader_pos);
+
+ return rc;
+}
+
+static int erst_close_pstore(struct pstore_info *psi)
+{
+ erst_get_record_id_end();
+
+ return 0;
+}
+
+static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count,
+ struct timespec *time, char **buf,
+ bool *compressed, struct pstore_info *psi)
+{
+ int rc;
+ ssize_t len = 0;
+ u64 record_id;
+ struct cper_pstore_record *rcd;
+ size_t rcd_len = sizeof(*rcd) + erst_info.bufsize;
+
+ if (erst_disable)
+ return -ENODEV;
+
+ rcd = kmalloc(rcd_len, GFP_KERNEL);
+ if (!rcd) {
+ rc = -ENOMEM;
+ goto out;
+ }
+skip:
+ rc = erst_get_record_id_next(&reader_pos, &record_id);
+ if (rc)
+ goto out;
+
+ /* no more record */
+ if (record_id == APEI_ERST_INVALID_RECORD_ID) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ len = erst_read(record_id, &rcd->hdr, rcd_len);
+ /* The record may be cleared by others, try read next record */
+ if (len == -ENOENT)
+ goto skip;
+ else if (len < sizeof(*rcd)) {
+ rc = -EIO;
+ goto out;
+ }
+ if (uuid_le_cmp(rcd->hdr.creator_id, CPER_CREATOR_PSTORE) != 0)
+ goto skip;
+
+ *buf = kmalloc(len, GFP_KERNEL);
+ if (*buf == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ memcpy(*buf, rcd->data, len - sizeof(*rcd));
+ *id = record_id;
+ *compressed = false;
+ if (uuid_le_cmp(rcd->sec_hdr.section_type,
+ CPER_SECTION_TYPE_DMESG_Z) == 0) {
+ *type = PSTORE_TYPE_DMESG;
+ *compressed = true;
+ } else if (uuid_le_cmp(rcd->sec_hdr.section_type,
+ CPER_SECTION_TYPE_DMESG) == 0)
+ *type = PSTORE_TYPE_DMESG;
+ else if (uuid_le_cmp(rcd->sec_hdr.section_type,
+ CPER_SECTION_TYPE_MCE) == 0)
+ *type = PSTORE_TYPE_MCE;
+ else
+ *type = PSTORE_TYPE_UNKNOWN;
+
+ if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP)
+ time->tv_sec = rcd->hdr.timestamp;
+ else
+ time->tv_sec = 0;
+ time->tv_nsec = 0;
+
+out:
+ kfree(rcd);
+ return (rc < 0) ? rc : (len - sizeof(*rcd));
+}
+
+static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason,
+ u64 *id, unsigned int part, int count, bool compressed,
+ size_t size, struct pstore_info *psi)
+{
+ struct cper_pstore_record *rcd = (struct cper_pstore_record *)
+ (erst_info.buf - sizeof(*rcd));
+ int ret;
+
+ memset(rcd, 0, sizeof(*rcd));
+ memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+ rcd->hdr.revision = CPER_RECORD_REV;
+ rcd->hdr.signature_end = CPER_SIG_END;
+ rcd->hdr.section_count = 1;
+ rcd->hdr.error_severity = CPER_SEV_FATAL;
+ /* timestamp valid. platform_id, partition_id are invalid */
+ rcd->hdr.validation_bits = CPER_VALID_TIMESTAMP;
+ rcd->hdr.timestamp = get_seconds();
+ rcd->hdr.record_length = sizeof(*rcd) + size;
+ rcd->hdr.creator_id = CPER_CREATOR_PSTORE;
+ rcd->hdr.notification_type = CPER_NOTIFY_MCE;
+ rcd->hdr.record_id = cper_next_record_id();
+ rcd->hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+ rcd->sec_hdr.section_offset = sizeof(*rcd);
+ rcd->sec_hdr.section_length = size;
+ rcd->sec_hdr.revision = CPER_SEC_REV;
+ /* fru_id and fru_text is invalid */
+ rcd->sec_hdr.validation_bits = 0;
+ rcd->sec_hdr.flags = CPER_SEC_PRIMARY;
+ switch (type) {
+ case PSTORE_TYPE_DMESG:
+ if (compressed)
+ rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG_Z;
+ else
+ rcd->sec_hdr.section_type = CPER_SECTION_TYPE_DMESG;
+ break;
+ case PSTORE_TYPE_MCE:
+ rcd->sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+ break;
+ default:
+ return -EINVAL;
+ }
+ rcd->sec_hdr.section_severity = CPER_SEV_FATAL;
+
+ ret = erst_write(&rcd->hdr);
+ *id = rcd->hdr.record_id;
+
+ return ret;
+}
+
+static int erst_clearer(enum pstore_type_id type, u64 id, int count,
+ struct timespec time, struct pstore_info *psi)
+{
+ return erst_clear(id);
+}
+
static int __init erst_init(void)
{
int rc = 0;
@@ -788,31 +1125,31 @@ static int __init erst_init(void)
struct apei_exec_context ctx;
struct apei_resources erst_resources;
struct resource *r;
+ char *buf;
if (acpi_disabled)
goto err;
if (erst_disable) {
- pr_info(ERST_PFX
+ pr_info(
"Error Record Serialization Table (ERST) support is disabled.\n");
goto err;
}
status = acpi_get_table(ACPI_SIG_ERST, 0,
(struct acpi_table_header **)&erst_tab);
- if (status == AE_NOT_FOUND) {
- pr_info(ERST_PFX "Table is not found!\n");
+ if (status == AE_NOT_FOUND)
goto err;
- } else if (ACPI_FAILURE(status)) {
+ else if (ACPI_FAILURE(status)) {
const char *msg = acpi_format_exception(status);
- pr_err(ERST_PFX "Failed to get table, %s\n", msg);
+ pr_err("Failed to get table, %s\n", msg);
rc = -EINVAL;
goto err;
}
rc = erst_check_table(erst_tab);
if (rc) {
- pr_err(FW_BUG ERST_PFX "ERST table is invalid\n");
+ pr_err(FW_BUG "ERST table is invalid.\n");
goto err;
}
@@ -830,21 +1167,19 @@ static int __init erst_init(void)
rc = erst_get_erange(&erst_erange);
if (rc) {
if (rc == -ENODEV)
- pr_info(ERST_PFX
+ pr_info(
"The corresponding hardware device or firmware implementation "
"is not available.\n");
else
- pr_err(ERST_PFX
- "Failed to get Error Log Address Range.\n");
+ pr_err("Failed to get Error Log Address Range.\n");
goto err_unmap_reg;
}
r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
if (!r) {
- pr_err(ERST_PFX
- "Can not request iomem region <0x%16llx-0x%16llx> for ERST.\n",
- (unsigned long long)erst_erange.base,
- (unsigned long long)erst_erange.base + erst_erange.size);
+ pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n",
+ (unsigned long long)erst_erange.base,
+ (unsigned long long)erst_erange.base + erst_erange.size - 1);
rc = -EIO;
goto err_unmap_reg;
}
@@ -854,9 +1189,29 @@ static int __init erst_init(void)
if (!erst_erange.vaddr)
goto err_release_erange;
- pr_info(ERST_PFX
+ pr_info(
"Error Record Serialization Table (ERST) support is initialized.\n");
+ buf = kmalloc(erst_erange.size, GFP_KERNEL);
+ spin_lock_init(&erst_info.buf_lock);
+ if (buf) {
+ erst_info.buf = buf + sizeof(struct cper_pstore_record);
+ erst_info.bufsize = erst_erange.size -
+ sizeof(struct cper_pstore_record);
+ rc = pstore_register(&erst_info);
+ if (rc) {
+ if (rc != -EPERM)
+ pr_info(
+ "Could not register with persistent store.\n");
+ erst_info.buf = NULL;
+ erst_info.bufsize = 0;
+ kfree(buf);
+ }
+ } else
+ pr_err(
+ "Failed to allocate %lld bytes for persistent store error log.\n",
+ erst_erange.size);
+
return 0;
err_release_erange:
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0d505e59214..dab7cb7349d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -12,11 +12,7 @@
* For more information about Generic Hardware Error Source, please
* refer to ACPI Specification version 4.0, section 17.3.2.6
*
- * Now, only SCI notification type and memory errors are
- * supported. More notification type and hardware error type will be
- * added later.
- *
- * Copyright 2010 Intel Corp.
+ * Copyright 2010,2011 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or
@@ -39,58 +35,225 @@
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/interrupt.h>
+#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/kdebug.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
-#include <acpi/apei.h>
-#include <acpi/atomicio.h>
-#include <acpi/hed.h>
+#include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
+#include <linux/irq_work.h>
+#include <linux/llist.h>
+#include <linux/genalloc.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+
+#include <acpi/ghes.h>
#include <asm/mce.h>
+#include <asm/tlbflush.h>
+#include <asm/nmi.h>
#include "apei-internal.h"
#define GHES_PFX "GHES: "
#define GHES_ESTATUS_MAX_SIZE 65536
+#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536
-/*
- * One struct ghes is created for each generic hardware error
- * source.
- *
- * It provides the context for APEI hardware error timer/IRQ/SCI/NMI
- * handler. Handler for one generic hardware error source is only
- * triggered after the previous one is done. So handler can uses
- * struct ghes without locking.
- *
- * estatus: memory buffer for error status block, allocated during
- * HEST parsing.
- */
-#define GHES_TO_CLEAR 0x0001
+#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
-struct ghes {
- struct acpi_hest_generic *generic;
- struct acpi_hest_generic_status *estatus;
- struct list_head list;
- u64 buffer_paddr;
- unsigned long flags;
-};
+/* This is just an estimation for memory pool allocation */
+#define GHES_ESTATUS_CACHE_AVG_SIZE 512
+
+#define GHES_ESTATUS_CACHES_SIZE 4
+
+#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
+/* Prevent too many caches are allocated because of RCU */
+#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
+
+#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
+ (sizeof(struct ghes_estatus_cache) + (estatus_len))
+#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
+ ((struct acpi_generic_status *) \
+ ((struct ghes_estatus_cache *)(estatus_cache) + 1))
+
+#define GHES_ESTATUS_NODE_LEN(estatus_len) \
+ (sizeof(struct ghes_estatus_node) + (estatus_len))
+#define GHES_ESTATUS_FROM_NODE(estatus_node) \
+ ((struct acpi_generic_status *) \
+ ((struct ghes_estatus_node *)(estatus_node) + 1))
+
+bool ghes_disable;
+module_param_named(disable, ghes_disable, bool, 0);
+
+static int ghes_panic_timeout __read_mostly = 30;
/*
- * Error source lists, one list for each notification method. The
- * members in lists are struct ghes.
- *
- * The list members are only added in HEST parsing and deleted during
- * module_exit, that is, single-threaded. So no lock is needed for
- * that.
+ * All error sources notified with SCI shares one notifier function,
+ * so they need to be linked and checked one by one. This is applied
+ * to NMI too.
*
- * But the mutual exclusion is needed between members adding/deleting
- * and timer/IRQ/SCI/NMI handler, which may traverse the list. RCU is
- * used for that.
+ * RCU is used for these lists, so ghes_list_mutex is only used for
+ * list changing, not for traversing.
*/
static LIST_HEAD(ghes_sci);
+static LIST_HEAD(ghes_nmi);
static DEFINE_MUTEX(ghes_list_mutex);
+/*
+ * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
+ * mutual exclusion.
+ */
+static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
+
+/*
+ * Because the memory area used to transfer hardware error information
+ * from BIOS to Linux can be determined only in NMI, IRQ or timer
+ * handler, but general ioremap can not be used in atomic context, so
+ * a special version of atomic ioremap is implemented for that.
+ */
+
+/*
+ * Two virtual pages are used, one for NMI context, the other for
+ * IRQ/PROCESS context
+ */
+#define GHES_IOREMAP_PAGES 2
+#define GHES_IOREMAP_NMI_PAGE(base) (base)
+#define GHES_IOREMAP_IRQ_PAGE(base) ((base) + PAGE_SIZE)
+
+/* virtual memory area for atomic ioremap */
+static struct vm_struct *ghes_ioremap_area;
+/*
+ * These 2 spinlock is used to prevent atomic ioremap virtual memory
+ * area from being mapped simultaneously.
+ */
+static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
+static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
+
+/*
+ * printk is not safe in NMI context. So in NMI handler, we allocate
+ * required memory from lock-less memory allocator
+ * (ghes_estatus_pool), save estatus into it, put them into lock-less
+ * list (ghes_estatus_llist), then delay printk into IRQ context via
+ * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
+ * required pool size by all NMI error source.
+ */
+static struct gen_pool *ghes_estatus_pool;
+static unsigned long ghes_estatus_pool_size_request;
+static struct llist_head ghes_estatus_llist;
+static struct irq_work ghes_proc_irq_work;
+
+struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
+static atomic_t ghes_estatus_cache_alloced;
+
+static int ghes_ioremap_init(void)
+{
+ ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
+ VM_IOREMAP, VMALLOC_START, VMALLOC_END);
+ if (!ghes_ioremap_area) {
+ pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void ghes_ioremap_exit(void)
+{
+ free_vm_area(ghes_ioremap_area);
+}
+
+static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
+{
+ unsigned long vaddr;
+
+ vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
+ ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
+ pfn << PAGE_SHIFT, PAGE_KERNEL);
+
+ return (void __iomem *)vaddr;
+}
+
+static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
+{
+ unsigned long vaddr;
+
+ vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
+ ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
+ pfn << PAGE_SHIFT, PAGE_KERNEL);
+
+ return (void __iomem *)vaddr;
+}
+
+static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
+{
+ unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+ void *base = ghes_ioremap_area->addr;
+
+ BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
+ unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+ __flush_tlb_one(vaddr);
+}
+
+static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
+{
+ unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+ void *base = ghes_ioremap_area->addr;
+
+ BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
+ unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+ __flush_tlb_one(vaddr);
+}
+
+static int ghes_estatus_pool_init(void)
+{
+ ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
+ if (!ghes_estatus_pool)
+ return -ENOMEM;
+ return 0;
+}
+
+static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
+ struct gen_pool_chunk *chunk,
+ void *data)
+{
+ free_page(chunk->start_addr);
+}
+
+static void ghes_estatus_pool_exit(void)
+{
+ gen_pool_for_each_chunk(ghes_estatus_pool,
+ ghes_estatus_pool_free_chunk_page, NULL);
+ gen_pool_destroy(ghes_estatus_pool);
+}
+
+static int ghes_estatus_pool_expand(unsigned long len)
+{
+ unsigned long i, pages, size, addr;
+ int ret;
+
+ ghes_estatus_pool_size_request += PAGE_ALIGN(len);
+ size = gen_pool_size(ghes_estatus_pool);
+ if (size >= ghes_estatus_pool_size_request)
+ return 0;
+ pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
+ for (i = 0; i < pages; i++) {
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ return -ENOMEM;
+ ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void ghes_estatus_pool_shrink(unsigned long len)
+{
+ ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
+}
+
static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
struct ghes *ghes;
@@ -101,8 +264,7 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
if (!ghes)
return ERR_PTR(-ENOMEM);
ghes->generic = generic;
- INIT_LIST_HEAD(&ghes->list);
- rc = acpi_pre_map_gar(&generic->error_status_address);
+ rc = apei_map_generic_address(&generic->error_status_address);
if (rc)
goto err_free;
error_block_length = generic->error_block_length;
@@ -122,7 +284,7 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
return ghes;
err_unmap:
- acpi_post_unmap_gar(&generic->error_status_address);
+ apei_unmap_generic_address(&generic->error_status_address);
err_free:
kfree(ghes);
return ERR_PTR(rc);
@@ -131,16 +293,9 @@ err_free:
static void ghes_fini(struct ghes *ghes)
{
kfree(ghes->estatus);
- acpi_post_unmap_gar(&ghes->generic->error_status_address);
+ apei_unmap_generic_address(&ghes->generic->error_status_address);
}
-enum {
- GHES_SEV_NO = 0x0,
- GHES_SEV_CORRECTED = 0x1,
- GHES_SEV_RECOVERABLE = 0x2,
- GHES_SEV_PANIC = 0x3,
-};
-
static inline int ghes_severity(int severity)
{
switch (severity) {
@@ -153,27 +308,46 @@ static inline int ghes_severity(int severity)
case CPER_SEV_FATAL:
return GHES_SEV_PANIC;
default:
- /* Unkown, go panic */
+ /* Unknown, go panic */
return GHES_SEV_PANIC;
}
}
-/* SCI handler run in work queue, so ioremap can be used here */
-static int ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
- int from_phys)
+static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
+ int from_phys)
{
- void *vaddr;
+ void __iomem *vaddr;
+ unsigned long flags = 0;
+ int in_nmi = in_nmi();
+ u64 offset;
+ u32 trunk;
- vaddr = ioremap_cache(paddr, len);
- if (!vaddr)
- return -ENOMEM;
- if (from_phys)
- memcpy(buffer, vaddr, len);
- else
- memcpy(vaddr, buffer, len);
- iounmap(vaddr);
-
- return 0;
+ while (len > 0) {
+ offset = paddr - (paddr & PAGE_MASK);
+ if (in_nmi) {
+ raw_spin_lock(&ghes_ioremap_lock_nmi);
+ vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
+ } else {
+ spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
+ vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
+ }
+ trunk = PAGE_SIZE - offset;
+ trunk = min(trunk, len);
+ if (from_phys)
+ memcpy_fromio(buffer, vaddr + offset, trunk);
+ else
+ memcpy_toio(vaddr + offset, buffer, trunk);
+ len -= trunk;
+ paddr += trunk;
+ buffer += trunk;
+ if (in_nmi) {
+ ghes_iounmap_nmi(vaddr);
+ raw_spin_unlock(&ghes_ioremap_lock_nmi);
+ } else {
+ ghes_iounmap_irq(vaddr);
+ spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
+ }
+ }
}
static int ghes_read_estatus(struct ghes *ghes, int silent)
@@ -183,7 +357,7 @@ static int ghes_read_estatus(struct ghes *ghes, int silent)
u32 len;
int rc;
- rc = acpi_atomic_read(&buf_paddr, &g->error_status_address);
+ rc = apei_read(&buf_paddr, &g->error_status_address);
if (rc) {
if (!silent && printk_ratelimit())
pr_warning(FW_WARN GHES_PFX
@@ -194,10 +368,8 @@ static int ghes_read_estatus(struct ghes *ghes, int silent)
if (!buf_paddr)
return -ENOENT;
- rc = ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
- sizeof(*ghes->estatus), 1);
- if (rc)
- return rc;
+ ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
+ sizeof(*ghes->estatus), 1);
if (!ghes->estatus->block_status)
return -ENOENT;
@@ -205,24 +377,22 @@ static int ghes_read_estatus(struct ghes *ghes, int silent)
ghes->flags |= GHES_TO_CLEAR;
rc = -EIO;
- len = apei_estatus_len(ghes->estatus);
+ len = cper_estatus_len(ghes->estatus);
if (len < sizeof(*ghes->estatus))
goto err_read_block;
if (len > ghes->generic->error_block_length)
goto err_read_block;
- if (apei_estatus_check_header(ghes->estatus))
+ if (cper_estatus_check_header(ghes->estatus))
goto err_read_block;
- rc = ghes_copy_tofrom_phys(ghes->estatus + 1,
- buf_paddr + sizeof(*ghes->estatus),
- len - sizeof(*ghes->estatus), 1);
- if (rc)
- return rc;
- if (apei_estatus_check(ghes->estatus))
+ ghes_copy_tofrom_phys(ghes->estatus + 1,
+ buf_paddr + sizeof(*ghes->estatus),
+ len - sizeof(*ghes->estatus), 1);
+ if (cper_estatus_check(ghes->estatus))
goto err_read_block;
rc = 0;
err_read_block:
- if (rc && !silent)
+ if (rc && !silent && printk_ratelimit())
pr_warning(FW_WARN GHES_PFX
"Failed to read error status block!\n");
return rc;
@@ -238,28 +408,261 @@ static void ghes_clear_estatus(struct ghes *ghes)
ghes->flags &= ~GHES_TO_CLEAR;
}
-static void ghes_do_proc(struct ghes *ghes)
+static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
{
- int sev, processed = 0;
- struct acpi_hest_generic_data *gdata;
+#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
+ unsigned long pfn;
+ int flags = -1;
+ int sec_sev = ghes_severity(gdata->error_severity);
+ struct cper_sec_mem_err *mem_err;
+ mem_err = (struct cper_sec_mem_err *)(gdata + 1);
- sev = ghes_severity(ghes->estatus->error_severity);
- apei_estatus_for_each_section(ghes->estatus, gdata) {
-#ifdef CONFIG_X86_MCE
+ if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+ return;
+
+ pfn = mem_err->physical_addr >> PAGE_SHIFT;
+ if (!pfn_valid(pfn)) {
+ pr_warn_ratelimited(FW_WARN GHES_PFX
+ "Invalid address in generic error data: %#llx\n",
+ mem_err->physical_addr);
+ return;
+ }
+
+ /* iff following two events can be handled properly by now */
+ if (sec_sev == GHES_SEV_CORRECTED &&
+ (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
+ flags = MF_SOFT_OFFLINE;
+ if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
+ flags = 0;
+
+ if (flags != -1)
+ memory_failure_queue(pfn, 0, flags);
+#endif
+}
+
+static void ghes_do_proc(struct ghes *ghes,
+ const struct acpi_generic_status *estatus)
+{
+ int sev, sec_sev;
+ struct acpi_generic_data *gdata;
+
+ sev = ghes_severity(estatus->error_severity);
+ apei_estatus_for_each_section(estatus, gdata) {
+ sec_sev = ghes_severity(gdata->error_severity);
if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
CPER_SEC_PLATFORM_MEM)) {
- apei_mce_report_mem_error(
- sev == GHES_SEV_CORRECTED,
- (struct cper_sec_mem_err *)(gdata+1));
- processed = 1;
+ struct cper_sec_mem_err *mem_err;
+ mem_err = (struct cper_sec_mem_err *)(gdata+1);
+ ghes_edac_report_mem_error(ghes, sev, mem_err);
+
+#ifdef CONFIG_X86_MCE
+ apei_mce_report_mem_error(sev, mem_err);
+#endif
+ ghes_handle_memory_failure(gdata, sev);
+ }
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+ else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+ CPER_SEC_PCIE)) {
+ struct cper_sec_pcie *pcie_err;
+ pcie_err = (struct cper_sec_pcie *)(gdata+1);
+ if (sev == GHES_SEV_RECOVERABLE &&
+ sec_sev == GHES_SEV_RECOVERABLE &&
+ pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
+ pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+ unsigned int devfn;
+ int aer_severity;
+
+ devfn = PCI_DEVFN(pcie_err->device_id.device,
+ pcie_err->device_id.function);
+ aer_severity = cper_severity_to_aer(sev);
+
+ /*
+ * If firmware reset the component to contain
+ * the error, we must reinitialize it before
+ * use, so treat it as a fatal AER error.
+ */
+ if (gdata->flags & CPER_SEC_RESET)
+ aer_severity = AER_FATAL;
+
+ aer_recover_queue(pcie_err->device_id.segment,
+ pcie_err->device_id.bus,
+ devfn, aer_severity,
+ (struct aer_capability_regs *)
+ pcie_err->aer_info);
+ }
+
}
#endif
}
+}
+
+static void __ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_generic_status *estatus)
+{
+ static atomic_t seqno;
+ unsigned int curr_seqno;
+ char pfx_seq[64];
+
+ if (pfx == NULL) {
+ if (ghes_severity(estatus->error_severity) <=
+ GHES_SEV_CORRECTED)
+ pfx = KERN_WARNING;
+ else
+ pfx = KERN_ERR;
+ }
+ curr_seqno = atomic_inc_return(&seqno);
+ snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
+ printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
+ pfx_seq, generic->header.source_id);
+ cper_estatus_print(pfx_seq, estatus);
+}
+
+static int ghes_print_estatus(const char *pfx,
+ const struct acpi_hest_generic *generic,
+ const struct acpi_generic_status *estatus)
+{
+ /* Not more than 2 messages every 5 seconds */
+ static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
+ static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
+ struct ratelimit_state *ratelimit;
+
+ if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
+ ratelimit = &ratelimit_corrected;
+ else
+ ratelimit = &ratelimit_uncorrected;
+ if (__ratelimit(ratelimit)) {
+ __ghes_print_estatus(pfx, generic, estatus);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * GHES error status reporting throttle, to report more kinds of
+ * errors, instead of just most frequently occurred errors.
+ */
+static int ghes_estatus_cached(struct acpi_generic_status *estatus)
+{
+ u32 len;
+ int i, cached = 0;
+ unsigned long long now;
+ struct ghes_estatus_cache *cache;
+ struct acpi_generic_status *cache_estatus;
+
+ len = cper_estatus_len(estatus);
+ rcu_read_lock();
+ for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+ cache = rcu_dereference(ghes_estatus_caches[i]);
+ if (cache == NULL)
+ continue;
+ if (len != cache->estatus_len)
+ continue;
+ cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+ if (memcmp(estatus, cache_estatus, len))
+ continue;
+ atomic_inc(&cache->count);
+ now = sched_clock();
+ if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
+ cached = 1;
+ break;
+ }
+ rcu_read_unlock();
+ return cached;
+}
+
+static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
+ struct acpi_hest_generic *generic,
+ struct acpi_generic_status *estatus)
+{
+ int alloced;
+ u32 len, cache_len;
+ struct ghes_estatus_cache *cache;
+ struct acpi_generic_status *cache_estatus;
+
+ alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
+ if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
+ atomic_dec(&ghes_estatus_cache_alloced);
+ return NULL;
+ }
+ len = cper_estatus_len(estatus);
+ cache_len = GHES_ESTATUS_CACHE_LEN(len);
+ cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
+ if (!cache) {
+ atomic_dec(&ghes_estatus_cache_alloced);
+ return NULL;
+ }
+ cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+ memcpy(cache_estatus, estatus, len);
+ cache->estatus_len = len;
+ atomic_set(&cache->count, 0);
+ cache->generic = generic;
+ cache->time_in = sched_clock();
+ return cache;
+}
+
+static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
+{
+ u32 len;
+
+ len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
+ len = GHES_ESTATUS_CACHE_LEN(len);
+ gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
+ atomic_dec(&ghes_estatus_cache_alloced);
+}
+
+static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
+{
+ struct ghes_estatus_cache *cache;
- if (!processed && printk_ratelimit())
- pr_warning(GHES_PFX
- "Unknown error record from generic hardware error source: %d\n",
- ghes->generic->header.source_id);
+ cache = container_of(head, struct ghes_estatus_cache, rcu);
+ ghes_estatus_cache_free(cache);
+}
+
+static void ghes_estatus_cache_add(
+ struct acpi_hest_generic *generic,
+ struct acpi_generic_status *estatus)
+{
+ int i, slot = -1, count;
+ unsigned long long now, duration, period, max_period = 0;
+ struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
+
+ new_cache = ghes_estatus_cache_alloc(generic, estatus);
+ if (new_cache == NULL)
+ return;
+ rcu_read_lock();
+ now = sched_clock();
+ for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+ cache = rcu_dereference(ghes_estatus_caches[i]);
+ if (cache == NULL) {
+ slot = i;
+ slot_cache = NULL;
+ break;
+ }
+ duration = now - cache->time_in;
+ if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
+ slot = i;
+ slot_cache = cache;
+ break;
+ }
+ count = atomic_read(&cache->count);
+ period = duration;
+ do_div(period, (count + 1));
+ if (period > max_period) {
+ max_period = period;
+ slot = i;
+ slot_cache = cache;
+ }
+ }
+ /* new_cache must be put into array after its contents are written */
+ smp_wmb();
+ if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
+ slot_cache, new_cache) == slot_cache) {
+ if (slot_cache)
+ call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
+ } else
+ ghes_estatus_cache_free(new_cache);
+ rcu_read_unlock();
}
static int ghes_proc(struct ghes *ghes)
@@ -269,13 +672,52 @@ static int ghes_proc(struct ghes *ghes)
rc = ghes_read_estatus(ghes, 0);
if (rc)
goto out;
- ghes_do_proc(ghes);
-
+ if (!ghes_estatus_cached(ghes->estatus)) {
+ if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
+ ghes_estatus_cache_add(ghes->generic, ghes->estatus);
+ }
+ ghes_do_proc(ghes, ghes->estatus);
out:
ghes_clear_estatus(ghes);
return 0;
}
+static void ghes_add_timer(struct ghes *ghes)
+{
+ struct acpi_hest_generic *g = ghes->generic;
+ unsigned long expire;
+
+ if (!g->notify.poll_interval) {
+ pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
+ g->header.source_id);
+ return;
+ }
+ expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
+ ghes->timer.expires = round_jiffies_relative(expire);
+ add_timer(&ghes->timer);
+}
+
+static void ghes_poll_func(unsigned long data)
+{
+ struct ghes *ghes = (void *)data;
+
+ ghes_proc(ghes);
+ if (!(ghes->flags & GHES_EXITING))
+ ghes_add_timer(ghes);
+}
+
+static irqreturn_t ghes_irq_func(int irq, void *data)
+{
+ struct ghes *ghes = data;
+ int rc;
+
+ rc = ghes_proc(ghes);
+ if (rc)
+ return IRQ_NONE;
+
+ return IRQ_HANDLED;
+}
+
static int ghes_notify_sci(struct notifier_block *this,
unsigned long event, void *data)
{
@@ -292,32 +734,200 @@ static int ghes_notify_sci(struct notifier_block *this,
return ret;
}
+static struct llist_node *llist_nodes_reverse(struct llist_node *llnode)
+{
+ struct llist_node *next, *tail = NULL;
+
+ while (llnode) {
+ next = llnode->next;
+ llnode->next = tail;
+ tail = llnode;
+ llnode = next;
+ }
+
+ return tail;
+}
+
+static void ghes_proc_in_irq(struct irq_work *irq_work)
+{
+ struct llist_node *llnode, *next;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic *generic;
+ struct acpi_generic_status *estatus;
+ u32 len, node_len;
+
+ llnode = llist_del_all(&ghes_estatus_llist);
+ /*
+ * Because the time order of estatus in list is reversed,
+ * revert it back to proper order.
+ */
+ llnode = llist_nodes_reverse(llnode);
+ while (llnode) {
+ next = llnode->next;
+ estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+ llnode);
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ len = cper_estatus_len(estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ ghes_do_proc(estatus_node->ghes, estatus);
+ if (!ghes_estatus_cached(estatus)) {
+ generic = estatus_node->generic;
+ if (ghes_print_estatus(NULL, generic, estatus))
+ ghes_estatus_cache_add(generic, estatus);
+ }
+ gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
+ node_len);
+ llnode = next;
+ }
+}
+
+static void ghes_print_queued_estatus(void)
+{
+ struct llist_node *llnode;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_hest_generic *generic;
+ struct acpi_generic_status *estatus;
+ u32 len, node_len;
+
+ llnode = llist_del_all(&ghes_estatus_llist);
+ /*
+ * Because the time order of estatus in list is reversed,
+ * revert it back to proper order.
+ */
+ llnode = llist_nodes_reverse(llnode);
+ while (llnode) {
+ estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+ llnode);
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ len = cper_estatus_len(estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ generic = estatus_node->generic;
+ ghes_print_estatus(NULL, generic, estatus);
+ llnode = llnode->next;
+ }
+}
+
+static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
+{
+ struct ghes *ghes, *ghes_global = NULL;
+ int sev, sev_global = -1;
+ int ret = NMI_DONE;
+
+ raw_spin_lock(&ghes_nmi_lock);
+ list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
+ if (ghes_read_estatus(ghes, 1)) {
+ ghes_clear_estatus(ghes);
+ continue;
+ }
+ sev = ghes_severity(ghes->estatus->error_severity);
+ if (sev > sev_global) {
+ sev_global = sev;
+ ghes_global = ghes;
+ }
+ ret = NMI_HANDLED;
+ }
+
+ if (ret == NMI_DONE)
+ goto out;
+
+ if (sev_global >= GHES_SEV_PANIC) {
+ oops_begin();
+ ghes_print_queued_estatus();
+ __ghes_print_estatus(KERN_EMERG, ghes_global->generic,
+ ghes_global->estatus);
+ /* reboot to log the error! */
+ if (panic_timeout == 0)
+ panic_timeout = ghes_panic_timeout;
+ panic("Fatal hardware error!");
+ }
+
+ list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+ u32 len, node_len;
+ struct ghes_estatus_node *estatus_node;
+ struct acpi_generic_status *estatus;
+#endif
+ if (!(ghes->flags & GHES_TO_CLEAR))
+ continue;
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+ if (ghes_estatus_cached(ghes->estatus))
+ goto next;
+ /* Save estatus for further processing in IRQ context */
+ len = cper_estatus_len(ghes->estatus);
+ node_len = GHES_ESTATUS_NODE_LEN(len);
+ estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
+ node_len);
+ if (estatus_node) {
+ estatus_node->ghes = ghes;
+ estatus_node->generic = ghes->generic;
+ estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+ memcpy(estatus, ghes->estatus, len);
+ llist_add(&estatus_node->llnode, &ghes_estatus_llist);
+ }
+next:
+#endif
+ ghes_clear_estatus(ghes);
+ }
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+ irq_work_queue(&ghes_proc_irq_work);
+#endif
+
+out:
+ raw_spin_unlock(&ghes_nmi_lock);
+ return ret;
+}
+
static struct notifier_block ghes_notifier_sci = {
.notifier_call = ghes_notify_sci,
};
-static int __devinit ghes_probe(struct platform_device *ghes_dev)
+static unsigned long ghes_esource_prealloc_size(
+ const struct acpi_hest_generic *generic)
+{
+ unsigned long block_length, prealloc_records, prealloc_size;
+
+ block_length = min_t(unsigned long, generic->error_block_length,
+ GHES_ESTATUS_MAX_SIZE);
+ prealloc_records = max_t(unsigned long,
+ generic->records_to_preallocate, 1);
+ prealloc_size = min_t(unsigned long, block_length * prealloc_records,
+ GHES_ESOURCE_PREALLOC_MAX_SIZE);
+
+ return prealloc_size;
+}
+
+static int ghes_probe(struct platform_device *ghes_dev)
{
struct acpi_hest_generic *generic;
struct ghes *ghes = NULL;
+ unsigned long len;
int rc = -EINVAL;
generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
if (!generic->enabled)
return -ENODEV;
- if (generic->error_block_length <
- sizeof(struct acpi_hest_generic_status)) {
- pr_warning(FW_BUG GHES_PFX
-"Invalid error block length: %u for generic hardware error source: %d\n",
- generic->error_block_length,
+ switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ case ACPI_HEST_NOTIFY_SCI:
+ case ACPI_HEST_NOTIFY_NMI:
+ break;
+ case ACPI_HEST_NOTIFY_LOCAL:
+ pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
generic->header.source_id);
goto err;
+ default:
+ pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
+ generic->notify.type, generic->header.source_id);
+ goto err;
}
- if (generic->records_to_preallocate == 0) {
- pr_warning(FW_BUG GHES_PFX
-"Invalid records to preallocate: %u for generic hardware error source: %d\n",
- generic->records_to_preallocate,
+
+ rc = -EIO;
+ if (generic->error_block_length <
+ sizeof(struct acpi_generic_status)) {
+ pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
+ generic->error_block_length,
generic->header.source_id);
goto err;
}
@@ -327,42 +937,58 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev)
ghes = NULL;
goto err;
}
- if (generic->notify.type == ACPI_HEST_NOTIFY_SCI) {
+
+ rc = ghes_edac_register(ghes, &ghes_dev->dev);
+ if (rc < 0)
+ goto err;
+
+ switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ ghes->timer.function = ghes_poll_func;
+ ghes->timer.data = (unsigned long)ghes;
+ init_timer_deferrable(&ghes->timer);
+ ghes_add_timer(ghes);
+ break;
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ /* External interrupt vector is GSI */
+ rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
+ if (rc) {
+ pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
+ generic->header.source_id);
+ goto err_edac_unreg;
+ }
+ rc = request_irq(ghes->irq, ghes_irq_func, 0, "GHES IRQ", ghes);
+ if (rc) {
+ pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
+ generic->header.source_id);
+ goto err_edac_unreg;
+ }
+ break;
+ case ACPI_HEST_NOTIFY_SCI:
mutex_lock(&ghes_list_mutex);
if (list_empty(&ghes_sci))
register_acpi_hed_notifier(&ghes_notifier_sci);
list_add_rcu(&ghes->list, &ghes_sci);
mutex_unlock(&ghes_list_mutex);
- } else {
- unsigned char *notify = NULL;
-
- switch (generic->notify.type) {
- case ACPI_HEST_NOTIFY_POLLED:
- notify = "POLL";
- break;
- case ACPI_HEST_NOTIFY_EXTERNAL:
- case ACPI_HEST_NOTIFY_LOCAL:
- notify = "IRQ";
- break;
- case ACPI_HEST_NOTIFY_NMI:
- notify = "NMI";
- break;
- }
- if (notify) {
- pr_warning(GHES_PFX
-"Generic hardware error source: %d notified via %s is not supported!\n",
- generic->header.source_id, notify);
- } else {
- pr_warning(FW_WARN GHES_PFX
-"Unknown notification type: %u for generic hardware error source: %d\n",
- generic->notify.type, generic->header.source_id);
- }
- rc = -ENODEV;
- goto err;
+ break;
+ case ACPI_HEST_NOTIFY_NMI:
+ len = ghes_esource_prealloc_size(generic);
+ ghes_estatus_pool_expand(len);
+ mutex_lock(&ghes_list_mutex);
+ if (list_empty(&ghes_nmi))
+ register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0,
+ "ghes");
+ list_add_rcu(&ghes->list, &ghes_nmi);
+ mutex_unlock(&ghes_list_mutex);
+ break;
+ default:
+ BUG();
}
platform_set_drvdata(ghes_dev, ghes);
return 0;
+err_edac_unreg:
+ ghes_edac_unregister(ghes);
err:
if (ghes) {
ghes_fini(ghes);
@@ -371,15 +997,23 @@ err:
return rc;
}
-static int __devexit ghes_remove(struct platform_device *ghes_dev)
+static int ghes_remove(struct platform_device *ghes_dev)
{
struct ghes *ghes;
struct acpi_hest_generic *generic;
+ unsigned long len;
ghes = platform_get_drvdata(ghes_dev);
generic = ghes->generic;
+ ghes->flags |= GHES_EXITING;
switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ del_timer_sync(&ghes->timer);
+ break;
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ free_irq(ghes->irq, ghes);
+ break;
case ACPI_HEST_NOTIFY_SCI:
mutex_lock(&ghes_list_mutex);
list_del_rcu(&ghes->list);
@@ -387,13 +1021,29 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
unregister_acpi_hed_notifier(&ghes_notifier_sci);
mutex_unlock(&ghes_list_mutex);
break;
+ case ACPI_HEST_NOTIFY_NMI:
+ mutex_lock(&ghes_list_mutex);
+ list_del_rcu(&ghes->list);
+ if (list_empty(&ghes_nmi))
+ unregister_nmi_handler(NMI_LOCAL, "ghes");
+ mutex_unlock(&ghes_list_mutex);
+ /*
+ * To synchronize with NMI handler, ghes can only be
+ * freed after NMI handler finishes.
+ */
+ synchronize_rcu();
+ len = ghes_esource_prealloc_size(generic);
+ ghes_estatus_pool_shrink(len);
+ break;
default:
BUG();
break;
}
- synchronize_rcu();
ghes_fini(ghes);
+
+ ghes_edac_unregister(ghes);
+
kfree(ghes);
platform_set_drvdata(ghes_dev, NULL);
@@ -412,6 +1062,8 @@ static struct platform_driver ghes_platform_driver = {
static int __init ghes_init(void)
{
+ int rc;
+
if (acpi_disabled)
return -ENODEV;
@@ -420,12 +1072,54 @@ static int __init ghes_init(void)
return -EINVAL;
}
- return platform_driver_register(&ghes_platform_driver);
+ if (ghes_disable) {
+ pr_info(GHES_PFX "GHES is not enabled!\n");
+ return -EINVAL;
+ }
+
+ init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+
+ rc = ghes_ioremap_init();
+ if (rc)
+ goto err;
+
+ rc = ghes_estatus_pool_init();
+ if (rc)
+ goto err_ioremap_exit;
+
+ rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
+ GHES_ESTATUS_CACHE_ALLOCED_MAX);
+ if (rc)
+ goto err_pool_exit;
+
+ rc = platform_driver_register(&ghes_platform_driver);
+ if (rc)
+ goto err_pool_exit;
+
+ rc = apei_osc_setup();
+ if (rc == 0 && osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
+ else if (rc == 0 && !osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
+ else if (rc && osc_sb_apei_support_acked)
+ pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
+ else
+ pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
+
+ return 0;
+err_pool_exit:
+ ghes_estatus_pool_exit();
+err_ioremap_exit:
+ ghes_ioremap_exit();
+err:
+ return rc;
}
static void __exit ghes_exit(void)
{
platform_driver_unregister(&ghes_platform_driver);
+ ghes_estatus_pool_exit();
+ ghes_ioremap_exit();
}
module_init(ghes_init);
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 1a3508a7fe0..f5e37f32c71 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -36,19 +36,20 @@
#include <linux/io.h>
#include <linux/platform_device.h>
#include <acpi/apei.h>
+#include <asm/mce.h>
#include "apei-internal.h"
#define HEST_PFX "HEST: "
-int hest_disable;
+bool hest_disable;
EXPORT_SYMBOL_GPL(hest_disable);
/* HEST table parsing */
-static struct acpi_table_hest *hest_tab;
+static struct acpi_table_hest *__read_mostly hest_tab;
-static int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
+static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
[ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */
[ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
[ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi),
@@ -89,7 +90,7 @@ int apei_hest_parse(apei_hest_func_t func, void *data)
struct acpi_hest_header *hest_hdr;
int i, rc, len;
- if (hest_disable)
+ if (hest_disable || !hest_tab)
return -EINVAL;
hest_hdr = (struct acpi_hest_header *)(hest_tab + 1);
@@ -121,12 +122,47 @@ int apei_hest_parse(apei_hest_func_t func, void *data)
}
EXPORT_SYMBOL_GPL(apei_hest_parse);
+/*
+ * Check if firmware advertises firmware first mode. We need FF bit to be set
+ * along with a set of MC banks which work in FF mode.
+ */
+static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data)
+{
+#ifdef CONFIG_X86_MCE
+ int i;
+ struct acpi_hest_ia_corrected *cmc;
+ struct acpi_hest_ia_error_bank *mc_bank;
+
+ if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
+ return 0;
+
+ cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
+ if (!cmc->enabled)
+ return 0;
+
+ /*
+ * We expect HEST to provide a list of MC banks that report errors
+ * in firmware first mode. Otherwise, return non-zero value to
+ * indicate that we are done parsing HEST.
+ */
+ if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks)
+ return 1;
+
+ pr_info(HEST_PFX "Enabling Firmware First mode for corrected errors.\n");
+
+ mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1);
+ for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
+ mce_disable_bank(mc_bank->bank_number);
+#endif
+ return 1;
+}
+
struct ghes_arr {
struct platform_device **ghes_devs;
unsigned int count;
};
-static int hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void *data)
+static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void *data)
{
int *count = data;
@@ -135,17 +171,27 @@ static int hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void *data)
return 0;
}
-static int hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
+static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
{
struct platform_device *ghes_dev;
struct ghes_arr *ghes_arr = data;
- int rc;
+ int rc, i;
if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
return 0;
if (!((struct acpi_hest_generic *)hest_hdr)->enabled)
return 0;
+ for (i = 0; i < ghes_arr->count; i++) {
+ struct acpi_hest_header *hdr;
+ ghes_dev = ghes_arr->ghes_devs[i];
+ hdr = *(struct acpi_hest_header **)ghes_dev->dev.platform_data;
+ if (hdr->source_id == hest_hdr->source_id) {
+ pr_warning(FW_WARN HEST_PFX "Duplicated hardware error source ID: %d.\n",
+ hdr->source_id);
+ return -EIO;
+ }
+ }
ghes_dev = platform_device_alloc("GHES", hest_hdr->source_id);
if (!ghes_dev)
return -ENOMEM;
@@ -165,7 +211,7 @@ err:
return rc;
}
-static int hest_ghes_dev_register(unsigned int ghes_count)
+static int __init hest_ghes_dev_register(unsigned int ghes_count)
{
int rc, i;
struct ghes_arr ghes_arr;
@@ -195,46 +241,42 @@ static int __init setup_hest_disable(char *str)
__setup("hest_disable", setup_hest_disable);
-static int __init hest_init(void)
+void __init acpi_hest_init(void)
{
acpi_status status;
int rc = -ENODEV;
unsigned int ghes_count = 0;
- if (acpi_disabled)
- goto err;
-
if (hest_disable) {
- pr_info(HEST_PFX "HEST tabling parsing is disabled.\n");
- goto err;
+ pr_info(HEST_PFX "Table parsing disabled.\n");
+ return;
}
status = acpi_get_table(ACPI_SIG_HEST, 0,
(struct acpi_table_header **)&hest_tab);
- if (status == AE_NOT_FOUND) {
- pr_info(HEST_PFX "Table is not found!\n");
+ if (status == AE_NOT_FOUND)
goto err;
- } else if (ACPI_FAILURE(status)) {
+ else if (ACPI_FAILURE(status)) {
const char *msg = acpi_format_exception(status);
pr_err(HEST_PFX "Failed to get table, %s\n", msg);
rc = -EINVAL;
goto err;
}
- rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
- if (rc)
- goto err;
-
- rc = hest_ghes_dev_register(ghes_count);
- if (rc)
- goto err;
+ if (!acpi_disable_cmcff)
+ apei_hest_parse(hest_parse_cmc, NULL);
- pr_info(HEST_PFX "HEST table parsing is initialized.\n");
+ if (!ghes_disable) {
+ rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
+ if (rc)
+ goto err;
+ rc = hest_ghes_dev_register(ghes_count);
+ if (rc)
+ goto err;
+ }
- return 0;
+ pr_info(HEST_PFX "Table parsing has been initialized.\n");
+ return;
err:
hest_disable = 1;
- return rc;
}
-
-subsys_initcall(hest_init);