@@ -29,12 +29,6 @@ static const uint32_t ged_supported_events[] = {
ACPI_GED_ERROR_EVT,
};
-/*
- * ACPI 5.0b: 5.6.6 Device Object Notifications
- * Table 5-135 Error Device Notification Values
- */
-#define ERROR_DEVICE_NOTIFICATION 0x80
-
/*
* The ACPI Generic Event Device (GED) is a hardware-reduced specific
* device[ACPI v6.1 Section 5.6.9] that handles all platform events,
@@ -124,9 +118,14 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev,
aml_int(0x80)));
break;
case ACPI_GED_ERROR_EVT:
+ /*
+ * ACPI 5.0b: 5.6.6 Device Object Notifications
+ * Table 5-135 Error Device Notification Values
+ * Defines 0x80 as the value to be used on notifications
+ */
aml_append(if_ctx,
aml_notify(aml_name(ACPI_APEI_ERROR_DEVICE),
- aml_int(ERROR_DEVICE_NOTIFICATION)));
+ aml_int(0x80)));
break;
case ACPI_GED_NVDIMM_HOTPLUG_EVT:
aml_append(if_ctx,
@@ -41,6 +41,12 @@
/* Address offset in Generic Address Structure(GAS) */
#define GAS_ADDR_OFFSET 4
+/*
+ * ACPI spec 1.0b
+ * 5.2.3 System Description Table Header (36 bytes long)
+ */
+#define ACPI_DESC_HEADER_OFFSET 36
+
/*
* The total size of Generic Error Data Entry
* ACPI 6.1/6.2: 18.3.2.7.1 Generic Error Data,
@@ -226,8 +232,8 @@ ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
* Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs.
* See docs/specs/acpi_hest_ghes.rst for blobs format.
*/
-static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker,
- int num_sources)
+static void build_ghes_error_table(AcpiGhesState *ags, GArray *hardware_errors,
+ BIOSLinker *linker, int num_sources)
{
int i, error_status_block_offset;
@@ -272,13 +278,15 @@ static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker,
i * ACPI_GHES_MAX_RAW_DATA_LENGTH);
}
- /*
- * Tell firmware to write hardware_errors GPA into
- * hardware_errors_addr fw_cfg, once the former has been initialized.
- */
- bios_linker_loader_write_pointer(linker, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, 0,
- sizeof(uint64_t),
- ACPI_HW_ERROR_FW_CFG_FILE, 0);
+ if (!ags->use_hest_addr) {
+ /*
+ * Tell firmware to write hardware_errors GPA into
+ * hardware_errors_addr fw_cfg, once the former has been initialized.
+ */
+ bios_linker_loader_write_pointer(linker, ACPI_HW_ERROR_ADDR_FW_CFG_FILE,
+ 0, sizeof(uint64_t),
+ ACPI_HW_ERROR_FW_CFG_FILE, 0);
+ }
}
/* Build Generic Hardware Error Source version 2 (GHESv2) */
@@ -365,11 +373,11 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
uint32_t hest_offset;
int i;
- build_ghes_error_table(hardware_errors, linker, num_sources);
+ hest_offset = table_data->len;
- acpi_table_begin(&table, table_data);
+ build_ghes_error_table(ags, hardware_errors, linker, num_sources);
- hest_offset = table_data->len;
+ acpi_table_begin(&table, table_data);
/* Error Source Count */
build_append_int_noprefix(table_data, num_sources, 4);
@@ -383,7 +391,6 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
* Tell firmware to write into GPA the address of HEST via fw_cfg,
* once initialized.
*/
-
if (ags->use_hest_addr) {
bios_linker_loader_write_pointer(linker,
ACPI_HEST_ADDR_FW_CFG_FILE, 0,
@@ -399,13 +406,13 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
fw_cfg_add_file(s, ACPI_HW_ERROR_FW_CFG_FILE, hardware_error->data,
hardware_error->len);
- /* Create a read-write fw_cfg file for Address */
- fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
- NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false);
-
if (ags->use_hest_addr) {
fw_cfg_add_file_callback(s, ACPI_HEST_ADDR_FW_CFG_FILE, NULL, NULL,
NULL, &(ags->hest_addr_le), sizeof(ags->hest_addr_le), false);
+ } else {
+ /* Create a read-write fw_cfg file for Address */
+ fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
+ NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false);
}
}
@@ -432,7 +439,7 @@ static void get_hw_error_offsets(uint64_t ghes_addr,
}
static void get_ghes_source_offsets(uint16_t source_id,
- uint64_t hest_entry_addr,
+ uint64_t hest_addr,
uint64_t *cper_addr,
uint64_t *read_ack_start_addr,
Error **errp)
@@ -441,12 +448,13 @@ static void get_ghes_source_offsets(uint16_t source_id,
uint64_t err_source_entry, error_block_addr;
uint32_t num_sources, i;
+ hest_addr += ACPI_DESC_HEADER_OFFSET;
- cpu_physical_memory_read(hest_entry_addr, &num_sources,
+ cpu_physical_memory_read(hest_addr, &num_sources,
sizeof(num_sources));
num_sources = le32_to_cpu(num_sources);
- err_source_entry = hest_entry_addr + sizeof(num_sources);
+ err_source_entry = hest_addr + sizeof(num_sources);
/*
* Currently, HEST Error source navigates only for GHESv2 tables
@@ -468,7 +476,6 @@ static void get_ghes_source_offsets(uint16_t source_id,
/* Compare CPER source address at the GHESv2 structure */
addr += sizeof(type);
cpu_physical_memory_read(addr, &src_id, sizeof(src_id));
-
if (le16_to_cpu(src_id) == source_id) {
break;
}
@@ -956,8 +956,10 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
build_dbg2(tables_blob, tables->linker, vms);
if (vms->ras) {
- AcpiGhesState *ags;
+ static const AcpiNotificationSourceId *notify;
AcpiGedState *acpi_ged_state;
+ unsigned int notify_sz;
+ AcpiGhesState *ags;
acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
NULL));
@@ -967,16 +969,16 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
acpi_add_table(table_offsets, tables_blob);
if (!ags->use_hest_addr) {
- acpi_build_hest(ags, tables_blob, tables->hardware_errors,
- tables->linker, hest_ghes_notify_9_2,
- ARRAY_SIZE(hest_ghes_notify_9_2),
- vms->oem_id, vms->oem_table_id);
+ notify = hest_ghes_notify_9_2;
+ notify_sz = ARRAY_SIZE(hest_ghes_notify_9_2);
} else {
- acpi_build_hest(ags, tables_blob, tables->hardware_errors,
- tables->linker, hest_ghes_notify,
- ARRAY_SIZE(hest_ghes_notify),
- vms->oem_id, vms->oem_table_id);
+ notify = hest_ghes_notify;
+ notify_sz = ARRAY_SIZE(hest_ghes_notify);
}
+
+ acpi_build_hest(ags, tables_blob, tables->hardware_errors,
+ tables->linker, notify, notify_sz,
+ vms->oem_id, vms->oem_table_id);
}
}
@@ -12,11 +12,110 @@
#
# - ARM registers: power_state, mpidr.
+"""
+Generates an ARM processor error CPER, compatible with
+UEFI 2.9A Errata.
+
+Injecting such errors can be done using:
+
+ $ ./scripts/ghes_inject.py arm
+ Error injected.
+
+Produces a simple CPER record, as detected on a Linux guest:
+
+[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
+[Hardware Error]: event severity: recoverable
+[Hardware Error]: Error 0, type: recoverable
+[Hardware Error]: section_type: ARM processor error
+[Hardware Error]: MIDR: 0x0000000000000000
+[Hardware Error]: running state: 0x0
+[Hardware Error]: Power State Coordination Interface state: 0
+[Hardware Error]: Error info structure 0:
+[Hardware Error]: num errors: 2
+[Hardware Error]: error_type: 0x02: cache error
+[Hardware Error]: error_info: 0x000000000091000f
+[Hardware Error]: transaction type: Data Access
+[Hardware Error]: cache error, operation type: Data write
+[Hardware Error]: cache level: 2
+[Hardware Error]: processor context not corrupted
+[Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error
+
+The ARM Processor Error message can be customized via command line
+parameters. For instance:
+
+ $ ./scripts/ghes_inject.py arm --mpidr 0x444 --running --affinity 1 \
+ --error-info 12345678 --vendor 0x13,123,4,5,1 --ctx-array 0,1,2,3,4,5 \
+ -t cache tlb bus micro-arch tlb,micro-arch
+ Error injected.
+
+Injects this error, as detected on a Linux guest:
+
+[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
+[Hardware Error]: event severity: recoverable
+[Hardware Error]: Error 0, type: recoverable
+[Hardware Error]: section_type: ARM processor error
+[Hardware Error]: MIDR: 0x0000000000000000
+[Hardware Error]: Multiprocessor Affinity Register (MPIDR): 0x0000000000000000
+[Hardware Error]: error affinity level: 0
+[Hardware Error]: running state: 0x1
+[Hardware Error]: Power State Coordination Interface state: 0
+[Hardware Error]: Error info structure 0:
+[Hardware Error]: num errors: 2
+[Hardware Error]: error_type: 0x02: cache error
+[Hardware Error]: error_info: 0x0000000000bc614e
+[Hardware Error]: cache level: 2
+[Hardware Error]: processor context not corrupted
+[Hardware Error]: Error info structure 1:
+[Hardware Error]: num errors: 2
+[Hardware Error]: error_type: 0x04: TLB error
+[Hardware Error]: error_info: 0x000000000054007f
+[Hardware Error]: transaction type: Instruction
+[Hardware Error]: TLB error, operation type: Instruction fetch
+[Hardware Error]: TLB level: 1
+[Hardware Error]: processor context not corrupted
+[Hardware Error]: the error has not been corrected
+[Hardware Error]: PC is imprecise
+[Hardware Error]: Error info structure 2:
+[Hardware Error]: num errors: 2
+[Hardware Error]: error_type: 0x08: bus error
+[Hardware Error]: error_info: 0x00000080d6460fff
+[Hardware Error]: transaction type: Generic
+[Hardware Error]: bus error, operation type: Generic read (type of instruction or data request cannot be determined)
+[Hardware Error]: affinity level at which the bus error occurred: 1
+[Hardware Error]: processor context corrupted
+[Hardware Error]: the error has been corrected
+[Hardware Error]: PC is imprecise
+[Hardware Error]: Program execution can be restarted reliably at the PC associated with the error.
+[Hardware Error]: participation type: Local processor observed
+[Hardware Error]: request timed out
+[Hardware Error]: address space: External Memory Access
+[Hardware Error]: memory access attributes:0x20
+[Hardware Error]: access mode: secure
+[Hardware Error]: Error info structure 3:
+[Hardware Error]: num errors: 2
+[Hardware Error]: error_type: 0x10: micro-architectural error
+[Hardware Error]: error_info: 0x0000000078da03ff
+[Hardware Error]: Error info structure 4:
+[Hardware Error]: num errors: 2
+[Hardware Error]: error_type: 0x14: TLB error|micro-architectural error
+[Hardware Error]: Context info structure 0:
+[Hardware Error]: register context type: AArch64 EL1 context registers
+[Hardware Error]: 00000000: 00000000 00000000
+[Hardware Error]: Vendor specific error info has 5 bytes:
+[Hardware Error]: 00000000: 13 7b 04 05 01 .{...
+[Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error
+[Firmware Warn]: GHES: Unhandled processor error type 0x04: TLB error
+[Firmware Warn]: GHES: Unhandled processor error type 0x08: bus error
+[Firmware Warn]: GHES: Unhandled processor error type 0x10: micro-architectural error
+[Firmware Warn]: GHES: Unhandled processor error type 0x14: TLB error|micro-architectural error
+"""
+
import argparse
import re
from qmp_helper import qmp, util, cper_guid
+
class ArmProcessorEinj:
"""
Implements ARM Processor Error injection via GHES
old mode 100644
new mode 100755
@@ -541,7 +541,7 @@ def send_cper_raw(self, cper_data):
self._connect()
- if self.send_cmd("inject-ghes-error", cmd_arg):
+ if self.send_cmd("inject-ghes-v2-error", cmd_arg):
print("Error injected.")
def send_cper(self, notif_type, payload):
@@ -2371,7 +2371,6 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
ags = acpi_ghes_get_state();
-
if (ags && addr) {
ram_addr = qemu_ram_addr_from_host(addr);
if (ram_addr != RAM_ADDR_INVALID &&
deleted file mode 100644