@@ -388,7 +388,7 @@ static int __init xen_pcpu_init(void)
arch_initcall(xen_pcpu_init);
#ifdef CONFIG_ACPI
-bool __init xen_processor_present(uint32_t acpi_id)
+bool xen_processor_present(uint32_t acpi_id)
{
const struct pcpu *pcpu;
bool online = false;
@@ -403,6 +403,7 @@ bool __init xen_processor_present(uint32_t acpi_id)
return online;
}
+EXPORT_SYMBOL_GPL(xen_processor_present);
void xen_sanitize_proc_cap_bits(uint32_t *cap)
{
@@ -48,6 +48,8 @@ static unsigned long *acpi_id_cst_present;
/* Which ACPI P-State dependencies for a enumerated processor */
static struct acpi_psd_package *acpi_psd;
+static bool pr_initialized;
+
static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
{
struct xen_platform_op op = {
@@ -172,8 +174,13 @@ static int xen_copy_psd_data(struct acpi_processor *_pr,
/* 'acpi_processor_preregister_performance' does not parse if the
* num_processors <= 1, but Xen still requires it. Do it manually here.
+ *
+ * Also init the field if not set, as that's possible if the physical
+ * CPUs on the system don't match the data provided in the MADT when
+ * running as a PVH dom0.
*/
- if (pdomain->num_processors <= 1) {
+ if (pdomain->num_processors <= 1 ||
+ dst->shared_type == CPUFREQ_SHARED_TYPE_NONE) {
if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL)
dst->shared_type = CPUFREQ_SHARED_TYPE_ALL;
else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL)
@@ -313,6 +320,155 @@ static unsigned int __init get_max_acpi_id(void)
pr_debug("Max ACPI ID: %u\n", max_acpi_id);
return max_acpi_id;
}
+
+/*
+ * Xen copy of the native acpi_processor_evaluate_cst(): evaluate _CST on
+ * @handle, store each well-formed entry in @info->states[] and set
+ * @info->count. Sanity checks based on CPUID are left out because the
+ * CPUID data seen by dom0 is not the native one; they are left to the
+ * hypervisor, which runs the driver. Returns 0, -ENODEV or -EFAULT.
+ */
+static int xen_acpi_processor_evaluate_cst(acpi_handle handle,
+ struct acpi_processor_power *info)
+{
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ union acpi_object *cst;
+ acpi_status status;
+ u64 count;
+ int last_index = 0;
+ int i, ret = 0;
+
+ status = acpi_evaluate_object(handle, "_CST", NULL, &buffer);
+ if (ACPI_FAILURE(status)) {
+ acpi_handle_debug(handle, "No _CST\n");
+ return -ENODEV;
+ }
+
+ cst = buffer.pointer;
+
+ /* Need the leading count element plus at least one C-state entry. */
+ if (!cst || cst->type != ACPI_TYPE_PACKAGE || cst->package.count < 2) {
+ acpi_handle_warn(handle, "Invalid _CST output\n");
+ ret = -EFAULT;
+ goto end;
+ }
+
+ count = cst->package.elements[0].integer.value;
+
+ /* The advertised count must equal the number of entries that follow. */
+ if (count < 1 || count != cst->package.count - 1) {
+ acpi_handle_warn(handle, "Inconsistent _CST data\n");
+ ret = -EFAULT;
+ goto end;
+ }
+
+ for (i = 1; i <= count; i++) {
+ union acpi_object *element;
+ union acpi_object *obj;
+ struct acpi_power_register *reg;
+ struct acpi_processor_cx cx;
+
+ /*
+ * If there is not enough space for all C-states, skip the
+ * excess ones and log a warning.
+ */
+ if (last_index >= ACPI_PROCESSOR_MAX_POWER - 1) {
+ acpi_handle_warn(handle, "No room for more idle states (limit: %d)\n",
+ ACPI_PROCESSOR_MAX_POWER - 1);
+ break;
+ }
+
+ memset(&cx, 0, sizeof(cx));
+
+ element = &cst->package.elements[i];
+ if (element->type != ACPI_TYPE_PACKAGE) {
+ acpi_handle_info(handle, "_CST C%d type(%x) is not package, skip...\n",
+ i, element->type);
+ continue;
+ }
+
+ if (element->package.count != 4) {
+ acpi_handle_info(handle, "_CST C%d package count(%d) is not 4, skip...\n",
+ i, element->package.count);
+ continue;
+ }
+
+ obj = &element->package.elements[0];
+
+ if (obj->type != ACPI_TYPE_BUFFER) {
+ acpi_handle_info(handle, "_CST C%d package element[0] type(%x) is not buffer, skip...\n",
+ i, obj->type);
+ continue;
+ }
+
+ reg = (struct acpi_power_register *)obj->buffer.pointer;
+
+ obj = &element->package.elements[1];
+ if (obj->type != ACPI_TYPE_INTEGER) {
+ acpi_handle_info(handle, "_CST C[%d] package element[1] type(%x) is not integer, skip...\n",
+ i, obj->type);
+ continue;
+ }
+
+ cx.type = obj->integer.value;
+ /*
+ * There are known cases in which the _CST output does not
+ * contain C1, so if the type of the first state found is not
+ * C1, leave an empty slot for C1 to be filled in later.
+ */
+ if (i == 1 && cx.type != ACPI_STATE_C1)
+ last_index = 1;
+
+ cx.address = reg->address;
+ cx.index = last_index + 1;
+
+ switch (reg->space_id) {
+ case ACPI_ADR_SPACE_FIXED_HARDWARE:
+ cx.entry_method = ACPI_CSTATE_FFH;
+ break;
+
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ cx.entry_method = ACPI_CSTATE_SYSTEMIO;
+ break;
+
+ default:
+ acpi_handle_info(handle, "_CST C%d space_id(%x) neither FIXED_HARDWARE nor SYSTEM_IO, skip...\n",
+ i, reg->space_id);
+ continue;
+ }
+
+ if (cx.type == ACPI_STATE_C1)
+ cx.valid = 1;
+
+ obj = &element->package.elements[2];
+ if (obj->type != ACPI_TYPE_INTEGER) {
+ acpi_handle_info(handle, "_CST C%d package element[2] type(%x) not integer, skip...\n",
+ i, obj->type);
+ continue;
+ }
+
+ cx.latency = obj->integer.value;
+
+ obj = &element->package.elements[3];
+ if (obj->type != ACPI_TYPE_INTEGER) {
+ acpi_handle_info(handle, "_CST C%d package element[3] type(%x) not integer, skip...\n",
+ i, obj->type);
+ continue;
+ }
+
+ memcpy(&info->states[++last_index], &cx, sizeof(cx));
+ }
+
+ acpi_handle_info(handle, "Found %d idle states\n", last_index);
+
+ info->count = last_index;
+
+end:
+ kfree(buffer.pointer);
+
+ return ret;
+}
+
/*
* The read_acpi_id and check_acpi_ids are there to support the Xen
* oddity of virtual CPUs != physical CPUs in the initial domain.
@@ -331,6 +487,7 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
unsigned long long tmp;
union acpi_object object = { 0 };
struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
+ struct acpi_buffer cst_buf = { ACPI_ALLOCATE_BUFFER, NULL };
acpi_io_address pblk = 0;
status = acpi_get_type(handle, &acpi_type);
@@ -354,24 +511,44 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
default:
return AE_OK;
}
- if (invalid_phys_cpuid(acpi_get_phys_id(handle,
- acpi_type == ACPI_TYPE_DEVICE,
- acpi_id))) {
+
+ if (!xen_processor_present(acpi_id)) {
pr_debug("CPU with ACPI ID %u is unavailable\n", acpi_id);
return AE_OK;
}
- /* There are more ACPI Processor objects than in x2APIC or MADT.
- * This can happen with incorrect ACPI SSDT declerations. */
- if (acpi_id >= nr_acpi_bits) {
- pr_debug("max acpi id %u, trying to set %u\n",
- nr_acpi_bits - 1, acpi_id);
- return AE_OK;
- }
+
/* OK, There is a ACPI Processor object */
__set_bit(acpi_id, acpi_id_present);
pr_debug("ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, (unsigned long)pblk);
+ if (!pr_initialized) {
+ struct acpi_processor *pr = context;
+ int rc;
+
+ /*
+ * There's no CPU on the system that has any performance or
+ * power related data, initialize all the required fields by
+ * fetching that info here.
+ *
+ * Note such information is only fetched once, and then reused
+ * for all pCPUs. This won't work on heterogeneous systems
+ * with different Cx and/or Px states between CPUs.
+ */
+
+ pr->handle = handle;
+
+ rc = acpi_processor_get_performance_info(pr);
+ if (rc)
+ pr_debug("ACPI CPU%u failed to get performance data\n",
+ acpi_id);
+ rc = xen_acpi_processor_evaluate_cst(handle, &pr->power);
+ if (rc)
+ pr_debug("ACPI CPU%u failed to get _CST data\n", acpi_id);
+
+ pr_initialized = true;
+ }
+
/* It has P-state dependencies */
if (!acpi_processor_get_psd(handle, &acpi_psd[acpi_id])) {
pr_debug("ACPI CPU%u w/ PST:coord_type = %llu domain = %llu\n",
@@ -379,11 +556,13 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
acpi_psd[acpi_id].domain);
}
- status = acpi_evaluate_object(handle, "_CST", NULL, &buffer);
+ status = acpi_evaluate_object(handle, "_CST", NULL, &cst_buf);
if (ACPI_FAILURE(status)) {
if (!pblk)
return AE_OK;
}
+ kfree(cst_buf.pointer);
+
/* .. and it has a C-state */
__set_bit(acpi_id, acpi_id_cst_present);
@@ -392,8 +571,7 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
static int check_acpi_ids(struct acpi_processor *pr_backup)
{
- if (!pr_backup)
- return -ENODEV;
+ BUG_ON(!pr_backup);
if (acpi_id_present && acpi_id_cst_present)
/* OK, done this once .. skip to uploading */
@@ -422,8 +600,8 @@ static int check_acpi_ids(struct acpi_processor *pr_backup)
acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
ACPI_UINT32_MAX,
- read_acpi_id, NULL, NULL, NULL);
- acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, read_acpi_id, NULL, NULL);
+ read_acpi_id, NULL, pr_backup, NULL);
+ acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, read_acpi_id, pr_backup, NULL);
upload:
if (!bitmap_equal(acpi_id_present, acpi_ids_done, nr_acpi_bits)) {
@@ -464,6 +642,7 @@ static int xen_upload_processor_pm_data(void)
struct acpi_processor *pr_backup = NULL;
int i;
int rc = 0;
+ bool free_perf = false;
pr_info("Uploading Xen processor PM info\n");
@@ -473,12 +652,29 @@ static int xen_upload_processor_pm_data(void)
if (!_pr)
continue;
- if (!pr_backup)
+ if (!pr_backup) {
pr_backup = kmemdup(_pr, sizeof(*_pr), GFP_KERNEL);
+ pr_initialized = true;
+ }
(void)upload_pm_data(_pr);
}
+ if (!pr_backup) {
+ pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
+ if (!pr_backup)
+ return -ENOMEM;
+ pr_backup->performance = kzalloc(sizeof(struct acpi_processor_performance),
+ GFP_KERNEL);
+ if (!pr_backup->performance) {
+ kfree(pr_backup);
+ return -ENOMEM;
+ }
+ free_perf = true;
+ }
+
rc = check_acpi_ids(pr_backup);
+ if (free_perf)
+ kfree(pr_backup->performance);
kfree(pr_backup);
return rc;
@@ -81,7 +81,7 @@ static inline void xen_free_unpopulated_pages(unsigned int nr_pages,
#endif
#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI) && defined(CONFIG_X86)
-bool __init xen_processor_present(uint32_t acpi_id);
+bool xen_processor_present(uint32_t acpi_id);
#else
#include <linux/bug.h>
static inline bool xen_processor_present(uint32_t acpi_id)