Message ID | 20240711000233.684642-4-sathyanarayanan.kuppuswamy@linux.intel.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Add SBAF test to IFS | expand |
On Wed, 10 Jul 2024, Kuppuswamy Sathyanarayanan wrote: > From: Jithu Joseph <jithu.joseph@intel.com> > > In a core, the SBAF test engine is shared between sibling CPUs. > > An SBAF test image contains multiple bundles. Each bundle is further > composed of subunits called programs. When a SBAF test (for a particular > core) is triggered by the user, each SBAF bundle from the loaded test > image is executed sequentially on all the threads on the core using > the stop_core_cpuslocked mechanism. Each bundle execution is initiated by > writing to MSR_ACTIVATE_SBAF. > > SBAF test bundle execution may be aborted when an interrupt occurs or > if the CPU does not have enough power budget for the test. In these > cases the kernel restarts the test from the aborted bundle. SBAF > execution is not retried if the test fails or if the test makes no > forward progress after 5 retries. > > Reviewed-by: Ashok Raj <ashok.raj@intel.com> > Reviewed-by: Tony Luck <tony.luck@intel.com> > Signed-off-by: Jithu Joseph <jithu.joseph@intel.com> > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > --- > drivers/platform/x86/intel/ifs/ifs.h | 30 +++ > drivers/platform/x86/intel/ifs/runtest.c | 234 +++++++++++++++++++++++ > 2 files changed, 264 insertions(+) > > diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h > index 600bb8a1b285..b261be46bce8 100644 > --- a/drivers/platform/x86/intel/ifs/ifs.h > +++ b/drivers/platform/x86/intel/ifs/ifs.h > @@ -157,6 +157,8 @@ > #define MSR_SBAF_HASHES_STATUS 0x000002b9 > #define MSR_AUTHENTICATE_AND_COPY_SBAF_CHUNK 0x000002ba > #define MSR_SBAF_CHUNKS_AUTHENTICATION_STATUS 0x000002bb > +#define MSR_ACTIVATE_SBAF 0x000002bc > +#define MSR_SBAF_STATUS 0x000002bd > > #define MSR_COPY_SCAN_HASHES 0x000002c2 > #define MSR_SCAN_HASHES_STATUS 0x000002c3 > @@ -283,6 +285,34 @@ union ifs_array { > }; > }; > > +/* MSR_ACTIVATE_SBAF bit fields */ > +union ifs_sbaf { > + u64 data; > + struct { > + u32 bundle_idx :9; > + u32 rsvd1 :5; > + u32 pgm_idx :2; > + u32 rsvd2 :16; > + u32 delay :31; > + u32 sigmce :1; > + }; > +}; > + > +/* MSR_SBAF_STATUS bit fields */ > +union ifs_sbaf_status { > + u64 data; > + struct { > + u32 bundle_idx :9; > + u32 rsvd1 :5; > + u32 pgm_idx :2; > + u32 rsvd2 :16; > + u32 error_code :8; > + u32 rsvd3 :21; > + u32 test_fail :1; > + u32 sbaf_status :2; > + }; > +}; > + > /* > * Driver populated error-codes > * 0xFD: Test timed out before completing all the chunks. > diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c > index 282e4bfe30da..a812ec7761b9 100644 > --- a/drivers/platform/x86/intel/ifs/runtest.c > +++ b/drivers/platform/x86/intel/ifs/runtest.c > @@ -29,6 +29,13 @@ struct run_params { > union ifs_status status; > }; > > +struct sbaf_run_params { > + struct ifs_data *ifsd; > + int *retry_cnt; > + union ifs_sbaf *activate; > + union ifs_sbaf_status status; > +}; > + > /* > * Number of TSC cycles that a logical CPU will wait for the other > * logical CPU on the core in the WRMSR(ACTIVATE_SCAN). > @@ -146,6 +153,7 @@ static bool can_restart(union ifs_status status) > #define SPINUNIT 100 /* 100 nsec */ > static atomic_t array_cpus_in; > static atomic_t scan_cpus_in; > +static atomic_t sbaf_cpus_in; > > /* > * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus() > @@ -387,6 +395,226 @@ static void ifs_array_test_gen1(int cpu, struct device *dev) > ifsd->status = SCAN_TEST_PASS; > } > > +#define SBAF_STATUS_PASS 0 > +#define SBAF_STATUS_SIGN_FAIL 1 > +#define SBAF_STATUS_INTR 2 > +#define SBAF_STATUS_TEST_FAIL 3 > + > +enum sbaf_status_err_code { > + IFS_SBAF_NO_ERROR = 0, > + IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN = 1, > + IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS = 2, > + IFS_SBAF_UNASSIGNED_ERROR_CODE3 = 3, > + IFS_SBAF_INVALID_BUNDLE_INDEX = 4, > + IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS = 5, > + IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY = 6, > + IFS_SBAF_UNASSIGNED_ERROR_CODE7 = 7, > + IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT = 8, > + IFS_SBAF_INTERRUPTED_DURING_EXECUTION = 9, > + IFS_SBAF_INVALID_PROGRAM_INDEX = 0xA, > + IFS_SBAF_CORRUPTED_CHUNK = 0xB, > + IFS_SBAF_DID_NOT_START = 0xC, > +}; > + > +static const char * const sbaf_test_status[] = { > + [IFS_SBAF_NO_ERROR] = "SBAF no error", > + [IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.", > + [IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.", > + [IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3", > + [IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non valid sbaf bundles. Reload test image", Non-valid SBAF ...but given your define is named "INVALID", why not use just Invalid SBAF? > + [IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.", > + [IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SBAF currently", > + [IFS_SBAF_UNASSIGNED_ERROR_CODE7] = "Unassigned error code 0x7", > + [IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently", > + [IFS_SBAF_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SBAF start", > + [IFS_SBAF_INVALID_PROGRAM_INDEX] = "SBAF program index not valid", > + [IFS_SBAF_CORRUPTED_CHUNK] = "SBAF operation aborted due to corrupted chunk", > + [IFS_SBAF_DID_NOT_START] = "SBAF operation did not start", > +}; > + > +static void sbaf_message_not_tested(struct device *dev, int cpu, u64 status_data) > +{ > + union ifs_sbaf_status status = (union ifs_sbaf_status)status_data; > + > + if (status.error_code < ARRAY_SIZE(sbaf_test_status)) { > + dev_info(dev, "CPU(s) %*pbl: SBAF operation did not start. %s\n", > + cpumask_pr_args(cpu_smt_mask(cpu)), > + sbaf_test_status[status.error_code]); > + } else if (status.error_code == IFS_SW_TIMEOUT) { > + dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n", > + cpumask_pr_args(cpu_smt_mask(cpu))); > + } else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) { > + dev_info(dev, "CPU(s) %*pbl: %s\n", > + cpumask_pr_args(cpu_smt_mask(cpu)), > + "Not all SBAF bundles executed. Maximum forward progress retries exceeded"); > + } else { > + dev_info(dev, "CPU(s) %*pbl: SBAF unknown status %llx\n", > + cpumask_pr_args(cpu_smt_mask(cpu)), status.data); > + } > +} > + > +static void sbaf_message_fail(struct device *dev, int cpu, union ifs_sbaf_status status) > +{ > + /* Failed signature check is set when SBAF signature did not match the expected value */ > + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL) { > + dev_err(dev, "CPU(s) %*pbl: Failed signature check\n", > + cpumask_pr_args(cpu_smt_mask(cpu))); > + } > + > + /* Failed to reach end of test */ > + if (status.sbaf_status == SBAF_STATUS_TEST_FAIL) { > + dev_err(dev, "CPU(s) %*pbl: Failed to complete test\n", > + cpumask_pr_args(cpu_smt_mask(cpu))); > + } > +} > + > +static bool sbaf_bundle_completed(union ifs_sbaf_status status) > +{ > + if (status.sbaf_status || status.error_code) > + return false; > + return true; This is same as: return !status.sbaf_status && !status.error_code; > +} > + > +static bool sbaf_can_restart(union ifs_sbaf_status status) > +{ > + enum sbaf_status_err_code err_code = status.error_code; > + > + /* Signature for chunk is bad, or scan test failed */ > + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL || > + status.sbaf_status == SBAF_STATUS_TEST_FAIL) > + return false; > + > + switch (err_code) { > + case IFS_SBAF_NO_ERROR: > + case IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN: > + case IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS: > + case IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT: > + case IFS_SBAF_INTERRUPTED_DURING_EXECUTION: > + return true; > + case IFS_SBAF_UNASSIGNED_ERROR_CODE3: > + case IFS_SBAF_INVALID_BUNDLE_INDEX: > + case IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS: > + case IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY: > + case IFS_SBAF_UNASSIGNED_ERROR_CODE7: > + case IFS_SBAF_INVALID_PROGRAM_INDEX: > + case IFS_SBAF_CORRUPTED_CHUNK: > + case IFS_SBAF_DID_NOT_START: > + break; > + } > + return false; > +} > + > +/* > + * Execute the SBAF test. Called "simultaneously" on all threads of a core > + * at high priority using the stop_cpus mechanism. > + */ > +static int dosbaf(void *data) > +{ > + struct sbaf_run_params *run_params = data; > + int cpu = smp_processor_id(); > + union ifs_sbaf_status status; > + struct ifs_data *ifsd; > + int first; > + > + ifsd = run_params->ifsd; > + > + /* Only the first logical CPU on a core reports result */ > + first = cpumask_first(cpu_smt_mask(cpu)); > + wait_for_sibling_cpu(&sbaf_cpus_in, NSEC_PER_SEC); > + > + /* > + * This WRMSR will wait for other HT threads to also write > + * to this MSR (at most for activate.delay cycles). Then it > + * starts scan of each requested bundle. The core test happens > + * during the "execution" of the WRMSR. > + */ > + wrmsrl(MSR_ACTIVATE_SBAF, run_params->activate->data); > + rdmsrl(MSR_SBAF_STATUS, status.data); > + > + /* Pass back the result of the test */ > + if (cpu == first) > + run_params->status = status; > + > + return 0; > +} > + > +static void ifs_sbaf_test_core(int cpu, struct device *dev) > +{ > + struct sbaf_run_params run_params; > + union ifs_sbaf_status status = {}; > + union ifs_sbaf activate; > + unsigned long timeout; > + struct ifs_data *ifsd; > + int stop_bundle; > + int retries; > + > + ifsd = ifs_get_data(dev); > + > + activate.data = 0; > + activate.delay = IFS_THREAD_WAIT; > + > + timeout = jiffies + (2 * HZ); Unnecessary parenthesis. Other than those things mentioned above, Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
On 7/11/24 2:44 AM, Ilpo Järvinen wrote: > On Wed, 10 Jul 2024, Kuppuswamy Sathyanarayanan wrote: > >> From: Jithu Joseph <jithu.joseph@intel.com> >> >> In a core, the SBAF test engine is shared between sibling CPUs. >> >> An SBAF test image contains multiple bundles. Each bundle is further >> composed of subunits called programs. When a SBAF test (for a particular >> core) is triggered by the user, each SBAF bundle from the loaded test >> image is executed sequentially on all the threads on the core using >> the stop_core_cpuslocked mechanism. Each bundle execution is initiated by >> writing to MSR_ACTIVATE_SBAF. >> >> SBAF test bundle execution may be aborted when an interrupt occurs or >> if the CPU does not have enough power budget for the test. In these >> cases the kernel restarts the test from the aborted bundle. SBAF >> execution is not retried if the test fails or if the test makes no >> forward progress after 5 retries. >> >> Reviewed-by: Ashok Raj <ashok.raj@intel.com> >> Reviewed-by: Tony Luck <tony.luck@intel.com> >> Signed-off-by: Jithu Joseph <jithu.joseph@intel.com> >> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> >> --- >> drivers/platform/x86/intel/ifs/ifs.h | 30 +++ >> drivers/platform/x86/intel/ifs/runtest.c | 234 +++++++++++++++++++++++ >> 2 files changed, 264 insertions(+) >> >> diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h >> index 600bb8a1b285..b261be46bce8 100644 >> --- a/drivers/platform/x86/intel/ifs/ifs.h >> +++ b/drivers/platform/x86/intel/ifs/ifs.h >> @@ -157,6 +157,8 @@ >> #define MSR_SBAF_HASHES_STATUS 0x000002b9 >> #define MSR_AUTHENTICATE_AND_COPY_SBAF_CHUNK 0x000002ba >> #define MSR_SBAF_CHUNKS_AUTHENTICATION_STATUS 0x000002bb >> +#define MSR_ACTIVATE_SBAF 0x000002bc >> +#define MSR_SBAF_STATUS 0x000002bd >> >> #define MSR_COPY_SCAN_HASHES 0x000002c2 >> #define MSR_SCAN_HASHES_STATUS 0x000002c3 >> @@ -283,6 +285,34 @@ union ifs_array { >> }; >> }; >> >> +/* MSR_ACTIVATE_SBAF bit fields */ >> +union ifs_sbaf { >> + u64 data; >> + struct { >> + u32 bundle_idx :9; >> + u32 rsvd1 :5; >> + u32 pgm_idx :2; >> + u32 rsvd2 :16; >> + u32 delay :31; >> + u32 sigmce :1; >> + }; >> +}; >> + >> +/* MSR_SBAF_STATUS bit fields */ >> +union ifs_sbaf_status { >> + u64 data; >> + struct { >> + u32 bundle_idx :9; >> + u32 rsvd1 :5; >> + u32 pgm_idx :2; >> + u32 rsvd2 :16; >> + u32 error_code :8; >> + u32 rsvd3 :21; >> + u32 test_fail :1; >> + u32 sbaf_status :2; >> + }; >> +}; >> + >> /* >> * Driver populated error-codes >> * 0xFD: Test timed out before completing all the chunks. >> diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c >> index 282e4bfe30da..a812ec7761b9 100644 >> --- a/drivers/platform/x86/intel/ifs/runtest.c >> +++ b/drivers/platform/x86/intel/ifs/runtest.c >> @@ -29,6 +29,13 @@ struct run_params { >> union ifs_status status; >> }; >> >> +struct sbaf_run_params { >> + struct ifs_data *ifsd; >> + int *retry_cnt; >> + union ifs_sbaf *activate; >> + union ifs_sbaf_status status; >> +}; >> + >> /* >> * Number of TSC cycles that a logical CPU will wait for the other >> * logical CPU on the core in the WRMSR(ACTIVATE_SCAN). >> @@ -146,6 +153,7 @@ static bool can_restart(union ifs_status status) >> #define SPINUNIT 100 /* 100 nsec */ >> static atomic_t array_cpus_in; >> static atomic_t scan_cpus_in; >> +static atomic_t sbaf_cpus_in; >> >> /* >> * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus() >> @@ -387,6 +395,226 @@ static void ifs_array_test_gen1(int cpu, struct device *dev) >> ifsd->status = SCAN_TEST_PASS; >> } >> >> +#define SBAF_STATUS_PASS 0 >> +#define SBAF_STATUS_SIGN_FAIL 1 >> +#define SBAF_STATUS_INTR 2 >> +#define SBAF_STATUS_TEST_FAIL 3 >> + >> +enum sbaf_status_err_code { >> + IFS_SBAF_NO_ERROR = 0, >> + IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN = 1, >> + IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS = 2, >> + IFS_SBAF_UNASSIGNED_ERROR_CODE3 = 3, >> + IFS_SBAF_INVALID_BUNDLE_INDEX = 4, >> + IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS = 5, >> + IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY = 6, >> + IFS_SBAF_UNASSIGNED_ERROR_CODE7 = 7, >> + IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT = 8, >> + IFS_SBAF_INTERRUPTED_DURING_EXECUTION = 9, >> + IFS_SBAF_INVALID_PROGRAM_INDEX = 0xA, >> + IFS_SBAF_CORRUPTED_CHUNK = 0xB, >> + IFS_SBAF_DID_NOT_START = 0xC, >> +}; >> + >> +static const char * const sbaf_test_status[] = { >> + [IFS_SBAF_NO_ERROR] = "SBAF no error", >> + [IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.", >> + [IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.", >> + [IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3", >> + [IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non valid sbaf bundles. Reload test image", > Non-valid SBAF > > ...but given your define is named "INVALID", why not use just Invalid > SBAF? Above string is from the specification document.But I think it is ok to use "Invalid" or "Non-valid". Jithu, any concerns? >> + [IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.", >> + [IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SBAF currently", >> + [IFS_SBAF_UNASSIGNED_ERROR_CODE7] = "Unassigned error code 0x7", >> + [IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently", >> + [IFS_SBAF_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SBAF start", >> + [IFS_SBAF_INVALID_PROGRAM_INDEX] = "SBAF program index not valid", >> + [IFS_SBAF_CORRUPTED_CHUNK] = "SBAF operation aborted due to corrupted chunk", >> + [IFS_SBAF_DID_NOT_START] = "SBAF operation did not start", >> +}; >> + >> +static void sbaf_message_not_tested(struct device *dev, int cpu, u64 status_data) >> +{ >> + union ifs_sbaf_status status = (union ifs_sbaf_status)status_data; >> + >> + if (status.error_code < ARRAY_SIZE(sbaf_test_status)) { >> + dev_info(dev, "CPU(s) %*pbl: SBAF operation did not start. %s\n", >> + cpumask_pr_args(cpu_smt_mask(cpu)), >> + sbaf_test_status[status.error_code]); >> + } else if (status.error_code == IFS_SW_TIMEOUT) { >> + dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n", >> + cpumask_pr_args(cpu_smt_mask(cpu))); >> + } else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) { >> + dev_info(dev, "CPU(s) %*pbl: %s\n", >> + cpumask_pr_args(cpu_smt_mask(cpu)), >> + "Not all SBAF bundles executed. Maximum forward progress retries exceeded"); >> + } else { >> + dev_info(dev, "CPU(s) %*pbl: SBAF unknown status %llx\n", >> + cpumask_pr_args(cpu_smt_mask(cpu)), status.data); >> + } >> +} >> + >> +static void sbaf_message_fail(struct device *dev, int cpu, union ifs_sbaf_status status) >> +{ >> + /* Failed signature check is set when SBAF signature did not match the expected value */ >> + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL) { >> + dev_err(dev, "CPU(s) %*pbl: Failed signature check\n", >> + cpumask_pr_args(cpu_smt_mask(cpu))); >> + } >> + >> + /* Failed to reach end of test */ >> + if (status.sbaf_status == SBAF_STATUS_TEST_FAIL) { >> + dev_err(dev, "CPU(s) %*pbl: Failed to complete test\n", >> + cpumask_pr_args(cpu_smt_mask(cpu))); >> + } >> +} >> + >> +static bool sbaf_bundle_completed(union ifs_sbaf_status status) >> +{ >> + if (status.sbaf_status || status.error_code) >> + return false; >> + return true; > This is same as: > > return !status.sbaf_status && !status.error_code; Yes. Your version looks good. Do you want me to send a version with this change or you can include it when merging it? >> +} >> + >> +static bool sbaf_can_restart(union ifs_sbaf_status status) >> +{ >> + enum sbaf_status_err_code err_code = status.error_code; >> + >> + /* Signature for chunk is bad, or scan test failed */ >> + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL || >> + status.sbaf_status == SBAF_STATUS_TEST_FAIL) >> + return false; >> + >> + switch (err_code) { >> + case IFS_SBAF_NO_ERROR: >> + case IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN: >> + case IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS: >> + case IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT: >> + case IFS_SBAF_INTERRUPTED_DURING_EXECUTION: >> + return true; >> + case IFS_SBAF_UNASSIGNED_ERROR_CODE3: >> + case IFS_SBAF_INVALID_BUNDLE_INDEX: >> + case IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS: >> + case IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY: >> + case IFS_SBAF_UNASSIGNED_ERROR_CODE7: >> + case IFS_SBAF_INVALID_PROGRAM_INDEX: >> + case IFS_SBAF_CORRUPTED_CHUNK: >> + case IFS_SBAF_DID_NOT_START: >> + break; >> + } >> + return false; >> +} >> + >> +/* >> + * Execute the SBAF test. Called "simultaneously" on all threads of a core >> + * at high priority using the stop_cpus mechanism. >> + */ >> +static int dosbaf(void *data) >> +{ >> + struct sbaf_run_params *run_params = data; >> + int cpu = smp_processor_id(); >> + union ifs_sbaf_status status; >> + struct ifs_data *ifsd; >> + int first; >> + >> + ifsd = run_params->ifsd; >> + >> + /* Only the first logical CPU on a core reports result */ >> + first = cpumask_first(cpu_smt_mask(cpu)); >> + wait_for_sibling_cpu(&sbaf_cpus_in, NSEC_PER_SEC); >> + >> + /* >> + * This WRMSR will wait for other HT threads to also write >> + * to this MSR (at most for activate.delay cycles). Then it >> + * starts scan of each requested bundle. The core test happens >> + * during the "execution" of the WRMSR. >> + */ >> + wrmsrl(MSR_ACTIVATE_SBAF, run_params->activate->data); >> + rdmsrl(MSR_SBAF_STATUS, status.data); >> + >> + /* Pass back the result of the test */ >> + if (cpu == first) >> + run_params->status = status; >> + >> + return 0; >> +} >> + >> +static void ifs_sbaf_test_core(int cpu, struct device *dev) >> +{ >> + struct sbaf_run_params run_params; >> + union ifs_sbaf_status status = {}; >> + union ifs_sbaf activate; >> + unsigned long timeout; >> + struct ifs_data *ifsd; >> + int stop_bundle; >> + int retries; >> + >> + ifsd = ifs_get_data(dev); >> + >> + activate.data = 0; >> + activate.delay = IFS_THREAD_WAIT; >> + >> + timeout = jiffies + (2 * HZ); > Unnecessary parenthesis. > > Other than those things mentioned above, > > Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> > >
On Thu, Jul 11, 2024 at 12:44:23PM +0300, Ilpo Järvinen wrote: > > +static bool sbaf_bundle_completed(union ifs_sbaf_status status) > > +{ > > + if (status.sbaf_status || status.error_code) > > + return false; > > + return true; > > This is same as: > > return !status.sbaf_status && !status.error_code; > Maybe another simplification return !(status.sbaf_status || status.error_code);
On 7/11/2024 9:28 PM, Kuppuswamy Sathyanarayanan wrote: >>> + >>> +static const char * const sbaf_test_status[] = { >>> + [IFS_SBAF_NO_ERROR] = "SBAF no error", >>> + [IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.", >>> + [IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.", >>> + [IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3", >>> + [IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non valid sbaf bundles. Reload test image", >> Non-valid SBAF >> >> ...but given your define is named "INVALID", why not use just Invalid >> SBAF? > > Above string is from the specification document.But I think it is ok to use > "Invalid" or "Non-valid". > > Jithu, any concerns? > Either seems okay, I think "Non-valid SBAF ..." is a bit more preferable as it is more in line with doc. Jithu
On 7/12/2024 2:51 AM, Ashok Raj wrote: > On Thu, Jul 11, 2024 at 12:44:23PM +0300, Ilpo Järvinen wrote: >>> +static bool sbaf_bundle_completed(union ifs_sbaf_status status) >>> +{ >>> + if (status.sbaf_status || status.error_code) >>> + return false; >>> + return true; >> >> This is same as: >> >> return !status.sbaf_status && !status.error_code; >> > > Maybe another simplification > > return !(status.sbaf_status || status.error_code); > Agree, this looks simpler Jithu
On Thu, 11 Jul 2024, Kuppuswamy Sathyanarayanan wrote: > On 7/11/24 2:44 AM, Ilpo Järvinen wrote: > > On Wed, 10 Jul 2024, Kuppuswamy Sathyanarayanan wrote: > > > >> From: Jithu Joseph <jithu.joseph@intel.com> > >> > >> In a core, the SBAF test engine is shared between sibling CPUs. > >> > >> An SBAF test image contains multiple bundles. Each bundle is further > >> composed of subunits called programs. When a SBAF test (for a particular > >> core) is triggered by the user, each SBAF bundle from the loaded test > >> image is executed sequentially on all the threads on the core using > >> the stop_core_cpuslocked mechanism. Each bundle execution is initiated by > >> writing to MSR_ACTIVATE_SBAF. > >> > >> SBAF test bundle execution may be aborted when an interrupt occurs or > >> if the CPU does not have enough power budget for the test. In these > >> cases the kernel restarts the test from the aborted bundle. SBAF > >> execution is not retried if the test fails or if the test makes no > >> forward progress after 5 retries. > >> > >> Reviewed-by: Ashok Raj <ashok.raj@intel.com> > >> Reviewed-by: Tony Luck <tony.luck@intel.com> > >> Signed-off-by: Jithu Joseph <jithu.joseph@intel.com> > >> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > >> --- > >> +static const char * const sbaf_test_status[] = { > >> + [IFS_SBAF_NO_ERROR] = "SBAF no error", > >> + [IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.", > >> + [IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.", > >> + [IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3", > >> + [IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non valid sbaf bundles. Reload test image", > > Non-valid SBAF > > > > ...but given your define is named "INVALID", why not use just Invalid > > SBAF? > > Above string is from the specification document.But I think it is ok to use > "Invalid" or "Non-valid". > > Jithu, any concerns? > > >> + [IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.", > >> + [IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SBAF currently", > >> + [IFS_SBAF_UNASSIGNED_ERROR_CODE7] = "Unassigned error code 0x7", > >> + [IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently", > >> + [IFS_SBAF_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SBAF start", > >> + [IFS_SBAF_INVALID_PROGRAM_INDEX] = "SBAF program index not valid", > >> + [IFS_SBAF_CORRUPTED_CHUNK] = "SBAF operation aborted due to corrupted chunk", > >> + [IFS_SBAF_DID_NOT_START] = "SBAF operation did not start", > >> +}; > >> + > >> +static void sbaf_message_not_tested(struct device *dev, int cpu, u64 status_data) > >> +{ > >> + union ifs_sbaf_status status = (union ifs_sbaf_status)status_data; > >> + > >> + if (status.error_code < ARRAY_SIZE(sbaf_test_status)) { > >> + dev_info(dev, "CPU(s) %*pbl: SBAF operation did not start. %s\n", > >> + cpumask_pr_args(cpu_smt_mask(cpu)), > >> + sbaf_test_status[status.error_code]); > >> + } else if (status.error_code == IFS_SW_TIMEOUT) { > >> + dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n", > >> + cpumask_pr_args(cpu_smt_mask(cpu))); > >> + } else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) { > >> + dev_info(dev, "CPU(s) %*pbl: %s\n", > >> + cpumask_pr_args(cpu_smt_mask(cpu)), > >> + "Not all SBAF bundles executed. Maximum forward progress retries exceeded"); > >> + } else { > >> + dev_info(dev, "CPU(s) %*pbl: SBAF unknown status %llx\n", > >> + cpumask_pr_args(cpu_smt_mask(cpu)), status.data); > >> + } > >> +} > >> + > >> +static void sbaf_message_fail(struct device *dev, int cpu, union ifs_sbaf_status status) > >> +{ > >> + /* Failed signature check is set when SBAF signature did not match the expected value */ > >> + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL) { > >> + dev_err(dev, "CPU(s) %*pbl: Failed signature check\n", > >> + cpumask_pr_args(cpu_smt_mask(cpu))); > >> + } > >> + > >> + /* Failed to reach end of test */ > >> + if (status.sbaf_status == SBAF_STATUS_TEST_FAIL) { > >> + dev_err(dev, "CPU(s) %*pbl: Failed to complete test\n", > >> + cpumask_pr_args(cpu_smt_mask(cpu))); > >> + } > >> +} > >> + > >> +static bool sbaf_bundle_completed(union ifs_sbaf_status status) > >> +{ > >> + if (status.sbaf_status || status.error_code) > >> + return false; > >> + return true; > > This is same as: > > > > return !status.sbaf_status && !status.error_code; > > Yes. Your version looks good. Do you want me to send a version with > this change or you can include it when merging it? Please do send a new version, there were too many things to change for me to do it while applying.
diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h index 600bb8a1b285..b261be46bce8 100644 --- a/drivers/platform/x86/intel/ifs/ifs.h +++ b/drivers/platform/x86/intel/ifs/ifs.h @@ -157,6 +157,8 @@ #define MSR_SBAF_HASHES_STATUS 0x000002b9 #define MSR_AUTHENTICATE_AND_COPY_SBAF_CHUNK 0x000002ba #define MSR_SBAF_CHUNKS_AUTHENTICATION_STATUS 0x000002bb +#define MSR_ACTIVATE_SBAF 0x000002bc +#define MSR_SBAF_STATUS 0x000002bd #define MSR_COPY_SCAN_HASHES 0x000002c2 #define MSR_SCAN_HASHES_STATUS 0x000002c3 @@ -283,6 +285,34 @@ union ifs_array { }; }; +/* MSR_ACTIVATE_SBAF bit fields */ +union ifs_sbaf { + u64 data; + struct { + u32 bundle_idx :9; + u32 rsvd1 :5; + u32 pgm_idx :2; + u32 rsvd2 :16; + u32 delay :31; + u32 sigmce :1; + }; +}; + +/* MSR_SBAF_STATUS bit fields */ +union ifs_sbaf_status { + u64 data; + struct { + u32 bundle_idx :9; + u32 rsvd1 :5; + u32 pgm_idx :2; + u32 rsvd2 :16; + u32 error_code :8; + u32 rsvd3 :21; + u32 test_fail :1; + u32 sbaf_status :2; + }; +}; + /* * Driver populated error-codes * 0xFD: Test timed out before completing all the chunks. diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c index 282e4bfe30da..a812ec7761b9 100644 --- a/drivers/platform/x86/intel/ifs/runtest.c +++ b/drivers/platform/x86/intel/ifs/runtest.c @@ -29,6 +29,13 @@ struct run_params { union ifs_status status; }; +struct sbaf_run_params { + struct ifs_data *ifsd; + int *retry_cnt; + union ifs_sbaf *activate; + union ifs_sbaf_status status; +}; + /* * Number of TSC cycles that a logical CPU will wait for the other * logical CPU on the core in the WRMSR(ACTIVATE_SCAN). @@ -146,6 +153,7 @@ static bool can_restart(union ifs_status status) #define SPINUNIT 100 /* 100 nsec */ static atomic_t array_cpus_in; static atomic_t scan_cpus_in; +static atomic_t sbaf_cpus_in; /* * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus() @@ -387,6 +395,226 @@ static void ifs_array_test_gen1(int cpu, struct device *dev) ifsd->status = SCAN_TEST_PASS; } +#define SBAF_STATUS_PASS 0 +#define SBAF_STATUS_SIGN_FAIL 1 +#define SBAF_STATUS_INTR 2 +#define SBAF_STATUS_TEST_FAIL 3 + +enum sbaf_status_err_code { + IFS_SBAF_NO_ERROR = 0, + IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN = 1, + IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS = 2, + IFS_SBAF_UNASSIGNED_ERROR_CODE3 = 3, + IFS_SBAF_INVALID_BUNDLE_INDEX = 4, + IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS = 5, + IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY = 6, + IFS_SBAF_UNASSIGNED_ERROR_CODE7 = 7, + IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT = 8, + IFS_SBAF_INTERRUPTED_DURING_EXECUTION = 9, + IFS_SBAF_INVALID_PROGRAM_INDEX = 0xA, + IFS_SBAF_CORRUPTED_CHUNK = 0xB, + IFS_SBAF_DID_NOT_START = 0xC, +}; + +static const char * const sbaf_test_status[] = { + [IFS_SBAF_NO_ERROR] = "SBAF no error", + [IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.", + [IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.", + [IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3", + [IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non valid sbaf bundles. Reload test image", + [IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.", + [IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SBAF currently", + [IFS_SBAF_UNASSIGNED_ERROR_CODE7] = "Unassigned error code 0x7", + [IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently", + [IFS_SBAF_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SBAF start", + [IFS_SBAF_INVALID_PROGRAM_INDEX] = "SBAF program index not valid", + [IFS_SBAF_CORRUPTED_CHUNK] = "SBAF operation aborted due to corrupted chunk", + [IFS_SBAF_DID_NOT_START] = "SBAF operation did not start", +}; + +static void sbaf_message_not_tested(struct device *dev, int cpu, u64 status_data) +{ + union ifs_sbaf_status status = (union ifs_sbaf_status)status_data; + + if (status.error_code < ARRAY_SIZE(sbaf_test_status)) { + dev_info(dev, "CPU(s) %*pbl: SBAF operation did not start. %s\n", + cpumask_pr_args(cpu_smt_mask(cpu)), + sbaf_test_status[status.error_code]); + } else if (status.error_code == IFS_SW_TIMEOUT) { + dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n", + cpumask_pr_args(cpu_smt_mask(cpu))); + } else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) { + dev_info(dev, "CPU(s) %*pbl: %s\n", + cpumask_pr_args(cpu_smt_mask(cpu)), + "Not all SBAF bundles executed. Maximum forward progress retries exceeded"); + } else { + dev_info(dev, "CPU(s) %*pbl: SBAF unknown status %llx\n", + cpumask_pr_args(cpu_smt_mask(cpu)), status.data); + } +} + +static void sbaf_message_fail(struct device *dev, int cpu, union ifs_sbaf_status status) +{ + /* Failed signature check is set when SBAF signature did not match the expected value */ + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL) { + dev_err(dev, "CPU(s) %*pbl: Failed signature check\n", + cpumask_pr_args(cpu_smt_mask(cpu))); + } + + /* Failed to reach end of test */ + if (status.sbaf_status == SBAF_STATUS_TEST_FAIL) { + dev_err(dev, "CPU(s) %*pbl: Failed to complete test\n", + cpumask_pr_args(cpu_smt_mask(cpu))); + } +} + +static bool sbaf_bundle_completed(union ifs_sbaf_status status) +{ + if (status.sbaf_status || status.error_code) + return false; + return true; +} + +static bool sbaf_can_restart(union ifs_sbaf_status status) +{ + enum sbaf_status_err_code err_code = status.error_code; + + /* Signature for chunk is bad, or scan test failed */ + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL || + status.sbaf_status == SBAF_STATUS_TEST_FAIL) + return false; + + switch (err_code) { + case IFS_SBAF_NO_ERROR: + case IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN: + case IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS: + case IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT: + case IFS_SBAF_INTERRUPTED_DURING_EXECUTION: + return true; + case IFS_SBAF_UNASSIGNED_ERROR_CODE3: + case IFS_SBAF_INVALID_BUNDLE_INDEX: + case IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS: + case IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY: + case IFS_SBAF_UNASSIGNED_ERROR_CODE7: + case IFS_SBAF_INVALID_PROGRAM_INDEX: + case IFS_SBAF_CORRUPTED_CHUNK: + case IFS_SBAF_DID_NOT_START: + break; + } + return false; +} + +/* + * Execute the SBAF test. Called "simultaneously" on all threads of a core + * at high priority using the stop_cpus mechanism. + */ +static int dosbaf(void *data) +{ + struct sbaf_run_params *run_params = data; + int cpu = smp_processor_id(); + union ifs_sbaf_status status; + struct ifs_data *ifsd; + int first; + + ifsd = run_params->ifsd; + + /* Only the first logical CPU on a core reports result */ + first = cpumask_first(cpu_smt_mask(cpu)); + wait_for_sibling_cpu(&sbaf_cpus_in, NSEC_PER_SEC); + + /* + * This WRMSR will wait for other HT threads to also write + * to this MSR (at most for activate.delay cycles). Then it + * starts scan of each requested bundle. The core test happens + * during the "execution" of the WRMSR. + */ + wrmsrl(MSR_ACTIVATE_SBAF, run_params->activate->data); + rdmsrl(MSR_SBAF_STATUS, status.data); + + /* Pass back the result of the test */ + if (cpu == first) + run_params->status = status; + + return 0; +} + +static void ifs_sbaf_test_core(int cpu, struct device *dev) +{ + struct sbaf_run_params run_params; + union ifs_sbaf_status status = {}; + union ifs_sbaf activate; + unsigned long timeout; + struct ifs_data *ifsd; + int stop_bundle; + int retries; + + ifsd = ifs_get_data(dev); + + activate.data = 0; + activate.delay = IFS_THREAD_WAIT; + + timeout = jiffies + (2 * HZ); + retries = MAX_IFS_RETRIES; + activate.bundle_idx = 0; + stop_bundle = ifsd->max_bundle; + + while (activate.bundle_idx <= stop_bundle) { + if (time_after(jiffies, timeout)) { + status.error_code = IFS_SW_TIMEOUT; + break; + } + + atomic_set(&sbaf_cpus_in, 0); + + run_params.ifsd = ifsd; + run_params.activate = &activate; + run_params.retry_cnt = &retries; + stop_core_cpuslocked(cpu, dosbaf, &run_params); + + status = run_params.status; + + if (sbaf_bundle_completed(status)) { + activate.bundle_idx = status.bundle_idx + 1; + activate.pgm_idx = 0; + retries = MAX_IFS_RETRIES; + continue; + } + + /* Some cases can be retried, give up for others */ + if (!sbaf_can_restart(status)) + break; + + if (status.pgm_idx == activate.pgm_idx) { + /* If no progress retry */ + if (--retries == 0) { + if (status.error_code == IFS_NO_ERROR) + status.error_code = IFS_SW_PARTIAL_COMPLETION; + break; + } + } else { + /* if some progress, more pgms remaining in bundle, reset retries */ + retries = MAX_IFS_RETRIES; + activate.bundle_idx = status.bundle_idx; + activate.pgm_idx = status.pgm_idx; + } + } + + /* Update status for this core */ + ifsd->scan_details = status.data; + + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL || + status.sbaf_status == SBAF_STATUS_TEST_FAIL) { + ifsd->status = SCAN_TEST_FAIL; + sbaf_message_fail(dev, cpu, status); + } else if (status.error_code || status.sbaf_status == SBAF_STATUS_INTR || + (activate.bundle_idx < stop_bundle)) { + ifsd->status = SCAN_NOT_TESTED; + sbaf_message_not_tested(dev, cpu, status.data); + } else { + ifsd->status = SCAN_TEST_PASS; + } +} + /* * Initiate per core test. It wakes up work queue threads on the target cpu and * its sibling cpu. Once all sibling threads wake up, the scan test gets executed and @@ -420,6 +648,12 @@ int do_core_test(int cpu, struct device *dev) else ifs_array_test_gen1(cpu, dev); break; + case IFS_TYPE_SBAF: + if (!ifsd->loaded) + ret = -EPERM; + else + ifs_sbaf_test_core(cpu, dev); + break; default: ret = -EINVAL; }