diff mbox series

selftests: x86: skip the tests if prerequisites aren't fulfilled

Message ID 20240307183730.2858264-1-usama.anjum@collabora.com (mailing list archive)
State Superseded
Headers show
Series selftests: x86: skip the tests if prerequisites aren't fulfilled | expand

Commit Message

Muhammad Usama Anjum March 7, 2024, 6:37 p.m. UTC
Skip instead of failing when prerequisite conditions aren't fulfilled,
such as invalid xstate values etc. This patch would make the tests show
as skip when run by:
  make -C tools/testing/selftest/ TARGETS=x86 run_tests

  ...
  # timeout set to 45
  # selftests: x86: amx_64
  # # xstate cpuid: invalid tile data size/offset: 0/0
  ok 42 selftests: x86: amx_64 # SKIP
  # timeout set to 45
  # selftests: x86: lam_64
  # # Unsupported LAM feature!
  ok 43 selftests: x86: lam_64 # SKIP
  ...

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
---
I'm not sure if xstate values should be correct on all the x86
processors. If the xstate is invalid on a CPU, the test should be
skipped instead of failing.
---
 tools/testing/selftests/x86/amx.c | 33 ++++++++++++++++++++-----------
 tools/testing/selftests/x86/lam.c |  2 +-
 2 files changed, 23 insertions(+), 12 deletions(-)

Comments

Chang S. Bae March 9, 2024, 1:06 a.m. UTC | #1
On 3/7/2024 10:37 AM, Muhammad Usama Anjum wrote:
> 
> diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
> index d884fd69dd510..5d1ca0bbaaae7 100644
> --- a/tools/testing/selftests/x86/amx.c
> +++ b/tools/testing/selftests/x86/amx.c
> @@ -103,9 +103,10 @@ static void clearhandler(int sig)
>   
>   #define CPUID_LEAF1_ECX_XSAVE_MASK	(1 << 26)
>   #define CPUID_LEAF1_ECX_OSXSAVE_MASK	(1 << 27)
> -static inline void check_cpuid_xsave(void)
> +static inline int check_cpuid_xsave(void)
>   {
>   	uint32_t eax, ebx, ecx, edx;
> +	int ret = 0;
>   
>   	/*
>   	 * CPUID.1:ECX.XSAVE[bit 26] enumerates general
> @@ -113,10 +114,16 @@ static inline void check_cpuid_xsave(void)
>   	 * XGETBV.
>   	 */
>   	__cpuid_count(1, 0, eax, ebx, ecx, edx);
> -	if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
> -		fatal_error("cpuid: no CPU xsave support");
> -	if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
> -		fatal_error("cpuid: no OS xsave support");
> +	if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) {
> +		ksft_print_msg("cpuid: no CPU xsave support\n");
> +		ret = -1;
> +	}
> +	if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) {
> +		ksft_print_msg("cpuid: no OS xsave support\n");
> +		ret = -1;
> +	}
> +
> +	return ret;
>   }

I thought check_cpuid_xsave() can go away [1] by simplifying the 
availability check through arch_prctl():

+#define ARCH_GET_XCOMP_SUPP    0x1021
  #define ARCH_GET_XCOMP_PERM    0x1022
  #define ARCH_REQ_XCOMP_PERM    0x1023

@@ -928,8 +911,15 @@ static void test_ptrace(void)

  int main(void)
  {
-       /* Check hardware availability at first */
-       check_cpuid_xsave();
+       unsigned long features;
+       long rc;
+
+       rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features);
+       if (rc || (features & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE) {
+               ksft_print_msg("no AMX support\n");
+               return KSFT_SKIP;
+       }

> -static void check_cpuid_xtiledata(void)
> +static int check_cpuid_xtiledata(void)
>   {
>   	uint32_t eax, ebx, ecx, edx;
>   
> @@ -153,12 +160,16 @@ static void check_cpuid_xtiledata(void)
>   	 * eax: XTILEDATA state component size
>   	 * ebx: XTILEDATA state component offset in user buffer
>   	 */
> -	if (!eax || !ebx)
> -		fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
> -				eax, ebx);
> +	if (!eax || !ebx) {
> +		ksft_print_msg("xstate cpuid: invalid tile data size/offset: %d/%d\n",
> +			       eax, ebx);
> +		return -1;
> +	}
>   
>   	xtiledata.size	      = eax;
>   	xtiledata.xbuf_offset = ebx;
> +
> +	return 0;
>   }

I don't think it is okay to silently skip the test here. If the feature 
is available, the tile data size and offset should not be zero.

Thanks,
Chang

[1] 
https://lore.kernel.org/lkml/327cde12-daea-84ba-4b24-64fe12e89dea@intel.com/
Kirill A . Shutemov March 11, 2024, 12:39 p.m. UTC | #2
On Thu, Mar 07, 2024 at 11:37:22PM +0500, Muhammad Usama Anjum wrote:
> diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
> index 215b8150b7cca..c0f016f45ee17 100644
> --- a/tools/testing/selftests/x86/lam.c
> +++ b/tools/testing/selftests/x86/lam.c
> @@ -1183,7 +1183,7 @@ int main(int argc, char **argv)
>  
>  	if (!cpu_has_lam()) {
>  		ksft_print_msg("Unsupported LAM feature!\n");
> -		return -1;
> +		return KSFT_SKIP;
>  	}
>  
>  	while ((c = getopt(argc, argv, "ht:")) != -1) {

Looks good to me:

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Muhammad Usama Anjum March 11, 2024, 5:02 p.m. UTC | #3
On 3/9/24 6:06 AM, Chang S. Bae wrote:
> On 3/7/2024 10:37 AM, Muhammad Usama Anjum wrote:
>>
>> diff --git a/tools/testing/selftests/x86/amx.c
>> b/tools/testing/selftests/x86/amx.c
>> index d884fd69dd510..5d1ca0bbaaae7 100644
>> --- a/tools/testing/selftests/x86/amx.c
>> +++ b/tools/testing/selftests/x86/amx.c
>> @@ -103,9 +103,10 @@ static void clearhandler(int sig)
>>     #define CPUID_LEAF1_ECX_XSAVE_MASK    (1 << 26)
>>   #define CPUID_LEAF1_ECX_OSXSAVE_MASK    (1 << 27)
>> -static inline void check_cpuid_xsave(void)
>> +static inline int check_cpuid_xsave(void)
>>   {
>>       uint32_t eax, ebx, ecx, edx;
>> +    int ret = 0;
>>         /*
>>        * CPUID.1:ECX.XSAVE[bit 26] enumerates general
>> @@ -113,10 +114,16 @@ static inline void check_cpuid_xsave(void)
>>        * XGETBV.
>>        */
>>       __cpuid_count(1, 0, eax, ebx, ecx, edx);
>> -    if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
>> -        fatal_error("cpuid: no CPU xsave support");
>> -    if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
>> -        fatal_error("cpuid: no OS xsave support");
>> +    if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) {
>> +        ksft_print_msg("cpuid: no CPU xsave support\n");
>> +        ret = -1;
>> +    }
>> +    if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) {
>> +        ksft_print_msg("cpuid: no OS xsave support\n");
>> +        ret = -1;
>> +    }
>> +
>> +    return ret;
>>   }
> 
> I thought check_cpuid_xsave() can go away [1] by simplifying the
> availability check through arch_prctl():
In this patch, I'm just focusing on skip login on existing code. I'll make
this change when I'll transform the entire test to TAP.

> 
> +#define ARCH_GET_XCOMP_SUPP    0x1021
>  #define ARCH_GET_XCOMP_PERM    0x1022
>  #define ARCH_REQ_XCOMP_PERM    0x1023
> 
> @@ -928,8 +911,15 @@ static void test_ptrace(void)
> 
>  int main(void)
>  {
> -       /* Check hardware availability at first */
> -       check_cpuid_xsave();
> +       unsigned long features;
> +       long rc;
> +
> +       rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features);
> +       if (rc || (features & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE) {
> +               ksft_print_msg("no AMX support\n");
> +               return KSFT_SKIP;
> +       }
> 
>> -static void check_cpuid_xtiledata(void)
>> +static int check_cpuid_xtiledata(void)
>>   {
>>       uint32_t eax, ebx, ecx, edx;
>>   @@ -153,12 +160,16 @@ static void check_cpuid_xtiledata(void)
>>        * eax: XTILEDATA state component size
>>        * ebx: XTILEDATA state component offset in user buffer
>>        */
>> -    if (!eax || !ebx)
>> -        fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
>> -                eax, ebx);
>> +    if (!eax || !ebx) {
>> +        ksft_print_msg("xstate cpuid: invalid tile data size/offset:
>> %d/%d\n",
>> +                   eax, ebx);
>> +        return -1;
>> +    }
>>         xtiledata.size          = eax;
>>       xtiledata.xbuf_offset = ebx;
>> +
>> +    return 0;
>>   }
> 
> I don't think it is okay to silently skip the test here. If the feature is
> available, the tile data size and offset should not be zero.
We are logging that data size/offset are invalid if either eax or ebx are
invalid and then we are skipping. Not sure what you are asking me to change.

> 
> Thanks,
> Chang
> 
> [1]
> https://lore.kernel.org/lkml/327cde12-daea-84ba-4b24-64fe12e89dea@intel.com/
>
Chang S. Bae March 11, 2024, 5:39 p.m. UTC | #4
On 3/11/2024 10:02 AM, Muhammad Usama Anjum wrote:
> On 3/9/24 6:06 AM, Chang S. Bae wrote:
>> On 3/7/2024 10:37 AM, Muhammad Usama Anjum wrote:
>>
>>> -static void check_cpuid_xtiledata(void)
>>> +static int check_cpuid_xtiledata(void)
>>>    {
>>>        uint32_t eax, ebx, ecx, edx;
>>>    @@ -153,12 +160,16 @@ static void check_cpuid_xtiledata(void)
>>>         * eax: XTILEDATA state component size
>>>         * ebx: XTILEDATA state component offset in user buffer
>>>         */
>>> -    if (!eax || !ebx)
>>> -        fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
>>> -                eax, ebx);
>>> +    if (!eax || !ebx) {
>>> +        ksft_print_msg("xstate cpuid: invalid tile data size/offset:
>>> %d/%d\n",
>>> +                   eax, ebx);
>>> +        return -1;
>>> +    }
>>>          xtiledata.size          = eax;
>>>        xtiledata.xbuf_offset = ebx;
>>> +
>>> +    return 0;
>>>    }
>>
>> I don't think it is okay to silently skip the test here. If the feature is
>> available, the tile data size and offset should not be zero.
> We are logging that data size/offset are invalid if either eax or ebx are
> invalid and then we are skipping. Not sure what you are asking me to change.

You intention seems to skip the test when AMX is not available. But this 
function should only be invoked when AMX is actually available, not as 
part of the feature availability check. Therefore, I think this change 
is not relevant. Also, if we encounter invalid TILEDATA CPUID, it should 
be a reason to *fail* the test, rather than calling out a skip, right?

Thanks,
Chang
Binbin Wu March 12, 2024, 12:10 a.m. UTC | #5
On 3/8/2024 2:37 AM, Muhammad Usama Anjum wrote:
> diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
> index 215b8150b7cca..c0f016f45ee17 100644
> --- a/tools/testing/selftests/x86/lam.c
> +++ b/tools/testing/selftests/x86/lam.c
> @@ -1183,7 +1183,7 @@ int main(int argc, char **argv)
>   
>   	if (!cpu_has_lam()) {
>   		ksft_print_msg("Unsupported LAM feature!\n");
> -		return -1;
> +		return KSFT_SKIP;
>   	}
>   
>   	while ((c = getopt(argc, argv, "ht:")) != -1) {
Looks good to me.
Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
Muhammad Usama Anjum March 12, 2024, 9:26 a.m. UTC | #6
On 3/11/24 10:39 PM, Chang S. Bae wrote:
> On 3/11/2024 10:02 AM, Muhammad Usama Anjum wrote:
>> On 3/9/24 6:06 AM, Chang S. Bae wrote:
>>> On 3/7/2024 10:37 AM, Muhammad Usama Anjum wrote:
>>>
>>>> -static void check_cpuid_xtiledata(void)
>>>> +static int check_cpuid_xtiledata(void)
>>>>    {
>>>>        uint32_t eax, ebx, ecx, edx;
>>>>    @@ -153,12 +160,16 @@ static void check_cpuid_xtiledata(void)
>>>>         * eax: XTILEDATA state component size
>>>>         * ebx: XTILEDATA state component offset in user buffer
>>>>         */
>>>> -    if (!eax || !ebx)
>>>> -        fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
>>>> -                eax, ebx);
>>>> +    if (!eax || !ebx) {
>>>> +        ksft_print_msg("xstate cpuid: invalid tile data size/offset:
>>>> %d/%d\n",
>>>> +                   eax, ebx);
>>>> +        return -1;
>>>> +    }
>>>>          xtiledata.size          = eax;
>>>>        xtiledata.xbuf_offset = ebx;
>>>> +
>>>> +    return 0;
>>>>    }
>>>
>>> I don't think it is okay to silently skip the test here. If the feature is
>>> available, the tile data size and offset should not be zero.
>> We are logging that data size/offset are invalid if either eax or ebx are
>> invalid and then we are skipping. Not sure what you are asking me to change.
> 
> You intention seems to skip the test when AMX is not available. 
Yes,

> But this
> function should only be invoked when AMX is actually available, not as part
> of the feature availability check. 
How can we check if AMX is available or not?

> Therefore, I think this change is not
> relevant. Also, if we encounter invalid TILEDATA CPUID, it should be a
> reason to *fail* the test, rather than calling out a skip, right?
I see. But once we check if AMX is available.

> 
> Thanks,
> Chang
>
Chang S. Bae March 12, 2024, 4:07 p.m. UTC | #7
On 3/12/2024 2:26 AM, Muhammad Usama Anjum wrote:
 >
> How can we check if AMX is available or not?

After a successful check_cpuid_xsave(), examining CPUID(eax=0xd, ecx=0) 
EDX: EAX, which reports the support bits of XCR0, can give assurance of 
AMX availability. Perhaps, this change is considerable on top of your patch:

static int check_cpuid_xtiledata(void)
{
         uint32_t eax, ebx, ecx, edx;
+       uint64_t xfeatures;

         __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
                       eax, ebx, ecx, edx);

+       xfeatures = eax + ((uint64_t)edx << 32);
+       if ((xfeatures & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE) {
+               ksft_print_msg("no AMX support\n");
+               return -1;
+       }

Nevertheless, I still believe that using arch_prctl(ARCH_GET_XCOMP_SUPP, 
...) remains a simple and legitimate approach for directly checking 
dynamic feature support from the kernel: 
https://docs.kernel.org/arch/x86/xstate.html

Thanks,
Chang
Muhammad Usama Anjum March 12, 2024, 5:28 p.m. UTC | #8
Hi Chang,

On 3/12/24 9:07 PM, Chang S. Bae wrote:
> On 3/12/2024 2:26 AM, Muhammad Usama Anjum wrote:
>>
>> How can we check if AMX is available or not?
> 
> After a successful check_cpuid_xsave(), examining CPUID(eax=0xd, ecx=0)
> EDX: EAX, which reports the support bits of XCR0, can give assurance of AMX
> availability. Perhaps, this change is considerable on top of your patch:
> 
> static int check_cpuid_xtiledata(void)
> {
>         uint32_t eax, ebx, ecx, edx;
> +       uint64_t xfeatures;
> 
>         __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
>                       eax, ebx, ecx, edx);
> 
> +       xfeatures = eax + ((uint64_t)edx << 32);
> +       if ((xfeatures & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE) {
> +               ksft_print_msg("no AMX support\n");
> +               return -1;
> +       }
Now I understand. I'll use this snippet to skip the test and fail the test
if eax and ebx are zero. Sorry, I've lesser knowledge on the x86's
extensions side. I'll build up the knowledge.

> 
> Nevertheless, I still believe that using arch_prctl(ARCH_GET_XCOMP_SUPP,
> ..) remains a simple and legitimate approach for directly checking dynamic
> feature support from the kernel: https://docs.kernel.org/arch/x86/xstate.html
I'll use arch_prtcl in another patch on top of the current patch in v2.

> 
> Thanks,
> Chang
>
diff mbox series

Patch

diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
index d884fd69dd510..5d1ca0bbaaae7 100644
--- a/tools/testing/selftests/x86/amx.c
+++ b/tools/testing/selftests/x86/amx.c
@@ -103,9 +103,10 @@  static void clearhandler(int sig)
 
 #define CPUID_LEAF1_ECX_XSAVE_MASK	(1 << 26)
 #define CPUID_LEAF1_ECX_OSXSAVE_MASK	(1 << 27)
-static inline void check_cpuid_xsave(void)
+static inline int check_cpuid_xsave(void)
 {
 	uint32_t eax, ebx, ecx, edx;
+	int ret = 0;
 
 	/*
 	 * CPUID.1:ECX.XSAVE[bit 26] enumerates general
@@ -113,10 +114,16 @@  static inline void check_cpuid_xsave(void)
 	 * XGETBV.
 	 */
 	__cpuid_count(1, 0, eax, ebx, ecx, edx);
-	if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
-		fatal_error("cpuid: no CPU xsave support");
-	if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
-		fatal_error("cpuid: no OS xsave support");
+	if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) {
+		ksft_print_msg("cpuid: no CPU xsave support\n");
+		ret = -1;
+	}
+	if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) {
+		ksft_print_msg("cpuid: no OS xsave support\n");
+		ret = -1;
+	}
+
+	return ret;
 }
 
 static uint32_t xbuf_size;
@@ -131,7 +138,7 @@  static struct {
 #define TILE_CPUID			0x1d
 #define TILE_PALETTE_ID			0x1
 
-static void check_cpuid_xtiledata(void)
+static int check_cpuid_xtiledata(void)
 {
 	uint32_t eax, ebx, ecx, edx;
 
@@ -153,12 +160,16 @@  static void check_cpuid_xtiledata(void)
 	 * eax: XTILEDATA state component size
 	 * ebx: XTILEDATA state component offset in user buffer
 	 */
-	if (!eax || !ebx)
-		fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
-				eax, ebx);
+	if (!eax || !ebx) {
+		ksft_print_msg("xstate cpuid: invalid tile data size/offset: %d/%d\n",
+			       eax, ebx);
+		return -1;
+	}
 
 	xtiledata.size	      = eax;
 	xtiledata.xbuf_offset = ebx;
+
+	return 0;
 }
 
 /* The helpers for managing XSAVE buffer and tile states: */
@@ -929,8 +940,8 @@  static void test_ptrace(void)
 int main(void)
 {
 	/* Check hardware availability at first */
-	check_cpuid_xsave();
-	check_cpuid_xtiledata();
+	if (check_cpuid_xsave() || check_cpuid_xtiledata())
+		return KSFT_SKIP;
 
 	init_stashed_xsave();
 	sethandler(SIGILL, handle_noperm, 0);
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 215b8150b7cca..c0f016f45ee17 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -1183,7 +1183,7 @@  int main(int argc, char **argv)
 
 	if (!cpu_has_lam()) {
 		ksft_print_msg("Unsupported LAM feature!\n");
-		return -1;
+		return KSFT_SKIP;
 	}
 
 	while ((c = getopt(argc, argv, "ht:")) != -1) {