From patchwork Tue Mar 24 12:30:15 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 11455283 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 851996CA for ; Tue, 24 Mar 2020 12:31:18 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 6BE882070A for ; Tue, 24 Mar 2020 12:31:18 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 6BE882070A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=suse.com Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=xen-devel-bounces@lists.xenproject.org Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGihG-0006Kl-Sc; Tue, 24 Mar 2020 12:30:18 +0000 Received: from us1-rack-iad1.inumbo.com ([172.99.69.81]) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGihF-0006KV-T3 for xen-devel@lists.xenproject.org; Tue, 24 Mar 2020 12:30:17 +0000 X-Inumbo-ID: 378ec2ca-6dcb-11ea-92cf-bc764e2007e4 Received: from mx2.suse.de (unknown [195.135.220.15]) by us1-rack-iad1.inumbo.com (Halon) with ESMTPS id 378ec2ca-6dcb-11ea-92cf-bc764e2007e4; Tue, 24 Mar 2020 12:30:17 +0000 (UTC) X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 5DE3AAFB2; Tue, 24 Mar 2020 12:30:16 +0000 (UTC) From: Jan Beulich To: "xen-devel@lists.xenproject.org" References: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Message-ID: <6e500b1e-2ff1-5fc3-de2f-e0ac0e4cf094@suse.com> Date: Tue, 24 Mar 2020 13:30:15 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0 MIME-Version: 1.0 In-Reply-To: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Content-Language: en-US Subject: [Xen-devel] [PATCH v5 01/10] x86emul: support AVX512_BF16 insns X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: Andrew Cooper , Paul Durrant , Wei Liu , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" Signed-off-by: Jan Beulich Acked-by: Andrew Cooper --- v5: New. --- (SDE: -cpx) --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -550,6 +550,12 @@ static const struct test avx512_4vnniw_5 INSN(p4dpwssds, f2, 0f38, 53, el_4, d, vl), }; +static const struct test avx512_bf16_all[] = { + INSN(vcvtne2ps2bf16, f2, 0f38, 72, vl, d, vl), + INSN(vcvtneps2bf16, f3, 0f38, 72, vl, d, vl), + INSN(vdpbf16ps, f3, 0f38, 52, vl, d, vl), +}; + static const struct test avx512_bitalg_all[] = { INSN(popcnt, 66, 0f38, 54, vl, bw, vl), INSN(pshufbitqmb, 66, 0f38, 8f, vl, b, vl), @@ -984,6 +990,7 @@ void evex_disp8_test(void *instr, struct RUN(avx512pf, 512); RUN(avx512_4fmaps, 512); RUN(avx512_4vnniw, 512); + RUN(avx512_bf16, all); RUN(avx512_bitalg, all); RUN(avx512_ifma, all); RUN(avx512_vbmi, all); --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -4516,6 +4516,80 @@ int main(int argc, char **argv) else printf("skipped\n"); + if ( stack_exec && cpu_has_avx512_bf16 ) + { + decl_insn(vcvtne2ps2bf16); + decl_insn(vcvtneps2bf16); + decl_insn(vdpbf16ps); + static const struct { + float f[16]; + } in1 = {{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + }}, in2 = {{ + 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16 + }}, out = {{ + 1 * 1 + 2 * 2, 3 * 3 + 4 * 4, + 5 * 5 + 6 * 6, 7 * 7 + 8 * 8, + 9 * 9 + 10 * 10, 11 * 11 + 12 * 12, + 13 * 13 + 14 * 14, 15 * 15 + 16 * 16, + 1 * 1 - 2 * 2, 3 * 3 - 4 * 4, + 5 * 5 - 6 * 6, 7 * 7 - 8 * 8, + 9 * 9 - 10 * 10, 11 * 11 - 12 * 12, + 13 * 13 - 14 * 14, 15 * 15 - 16 * 16 + }}; + + printf("%-40s", "Testing vcvtne2ps2bf16 64(%ecx),%zmm1,%zmm2..."); + asm volatile ( "vmovups %1, %%zmm1\n" + put_insn(vcvtne2ps2bf16, + /* vcvtne2ps2bf16 64(%0), %%zmm1, %%zmm2 */ + ".byte 0x62, 0xf2, 0x77, 0x48, 0x72, 0x51, 0x01") + :: "c" (NULL), "m" (in2) ); + set_insn(vcvtne2ps2bf16); + regs.ecx = (unsigned long)&in1 - 64; + rc = x86_emulate(&ctxt, &emulops); + if ( rc != X86EMUL_OKAY || !check_eip(vcvtne2ps2bf16) ) + goto fail; + printf("pending\n"); + + printf("%-40s", "Testing vcvtneps2bf16 64(%ecx),%ymm3..."); + asm volatile ( put_insn(vcvtneps2bf16, + /* vcvtneps2bf16 64(%0), %%ymm3 */ + ".byte 0x62, 0xf2, 0x7e, 0x48, 0x72, 0x59, 0x01") + :: "c" (NULL) ); + set_insn(vcvtneps2bf16); + rc = x86_emulate(&ctxt, &emulops); + if ( rc != X86EMUL_OKAY || !check_eip(vcvtneps2bf16) ) + goto fail; + asm ( "vmovdqa %%ymm2, %%ymm5\n\t" + "vpcmpeqd %%zmm3, %%zmm5, %%k0\n\t" + "kmovw %%k0, %0" + : "=g" (rc) : "m" (out) ); + if ( rc != 0xffff ) + goto fail; + printf("pending\n"); + + printf("%-40s", "Testing vdpbf16ps 128(%ecx),%zmm2,%zmm4..."); + asm volatile ( "vmovdqa %%ymm3, %0\n\t" + "vmovdqa %%ymm3, %1\n" + put_insn(vdpbf16ps, + /* vdpbf16ps 128(%2), %%zmm2, %%zmm4 */ + ".byte 0x62, 0xf2, 0x6e, 0x48, 0x52, 0x61, 0x02") + : "=&m" (res[0]), "=&m" (res[8]) + : "c" (NULL) + : "memory" ); + set_insn(vdpbf16ps); + regs.ecx = (unsigned long)res - 128; + rc = x86_emulate(&ctxt, &emulops); + if ( rc != X86EMUL_OKAY || !check_eip(vdpbf16ps) ) + goto fail; + asm ( "vcmpeqps %1, %%zmm4, %%k0\n\t" + "kmovw %%k0, %0" + : "=g" (rc) : "m" (out) ); + if ( rc != 0xffff ) + goto fail; + printf("okay\n"); + } + printf("%-40s", "Testing invpcid 16(%ecx),%%edx..."); if ( stack_exec ) { --- a/tools/tests/x86_emulator/x86-emulate.h +++ b/tools/tests/x86_emulator/x86-emulate.h @@ -156,6 +156,7 @@ static inline bool xcr0_mask(uint64_t ma #define cpu_has_avx512_vpopcntdq (cp.feat.avx512_vpopcntdq && xcr0_mask(0xe6)) #define cpu_has_avx512_4vnniw (cp.feat.avx512_4vnniw && xcr0_mask(0xe6)) #define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6)) +#define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6)) #define cpu_has_xgetbv1 (cpu_has_xsave && cp.xstate.xgetbv1) --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1904,6 +1904,7 @@ in_protmode( #define vcpu_has_rdpid() (ctxt->cpuid->feat.rdpid) #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw) #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps) +#define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16) #define vcpu_must_have(feat) \ generate_exception_if(!vcpu_has_##feat(), EXC_UD) @@ -9152,6 +9153,19 @@ x86_emulate( generate_exception_if(evex.w, EXC_UD); goto avx512f_no_sae; + case X86EMUL_OPC_EVEX_F2(0x0f38, 0x72): /* vcvtne2ps2bf16 [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_F3(0x0f38, 0x72): /* vcvtneps2bf16 [xyz]mm/mem,{x,y}mm{k} */ + if ( evex.pfx == vex_f2 ) + fault_suppression = false; + else + d |= TwoOp; + /* fall through */ + case X86EMUL_OPC_EVEX_F3(0x0f38, 0x52): /* vdpbf16ps [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + host_and_vcpu_must_have(avx512_bf16); + generate_exception_if(evex.w, EXC_UD); + op_bytes = 16 << evex.lr; + goto avx512f_no_sae; + case X86EMUL_OPC_VEX_66(0x0f38, 0x58): /* vpbroadcastd xmm/m32,{x,y}mm */ case X86EMUL_OPC_VEX_66(0x0f38, 0x59): /* vpbroadcastq xmm/m64,{x,y}mm */ case X86EMUL_OPC_VEX_66(0x0f38, 0x78): /* vpbroadcastb xmm/m8,{x,y}mm */ --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -129,6 +129,9 @@ #define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS) #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) +/* CPUID level 0x00000007:1.eax */ +#define cpu_has_avx512_bf16 boot_cpu_has(X86_FEATURE_AVX512_BF16) + /* Synthesized. */ #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -262,7 +262,7 @@ XEN_CPUFEATURE(CORE_CAPS, 9*32+30) / XEN_CPUFEATURE(SSBD, 9*32+31) /*A MSR_SPEC_CTRL.SSBD available */ /* Intel-defined CPU features, CPUID level 0x00000007:1.eax, word 10 */ -XEN_CPUFEATURE(AVX512_BF16, 10*32+ 5) /* AVX512 BFloat16 Instructions */ +XEN_CPUFEATURE(AVX512_BF16, 10*32+ 5) /*A AVX512 BFloat16 Instructions */ #endif /* XEN_CPUFEATURE */ From patchwork Tue Mar 24 12:29:08 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 11455277 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2CE5D1667 for ; Tue, 24 Mar 2020 12:30:10 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 12C732080C for ; Tue, 24 Mar 2020 12:30:10 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 12C732080C Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=suse.com Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=xen-devel-bounces@lists.xenproject.org Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGigD-0005WF-4l; Tue, 24 Mar 2020 12:29:13 +0000 Received: from us1-rack-iad1.inumbo.com ([172.99.69.81]) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGigB-0005W2-K6 for xen-devel@lists.xenproject.org; Tue, 24 Mar 2020 12:29:11 +0000 X-Inumbo-ID: 100a5ec6-6dcb-11ea-92cf-bc764e2007e4 Received: from mx2.suse.de (unknown [195.135.220.15]) by us1-rack-iad1.inumbo.com (Halon) with ESMTPS id 100a5ec6-6dcb-11ea-92cf-bc764e2007e4; Tue, 24 Mar 2020 12:29:10 +0000 (UTC) X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 10393AEA6; Tue, 24 Mar 2020 12:29:10 +0000 (UTC) From: Jan Beulich To: "xen-devel@lists.xenproject.org" References: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Message-ID: <9604579f-64a8-0dc9-8188-3797e2004d45@suse.com> Date: Tue, 24 Mar 2020 13:29:08 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0 MIME-Version: 1.0 In-Reply-To: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Content-Language: en-US Subject: [Xen-devel] [PATCH v5 02/10] x86emul: support AVX512_BF16 insns X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: Andrew Cooper , Paul Durrant , Wei Liu , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" Signed-off-by: Jan Beulich --- v5: New. --- (SDE: -cpx) --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -550,6 +550,12 @@ static const struct test avx512_4vnniw_5 INSN(p4dpwssds, f2, 0f38, 53, el_4, d, vl), }; +static const struct test avx512_bf16_all[] = { + INSN(vcvtne2ps2bf16, f2, 0f38, 72, vl, d, vl), + INSN(vcvtneps2bf16, f3, 0f38, 72, vl, d, vl), + INSN(vdpbf16ps, f3, 0f38, 52, vl, d, vl), +}; + static const struct test avx512_bitalg_all[] = { INSN(popcnt, 66, 0f38, 54, vl, bw, vl), INSN(pshufbitqmb, 66, 0f38, 8f, vl, b, vl), @@ -984,6 +990,7 @@ void evex_disp8_test(void *instr, struct RUN(avx512pf, 512); RUN(avx512_4fmaps, 512); RUN(avx512_4vnniw, 512); + RUN(avx512_bf16, all); RUN(avx512_bitalg, all); RUN(avx512_ifma, all); RUN(avx512_vbmi, all); --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -4516,6 +4516,80 @@ int main(int argc, char **argv) else printf("skipped\n"); + if ( stack_exec && cpu_has_avx512_bf16 ) + { + decl_insn(vcvtne2ps2bf16); + decl_insn(vcvtneps2bf16); + decl_insn(vdpbf16ps); + static const struct { + float f[16]; + } in1 = {{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + }}, in2 = {{ + 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16 + }}, out = {{ + 1 * 1 + 2 * 2, 3 * 3 + 4 * 4, + 5 * 5 + 6 * 6, 7 * 7 + 8 * 8, + 9 * 9 + 10 * 10, 11 * 11 + 12 * 12, + 13 * 13 + 14 * 14, 15 * 15 + 16 * 16, + 1 * 1 - 2 * 2, 3 * 3 - 4 * 4, + 5 * 5 - 6 * 6, 7 * 7 - 8 * 8, + 9 * 9 - 10 * 10, 11 * 11 - 12 * 12, + 13 * 13 - 14 * 14, 15 * 15 - 16 * 16 + }}; + + printf("%-40s", "Testing vcvtne2ps2bf16 64(%ecx),%zmm1,%zmm2..."); + asm volatile ( "vmovups %1, %%zmm1\n" + put_insn(vcvtne2ps2bf16, + /* vcvtne2ps2bf16 64(%0), %%zmm1, %%zmm2 */ + ".byte 0x62, 0xf2, 0x77, 0x48, 0x72, 0x51, 0x01") + :: "c" (NULL), "m" (in2) ); + set_insn(vcvtne2ps2bf16); + regs.ecx = (unsigned long)&in1 - 64; + rc = x86_emulate(&ctxt, &emulops); + if ( rc != X86EMUL_OKAY || !check_eip(vcvtne2ps2bf16) ) + goto fail; + printf("pending\n"); + + printf("%-40s", "Testing vcvtneps2bf16 64(%ecx),%ymm3..."); + asm volatile ( put_insn(vcvtneps2bf16, + /* vcvtneps2bf16 64(%0), %%ymm3 */ + ".byte 0x62, 0xf2, 0x7e, 0x48, 0x72, 0x59, 0x01") + :: "c" (NULL) ); + set_insn(vcvtneps2bf16); + rc = x86_emulate(&ctxt, &emulops); + if ( rc != X86EMUL_OKAY || !check_eip(vcvtneps2bf16) ) + goto fail; + asm ( "vmovdqa %%ymm2, %%ymm5\n\t" + "vpcmpeqd %%zmm3, %%zmm5, %%k0\n\t" + "kmovw %%k0, %0" + : "=g" (rc) : "m" (out) ); + if ( rc != 0xffff ) + goto fail; + printf("pending\n"); + + printf("%-40s", "Testing vdpbf16ps 128(%ecx),%zmm2,%zmm4..."); + asm volatile ( "vmovdqa %%ymm3, %0\n\t" + "vmovdqa %%ymm3, %1\n" + put_insn(vdpbf16ps, + /* vdpbf16ps 128(%2), %%zmm2, %%zmm4 */ + ".byte 0x62, 0xf2, 0x6e, 0x48, 0x52, 0x61, 0x02") + : "=&m" (res[0]), "=&m" (res[8]) + : "c" (NULL) + : "memory" ); + set_insn(vdpbf16ps); + regs.ecx = (unsigned long)res - 128; + rc = x86_emulate(&ctxt, &emulops); + if ( rc != X86EMUL_OKAY || !check_eip(vdpbf16ps) ) + goto fail; + asm ( "vcmpeqps %1, %%zmm4, %%k0\n\t" + "kmovw %%k0, %0" + : "=g" (rc) : "m" (out) ); + if ( rc != 0xffff ) + goto fail; + printf("okay\n"); + } + printf("%-40s", "Testing invpcid 16(%ecx),%%edx..."); if ( stack_exec ) { --- a/tools/tests/x86_emulator/x86-emulate.h +++ b/tools/tests/x86_emulator/x86-emulate.h @@ -156,6 +156,7 @@ static inline bool xcr0_mask(uint64_t ma #define cpu_has_avx512_vpopcntdq (cp.feat.avx512_vpopcntdq && xcr0_mask(0xe6)) #define cpu_has_avx512_4vnniw (cp.feat.avx512_4vnniw && xcr0_mask(0xe6)) #define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6)) +#define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6)) #define cpu_has_xgetbv1 (cpu_has_xsave && cp.xstate.xgetbv1) --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1904,6 +1904,7 @@ in_protmode( #define vcpu_has_rdpid() (ctxt->cpuid->feat.rdpid) #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw) #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps) +#define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16) #define vcpu_must_have(feat) \ generate_exception_if(!vcpu_has_##feat(), EXC_UD) @@ -9152,6 +9153,19 @@ x86_emulate( generate_exception_if(evex.w, EXC_UD); goto avx512f_no_sae; + case X86EMUL_OPC_EVEX_F2(0x0f38, 0x72): /* vcvtne2ps2bf16 [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_F3(0x0f38, 0x72): /* vcvtneps2bf16 [xyz]mm/mem,{x,y}mm{k} */ + if ( evex.pfx == vex_f2 ) + fault_suppression = false; + else + d |= TwoOp; + /* fall through */ + case X86EMUL_OPC_EVEX_F3(0x0f38, 0x52): /* vdpbf16ps [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + host_and_vcpu_must_have(avx512_bf16); + generate_exception_if(evex.w, EXC_UD); + op_bytes = 16 << evex.lr; + goto avx512f_no_sae; + case X86EMUL_OPC_VEX_66(0x0f38, 0x58): /* vpbroadcastd xmm/m32,{x,y}mm */ case X86EMUL_OPC_VEX_66(0x0f38, 0x59): /* vpbroadcastq xmm/m64,{x,y}mm */ case X86EMUL_OPC_VEX_66(0x0f38, 0x78): /* vpbroadcastb xmm/m8,{x,y}mm */ --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -129,6 +129,9 @@ #define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS) #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) +/* CPUID level 0x00000007:1.eax */ +#define cpu_has_avx512_bf16 boot_cpu_has(X86_FEATURE_AVX512_BF16) + /* Synthesized. */ #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -262,7 +262,7 @@ XEN_CPUFEATURE(CORE_CAPS, 9*32+30) / XEN_CPUFEATURE(SSBD, 9*32+31) /*A MSR_SPEC_CTRL.SSBD available */ /* Intel-defined CPU features, CPUID level 0x00000007:1.eax, word 10 */ -XEN_CPUFEATURE(AVX512_BF16, 10*32+ 5) /* AVX512 BFloat16 Instructions */ +XEN_CPUFEATURE(AVX512_BF16, 10*32+ 5) /*A AVX512 BFloat16 Instructions */ #endif /* XEN_CPUFEATURE */ From patchwork Tue Mar 24 12:33:14 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 11455309 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3F2EB6CA for ; Tue, 24 Mar 2020 12:34:21 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 260DF2070A for ; Tue, 24 Mar 2020 12:34:21 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 260DF2070A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=suse.com Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=xen-devel-bounces@lists.xenproject.org Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGikB-0006Yw-Do; Tue, 24 Mar 2020 12:33:19 +0000 Received: from all-amaz-eas1.inumbo.com ([34.197.232.57] helo=us1-amaz-eas2.inumbo.com) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGik9-0006Yq-Ni for xen-devel@lists.xenproject.org; Tue, 24 Mar 2020 12:33:17 +0000 X-Inumbo-ID: a2a48158-6dcb-11ea-83e0-12813bfff9fa Received: from mx2.suse.de (unknown [195.135.220.15]) by us1-amaz-eas2.inumbo.com (Halon) with ESMTPS id a2a48158-6dcb-11ea-83e0-12813bfff9fa; Tue, 24 Mar 2020 12:33:16 +0000 (UTC) X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 6CE81AFDF; Tue, 24 Mar 2020 12:33:15 +0000 (UTC) From: Jan Beulich To: "xen-devel@lists.xenproject.org" References: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Message-ID: <2c83b876-6fd8-1315-3b28-b45e877187aa@suse.com> Date: Tue, 24 Mar 2020 13:33:14 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0 MIME-Version: 1.0 In-Reply-To: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Content-Language: en-US Subject: [Xen-devel] [PATCH v5 03/10] x86: determine HAVE_AS_* just once X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: Stefano Stabellini , Julien Grall , Wei Liu , Andrew Cooper , Ian Jackson , George Dunlap , Paul Durrant , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" With the exception of HAVE_AS_QUOTED_SYM, populate the results into a generated header instead of (at least once per [sub]directory) into CFLAGS. This results in proper rebuilds (via make dependencies) in case the compiler used changes between builds. It additionally eases inspection of which assembler features were actually found usable. Some trickery is needed to avoid header generation itself to try to include the to-be/not-yet-generated header. Since the definitions in generated/config.h, previously having been command line options, might even affect xen/config.h or its descendants, move adding of the -include option for the latter after inclusion of the per-arch Rules.mk. Use the occasion to also move the most general -I option to the common Rules.mk. Signed-off-by: Jan Beulich --- v5: Re-base. v4: New. --- An alternative to the $(MAKECMDGOALS) trickery would be to make generation of generated/config.h part of the asm-offsets.s rule, instead of adding it as a dependency there. Not sure whether either is truly better than the other. --- a/xen/Rules.mk +++ b/xen/Rules.mk @@ -55,7 +55,7 @@ endif CFLAGS += -nostdinc -fno-builtin -fno-common CFLAGS += -Werror -Wredundant-decls -Wno-pointer-arith $(call cc-option-add,CFLAGS,CC,-Wvla) -CFLAGS += -pipe -D__XEN__ -include $(BASEDIR)/include/xen/config.h +CFLAGS += -pipe -D__XEN__ -I$(BASEDIR)/include CFLAGS-$(CONFIG_DEBUG_INFO) += -g CFLAGS += '-D__OBJECT_FILE__="$@"' @@ -95,6 +95,9 @@ SPECIAL_DATA_SECTIONS := rodata $(foreac include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk +# Allow the arch to use -include ahead of this one. +CFLAGS += -include xen/config.h + include Makefile define gendep --- a/xen/arch/arm/Rules.mk +++ b/xen/arch/arm/Rules.mk @@ -6,8 +6,6 @@ # 'make clean' before rebuilding. # -CFLAGS += -I$(BASEDIR)/include - $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS)) $(call cc-option-add,CFLAGS,CC,-Wnested-externs) --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile @@ -225,7 +225,8 @@ endif efi/boot.init.o efi/runtime.o efi/compat.o efi/buildid.o efi/relocs-dummy.o: $(BASEDIR)/arch/x86/efi/built_in.o efi/boot.init.o efi/runtime.o efi/compat.o efi/buildid.o efi/relocs-dummy.o: ; -asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(BASEDIR)/include/asm-x86/asm-macros.h +asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(BASEDIR)/include/asm-x86/asm-macros.h \ + $(BASEDIR)/include/generated/config.h $(CC) $(filter-out -Wa$(comma)% -flto,$(CFLAGS)) -S -o $@ $< asm-macros.i: CFLAGS += -D__ASSEMBLY__ -P @@ -241,6 +242,45 @@ $(BASEDIR)/include/asm-x86/asm-macros.h: echo '#endif' >>$@.new $(call move-if-changed,$@.new,$@) +# There are multiple invocations of this Makefile, one each for asm-offset.s, +# $(TARGET), built_in.o, and several more from the rules building $(TARGET) +# and $(TARGET).efi. The 2nd and 3rd may race with one another, and we don't +# want to re-generate config.h in that case anyway, so guard the logic +# accordingly. (We do want to have the FORCE dependency on the rule, to be +# sure we pick up changes when the compiler used has changed.) +ifeq ($(MAKECMDGOALS),asm-offsets.s) + +as-ISA-list := CLWB EPT FSGSBASE INVPCID RDRAND RDSEED SSE4_2 VMX XSAVEOPT + +CLWB-insn := clwb (%rax) +EPT-insn := invept (%rax),%rax +FSGSBASE-insn := rdfsbase %rax +INVPCID-insn := invpcid (%rax),%rax +RDRAND-insn := rdrand %eax +RDSEED-insn := rdseed %eax +SSE4_2-insn := crc32 %eax,%eax +VMX-insn := vmcall +XSAVEOPT-insn := xsaveopt (%rax) + +# GAS's idea of true is -1. Clang's idea is 1. +NEGATIVE_TRUE-insn := .if ((1 > 0) > 0); .error \"\"; .endif + +# Check to see whether the assembler supports the .nops directive. +NOPS_DIRECTIVE-insn := .L1: .L2: .nops (.L2 - .L1),9 + +as-features-list := $(as-ISA-list) NEGATIVE_TRUE NOPS_DIRECTIVE + +$(BASEDIR)/include/generated/config.h: FORCE + echo '/* Generated header, do not edit. */' >$@.new + $(foreach f,$(as-features-list), \ + $(if $($(f)-insn),,$(error $(f)-insn is empty)) \ + echo '#$(call as-insn,$(CC) $(CFLAGS),"$($(f)-insn)", \ + define, \ + undef) HAVE_AS_$(f) /* $($(f)-insn) */' >>$@.new;) + $(call move-if-changed,$@.new,$@) + +endif + efi.lds: AFLAGS += -DEFI xen.lds efi.lds: xen.lds.S $(CC) -P -E -Ui386 $(filter-out -Wa$(comma)%,$(AFLAGS)) -o $@ $< --- a/xen/arch/x86/Rules.mk +++ b/xen/arch/x86/Rules.mk @@ -3,7 +3,7 @@ XEN_IMG_OFFSET := 0x200000 -CFLAGS += -I$(BASEDIR)/include +CFLAGS += $(if $(filter asm-macros.% %/generated/config.h,$@),,-include generated/config.h) CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-generic CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-default CFLAGS += -DXEN_IMG_OFFSET=$(XEN_IMG_OFFSET) @@ -38,26 +38,9 @@ endif $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS)) $(call cc-option-add,CFLAGS,CC,-Wnested-externs) -$(call as-option-add,CFLAGS,CC,"vmcall",-DHAVE_AS_VMX) -$(call as-option-add,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_AS_SSE4_2) -$(call as-option-add,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_AS_EPT) -$(call as-option-add,CFLAGS,CC,"rdrand %eax",-DHAVE_AS_RDRAND) -$(call as-option-add,CFLAGS,CC,"rdfsbase %rax",-DHAVE_AS_FSGSBASE) -$(call as-option-add,CFLAGS,CC,"xsaveopt (%rax)",-DHAVE_AS_XSAVEOPT) -$(call as-option-add,CFLAGS,CC,"rdseed %eax",-DHAVE_AS_RDSEED) -$(call as-option-add,CFLAGS,CC,"clwb (%rax)",-DHAVE_AS_CLWB) $(call as-option-add,CFLAGS,CC,".equ \"x\"$$(comma)1", \ -U__OBJECT_LABEL__ -DHAVE_AS_QUOTED_SYM \ '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@') -$(call as-option-add,CFLAGS,CC,"invpcid (%rax)$$(comma)%rax",-DHAVE_AS_INVPCID) - -# GAS's idea of true is -1. Clang's idea is 1 -$(call as-option-add,CFLAGS,CC,\ - ".if ((1 > 0) < 0); .error \"\";.endif",,-DHAVE_AS_NEGATIVE_TRUE) - -# Check to see whether the assmbler supports the .nop directive. -$(call as-option-add,CFLAGS,CC,\ - ".L1: .L2: .nops (.L2 - .L1)$$(comma)9",-DHAVE_AS_NOPS_DIRECTIVE) CFLAGS += -mno-red-zone -fpic -fno-asynchronous-unwind-tables --- a/xen/include/Makefile +++ b/xen/include/Makefile @@ -64,7 +64,7 @@ compat/%.h: compat/%.i Makefile $(BASEDI mv -f $@.new $@ compat/%.i: compat/%.c Makefile - $(CPP) $(filter-out -Wa$(comma)% -M% %.d -include %/include/xen/config.h,$(CFLAGS)) $(cppflags-y) -o $@ $< + $(CPP) $(filter-out -Wa$(comma)% -M% %.d -include %/config.h,$(CFLAGS)) $(cppflags-y) -o $@ $< compat/%.c: public/%.h xlat.lst Makefile $(BASEDIR)/tools/compat-build-source.py mkdir -p $(@D) --- a/xen/scripts/Kbuild.include +++ b/xen/scripts/Kbuild.include @@ -10,7 +10,7 @@ DEPS_INCLUDE = $(addsuffix .d2, $(basena # as-insn: Check whether assembler supports an instruction. # Usage: cflags-y += $(call as-insn,CC FLAGS,"insn",option-yes,option-no) as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \ - | $(filter-out -M% %.d -include %/include/xen/config.h,$(1)) \ + | $(filter-out -M% %.d -include %/config.h,$(1)) \ -c -x c -o /dev/null - 2>&1),$(4),$(3)) # as-option-add: Conditionally add options to flags From patchwork Tue Mar 24 12:33:41 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 11455311 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C8B486CA for ; Tue, 24 Mar 2020 12:34:42 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id AFBBE2070A for ; Tue, 24 Mar 2020 12:34:42 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org AFBBE2070A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=suse.com Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=xen-devel-bounces@lists.xenproject.org Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGikc-0006cN-NL; Tue, 24 Mar 2020 12:33:46 +0000 Received: from all-amaz-eas1.inumbo.com ([34.197.232.57] helo=us1-amaz-eas2.inumbo.com) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGika-0006bW-Nw for xen-devel@lists.xenproject.org; Tue, 24 Mar 2020 12:33:44 +0000 X-Inumbo-ID: b2b20a2a-6dcb-11ea-83e0-12813bfff9fa Received: from mx2.suse.de (unknown [195.135.220.15]) by us1-amaz-eas2.inumbo.com (Halon) with ESMTPS id b2b20a2a-6dcb-11ea-83e0-12813bfff9fa; Tue, 24 Mar 2020 12:33:43 +0000 (UTC) X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id EAA12AFE8; Tue, 24 Mar 2020 12:33:42 +0000 (UTC) From: Jan Beulich To: "xen-devel@lists.xenproject.org" References: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Message-ID: Date: Tue, 24 Mar 2020 13:33:41 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0 MIME-Version: 1.0 In-Reply-To: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Content-Language: en-US Subject: [Xen-devel] [PATCH v5 04/10] x86: move back clang no integrated assembler tests X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: Andrew Cooper , Paul Durrant , Wei Liu , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" This largely reverts f19af2f1138e ("x86: re-order clang no integrated assembler tests"): Other CFLAGS setup would better happen first, in case any of it affects the behavior of the integrated assembler. The comment addition of course doesn't get undone. The only remaining as-option-add invocation gets moved down in addition. Signed-off-by: Jan Beulich Reviewed-by: Roger Pau Monné --- v5: Re-base. v4: New. --- a/xen/arch/x86/Rules.mk +++ b/xen/arch/x86/Rules.mk @@ -12,35 +12,8 @@ CFLAGS += '-D__OBJECT_LABEL__=$(subst /, # Prevent floating-point variables from creeping into Xen. CFLAGS += -msoft-float -ifeq ($(CONFIG_CC_IS_CLANG),y) -# Note: Any test which adds -no-integrated-as will cause subsequent tests to -# succeed, and not trigger further additions. -# -# The tests to select whether the integrated assembler is usable need to happen -# before testing any assembler features, or else the result of the tests would -# be stale if the integrated assembler is not used. - -# Older clang's built-in assembler doesn't understand .skip with labels: -# https://bugs.llvm.org/show_bug.cgi?id=27369 -$(call as-option-add,CFLAGS,CC,".L0: .L1: .skip (.L1 - .L0)",,\ - -no-integrated-as) - -# Check whether clang asm()-s support .include. -$(call as-option-add,CFLAGS,CC,".include \"asm-x86/indirect_thunk_asm.h\"",,\ - -no-integrated-as) - -# Check whether clang keeps .macro-s between asm()-s: -# https://bugs.llvm.org/show_bug.cgi?id=36110 -$(call as-option-add,CFLAGS,CC,\ - ".macro FOO;.endm"$$(close); asm volatile $$(open)".macro FOO;.endm",\ - -no-integrated-as) -endif - $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS)) $(call cc-option-add,CFLAGS,CC,-Wnested-externs) -$(call as-option-add,CFLAGS,CC,".equ \"x\"$$(comma)1", \ - -U__OBJECT_LABEL__ -DHAVE_AS_QUOTED_SYM \ - '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@') CFLAGS += -mno-red-zone -fpic -fno-asynchronous-unwind-tables @@ -70,3 +43,30 @@ endif # Set up the assembler include path properly for older toolchains. CFLAGS += -Wa,-I$(BASEDIR)/include +ifeq ($(CONFIG_CC_IS_CLANG),y) +# Note: Any test which adds -no-integrated-as will cause subsequent tests to +# succeed, and not trigger further additions. +# +# The tests to select whether the integrated assembler is usable need to happen +# before testing any assembler features, or else the result of the tests would +# be stale if the integrated assembler is not used. + +# Older clang's built-in assembler doesn't understand .skip with labels: +# https://bugs.llvm.org/show_bug.cgi?id=27369 +$(call as-option-add,CFLAGS,CC,".L0: .L1: .skip (.L1 - .L0)",,\ + -no-integrated-as) + +# Check whether clang asm()-s support .include. +$(call as-option-add,CFLAGS,CC,".include \"asm-x86/indirect_thunk_asm.h\"",,\ + -no-integrated-as) + +# Check whether clang keeps .macro-s between asm()-s: +# https://bugs.llvm.org/show_bug.cgi?id=36110 +$(call as-option-add,CFLAGS,CC,\ + ".macro FOO;.endm"$$(close); asm volatile $$(open)".macro FOO;.endm",\ + -no-integrated-as) +endif + +$(call as-option-add,CFLAGS,CC,".equ \"x\"$$(comma)1", \ + -U__OBJECT_LABEL__ -DHAVE_AS_QUOTED_SYM \ + '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@') From patchwork Tue Mar 24 12:34:06 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 11455313 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D2D76913 for ; Tue, 24 Mar 2020 12:35:05 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id B6B712070A for ; Tue, 24 Mar 2020 12:35:05 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org B6B712070A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=suse.com Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=xen-devel-bounces@lists.xenproject.org Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGil2-0006gD-0O; Tue, 24 Mar 2020 12:34:12 +0000 Received: from us1-rack-iad1.inumbo.com ([172.99.69.81]) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGikz-0006fu-SN for xen-devel@lists.xenproject.org; Tue, 24 Mar 2020 12:34:09 +0000 X-Inumbo-ID: c1a9d508-6dcb-11ea-b34e-bc764e2007e4 Received: from mx2.suse.de (unknown [195.135.220.15]) by us1-rack-iad1.inumbo.com (Halon) with ESMTPS id c1a9d508-6dcb-11ea-b34e-bc764e2007e4; Tue, 24 Mar 2020 12:34:08 +0000 (UTC) X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 13243AE3C; Tue, 24 Mar 2020 12:34:08 +0000 (UTC) From: Jan Beulich To: "xen-devel@lists.xenproject.org" References: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Message-ID: <81e7aade-9dfb-313a-ad81-30b2703c2136@suse.com> Date: Tue, 24 Mar 2020 13:34:06 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0 MIME-Version: 1.0 In-Reply-To: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Content-Language: en-US Subject: [Xen-devel] [PATCH v5 05/10] x86emul: support MOVDIR64B insn X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: Andrew Cooper , Paul Durrant , Wei Liu , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" Introduce a new blk() hook, paralleling the rmw() on in certain way, but being intended for larger data sizes, and hence its HVM intermediate handling function doesn't fall back to splitting the operation if the requested virtual address can't be mapped. Note that SDM revision 071 doesn't specify exception behavior for ModRM.mod == 0b11; assuming #UD here. Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Acked-by: Andrew Cooper --- TBD: If we want to avoid depending on correct MTRR settings, hvmemul_map_linear_addr() may need to gain a parameter to allow controlling cachability of the produced mapping(s). Of course the function will also need to be made capable of mapping at least p2m_mmio_direct pages for this and the two ENQCMD insns to be actually useful. --- v5: Introduce/use ->blk() hook. Correct asm() operands. v4: Split MOVDIRI and MOVDIR64B. Switch to using ->rmw(). Re-base. v3: Update description. --- (SDE: -tnt) --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -652,6 +652,18 @@ static int cmpxchg( return X86EMUL_OKAY; } +static int blk( + enum x86_segment seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + uint32_t *eflags, + struct x86_emulate_state *state, + struct x86_emulate_ctxt *ctxt) +{ + return x86_emul_blk((void *)offset, p_data, bytes, eflags, state, ctxt); +} + static int read_segment( enum x86_segment seg, struct segment_register *reg, @@ -2208,6 +2220,35 @@ int main(int argc, char **argv) goto fail; printf("okay\n"); + printf("%-40s", "Testing movdir64b 144(%edx),%ecx..."); + if ( stack_exec && cpu_has_movdir64b ) + { + emulops.blk = blk; + + instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf8; + instr[4] = 0x8a; instr[5] = 0x90; instr[8] = instr[7] = instr[6] = 0; + + regs.eip = (unsigned long)&instr[0]; + for ( i = 0; i < 64; ++i ) + res[i] = i - 20; + regs.edx = (unsigned long)res; + regs.ecx = (unsigned long)(res + 16); + + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (regs.eip != (unsigned long)&instr[9]) || + res[15] != -5 || res[32] != 12 ) + goto fail; + for ( i = 16; i < 32; ++i ) + if ( res[i] != i ) + goto fail; + printf("okay\n"); + + emulops.blk = NULL; + } + else + printf("skipped\n"); + printf("%-40s", "Testing movq %mm3,(%ecx)..."); if ( stack_exec && cpu_has_mmx ) { --- a/tools/tests/x86_emulator/x86-emulate.h +++ b/tools/tests/x86_emulator/x86-emulate.h @@ -154,6 +154,7 @@ static inline bool xcr0_mask(uint64_t ma #define cpu_has_avx512_vnni (cp.feat.avx512_vnni && xcr0_mask(0xe6)) #define cpu_has_avx512_bitalg (cp.feat.avx512_bitalg && xcr0_mask(0xe6)) #define cpu_has_avx512_vpopcntdq (cp.feat.avx512_vpopcntdq && xcr0_mask(0xe6)) +#define cpu_has_movdir64b cp.feat.movdir64b #define cpu_has_avx512_4vnniw (cp.feat.avx512_4vnniw && xcr0_mask(0xe6)) #define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6)) #define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6)) --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile @@ -250,12 +250,13 @@ $(BASEDIR)/include/asm-x86/asm-macros.h: # sure we pick up changes when the compiler used has changed.) ifeq ($(MAKECMDGOALS),asm-offsets.s) -as-ISA-list := CLWB EPT FSGSBASE INVPCID RDRAND RDSEED SSE4_2 VMX XSAVEOPT +as-ISA-list := CLWB EPT FSGSBASE INVPCID MOVDIR64B RDRAND RDSEED SSE4_2 VMX XSAVEOPT CLWB-insn := clwb (%rax) EPT-insn := invept (%rax),%rax FSGSBASE-insn := rdfsbase %rax INVPCID-insn := invpcid (%rax),%rax +MOVDIR64B-insn := movdir64b (%rax),%rax RDRAND-insn := rdrand %eax RDSEED-insn := rdseed %eax SSE4_2-insn := crc32 %eax,%eax --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -1409,6 +1409,44 @@ static int hvmemul_rmw( return rc; } +static int hvmemul_blk( + enum x86_segment seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + uint32_t *eflags, + struct x86_emulate_state *state, + struct x86_emulate_ctxt *ctxt) +{ + struct hvm_emulate_ctxt *hvmemul_ctxt = + container_of(ctxt, struct hvm_emulate_ctxt, ctxt); + unsigned long addr; + uint32_t pfec = PFEC_page_present | PFEC_write_access; + int rc; + void *mapping = NULL; + + rc = hvmemul_virtual_to_linear( + seg, offset, bytes, NULL, hvm_access_write, hvmemul_ctxt, &addr); + if ( rc != X86EMUL_OKAY || !bytes ) + return rc; + + if ( is_x86_system_segment(seg) ) + pfec |= PFEC_implicit; + else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 ) + pfec |= PFEC_user_mode; + + mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt); + if ( IS_ERR(mapping) ) + return ~PTR_ERR(mapping); + if ( !mapping ) + return X86EMUL_UNHANDLEABLE; + + rc = x86_emul_blk(mapping, p_data, bytes, eflags, state, ctxt); + hvmemul_unmap_linear_addr(mapping, addr, bytes, hvmemul_ctxt); + + return rc; +} + static int hvmemul_write_discard( enum x86_segment seg, unsigned long offset, @@ -2475,6 +2513,7 @@ static const struct x86_emulate_ops hvm_ .write = hvmemul_write, .rmw = hvmemul_rmw, .cmpxchg = hvmemul_cmpxchg, + .blk = hvmemul_blk, .validate = hvmemul_validate, .rep_ins = hvmemul_rep_ins, .rep_outs = hvmemul_rep_outs, --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -548,6 +548,7 @@ static const struct ext0f38_table { [0xf1] = { .to_mem = 1, .two_op = 1 }, [0xf2 ... 0xf3] = {}, [0xf5 ... 0xf7] = {}, + [0xf8] = { .simd_size = simd_other }, [0xf9] = { .to_mem = 1 }, }; @@ -852,6 +853,9 @@ struct x86_emulate_state { rmw_xchg, rmw_xor, } rmw; + enum { + blk_movdir, + } blk; uint8_t modrm, modrm_mod, modrm_reg, modrm_rm; uint8_t sib_index, sib_scale; uint8_t rex_prefix; @@ -1904,6 +1908,7 @@ in_protmode( #define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq) #define vcpu_has_rdpid() (ctxt->cpuid->feat.rdpid) #define vcpu_has_movdiri() (ctxt->cpuid->feat.movdiri) +#define vcpu_has_movdir64b() (ctxt->cpuid->feat.movdir64b) #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw) #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps) #define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16) @@ -10079,6 +10084,23 @@ x86_emulate( : "0" ((uint32_t)src.val), "rm" (_regs.edx) ); break; + case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */ + host_and_vcpu_must_have(movdir64b); + generate_exception_if(ea.type != OP_MEM, EXC_UD); + src.val = truncate_ea(*dst.reg); + generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops), + EXC_GP, 0); + fail_if(!ops->blk); + state->blk = blk_movdir; + BUILD_BUG_ON(sizeof(*mmvalp) < 64); + if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64, + ctxt)) != X86EMUL_OKAY || + (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags, + state, ctxt)) != X86EMUL_OKAY ) + goto done; + state->simd_size = simd_none; + break; + case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */ vcpu_must_have(movdiri); generate_exception_if(dst.type != OP_MEM, EXC_UD); @@ -11345,6 +11367,45 @@ int x86_emul_rmw( return X86EMUL_OKAY; } + +int x86_emul_blk( + void *ptr, + void *data, + unsigned int bytes, + uint32_t *eflags, + struct x86_emulate_state *state, + struct x86_emulate_ctxt *ctxt) +{ + switch ( state->blk ) + { + /* + * Throughout this switch(), memory clobbers are used to compensate + * that other operands may not properly express the (full) memory + * ranges covered. + */ + case blk_movdir: + ASSERT(bytes == 64); + if ( ((unsigned long)ptr & 0x3f) ) + { + ASSERT_UNREACHABLE(); + return X86EMUL_UNHANDLEABLE; + } +#ifdef HAVE_AS_MOVDIR64B + asm ( "movdir64b (%0), %1" :: "r" (data), "r" (ptr) : "memory" ); +#else + /* movdir64b (%rsi), %rdi */ + asm ( ".byte 0x66, 0x0f, 0x38, 0xf8, 0x3e" + :: "S" (data), "D" (ptr) : "memory" ); +#endif + break; + + default: + ASSERT_UNREACHABLE(); + return X86EMUL_UNHANDLEABLE; + } + + return X86EMUL_OKAY; +} static void __init __maybe_unused build_assertions(void) { --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -310,6 +310,22 @@ struct x86_emulate_ops struct x86_emulate_ctxt *ctxt); /* + * blk: Emulate a large (block) memory access. + * @p_data: [IN/OUT] (optional) Pointer to source/destination buffer. + * @eflags: [IN/OUT] Pointer to EFLAGS to be updated according to + * instruction effects. + * @state: [IN/OUT] Pointer to (opaque) emulator state. + */ + int (*blk)( + enum x86_segment seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + uint32_t *eflags, + struct x86_emulate_state *state, + struct x86_emulate_ctxt *ctxt); + + /* * validate: Post-decode, pre-emulate hook to allow caller controlled * filtering. */ @@ -793,6 +809,14 @@ x86_emul_rmw( unsigned int bytes, uint32_t *eflags, struct x86_emulate_state *state, + struct x86_emulate_ctxt *ctxt); +int +x86_emul_blk( + void *ptr, + void *data, + unsigned int bytes, + uint32_t *eflags, + struct x86_emulate_state *state, struct x86_emulate_ctxt *ctxt); static inline void x86_emul_hw_exception( --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -120,6 +120,7 @@ #define cpu_has_avx512_bitalg boot_cpu_has(X86_FEATURE_AVX512_BITALG) #define cpu_has_avx512_vpopcntdq boot_cpu_has(X86_FEATURE_AVX512_VPOPCNTDQ) #define cpu_has_rdpid boot_cpu_has(X86_FEATURE_RDPID) +#define cpu_has_movdir64b boot_cpu_has(X86_FEATURE_MOVDIR64B) /* CPUID level 0x80000007.edx */ #define cpu_has_itsc boot_cpu_has(X86_FEATURE_ITSC) --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -238,6 +238,7 @@ XEN_CPUFEATURE(AVX512_VPOPCNTDQ, 6*32+14 XEN_CPUFEATURE(RDPID, 6*32+22) /*A RDPID instruction */ XEN_CPUFEATURE(CLDEMOTE, 6*32+25) /*A CLDEMOTE instruction */ XEN_CPUFEATURE(MOVDIRI, 6*32+27) /*A MOVDIRI instruction */ +XEN_CPUFEATURE(MOVDIR64B, 6*32+28) /*A MOVDIR64B instruction */ /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */ XEN_CPUFEATURE(ITSC, 7*32+ 8) /* Invariant TSC */ From patchwork Tue Mar 24 12:34:45 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 11455325 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id CBDA1913 for ; Tue, 24 Mar 2020 12:36:10 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id B30792070A for ; Tue, 24 Mar 2020 12:36:10 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org B30792070A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=suse.com Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=xen-devel-bounces@lists.xenproject.org Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGilc-0006n8-Er; Tue, 24 Mar 2020 12:34:48 +0000 Received: from us1-rack-iad1.inumbo.com ([172.99.69.81]) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGilb-0006mu-Rq for xen-devel@lists.xenproject.org; Tue, 24 Mar 2020 12:34:47 +0000 X-Inumbo-ID: d87b0e00-6dcb-11ea-92cf-bc764e2007e4 Received: from mx2.suse.de (unknown [195.135.220.15]) by us1-rack-iad1.inumbo.com (Halon) with ESMTPS id d87b0e00-6dcb-11ea-92cf-bc764e2007e4; Tue, 24 Mar 2020 12:34:47 +0000 (UTC) X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 57B25AFE4; Tue, 24 Mar 2020 12:34:46 +0000 (UTC) From: Jan Beulich To: "xen-devel@lists.xenproject.org" References: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Message-ID: <9b0e0026-7624-707c-5200-f8c28c83cb50@suse.com> Date: Tue, 24 Mar 2020 13:34:45 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0 MIME-Version: 1.0 In-Reply-To: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Content-Language: en-US Subject: [Xen-devel] [PATCH v5 06/10] x86emul: support ENQCMD insn X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: Andrew Cooper , Paul Durrant , Wei Liu , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" Introduce a new blk() hook, paralleling the rmw() on in certain way, but being intended for larger data sizes, and hence its HVM intermediate handling function doesn't fall back to splitting the operation if the requested virtual address can't be mapped. Note that the ISA extensions document revision 037 doesn't specify exception behavior for ModRM.mod == 0b11; assuming #UD here. No tests are being added to the harness - this would be quite hard, we can't just issue the insns against RAM. Their similarity with MOVDIR64B should have the test case there be god enough to cover any fundamental flaws. Signed-off-by: Jan Beulich --- TBD: This doesn't (can't) consult PASID translation tables yet, as we have no VMX code for this so far. I guess for this we will want to replace the direct ->read_msr(MSR_IA32_PASID, ...) with a new ->read_pasid() hook. --- v5: New. --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile @@ -250,13 +250,14 @@ $(BASEDIR)/include/asm-x86/asm-macros.h: # sure we pick up changes when the compiler used has changed.) ifeq ($(MAKECMDGOALS),asm-offsets.s) -as-ISA-list := CLWB EPT FSGSBASE INVPCID MOVDIR64B RDRAND RDSEED SSE4_2 VMX XSAVEOPT +as-ISA-list := CLWB ENQCMD EPT FSGSBASE INVPCID MOVDIR64B RDRAND RDSEED SSE4_2 VMX XSAVEOPT CLWB-insn := clwb (%rax) EPT-insn := invept (%rax),%rax FSGSBASE-insn := rdfsbase %rax INVPCID-insn := invpcid (%rax),%rax MOVDIR64B-insn := movdir64b (%rax),%rax +ENQCMD-insn := enqcmd (%rax),%rax RDRAND-insn := rdrand %eax RDSEED-insn := rdseed %eax SSE4_2-insn := crc32 %eax,%eax --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -854,6 +854,7 @@ struct x86_emulate_state { rmw_xor, } rmw; enum { + blk_enqcmd, blk_movdir, } blk; uint8_t modrm, modrm_mod, modrm_reg, modrm_rm; @@ -900,6 +901,7 @@ typedef union { uint64_t __attribute__ ((aligned(16))) xmm[2]; uint64_t __attribute__ ((aligned(32))) ymm[4]; uint64_t __attribute__ ((aligned(64))) zmm[8]; + uint32_t data32[16]; } mmval_t; /* @@ -1909,6 +1911,7 @@ in_protmode( #define vcpu_has_rdpid() (ctxt->cpuid->feat.rdpid) #define vcpu_has_movdiri() (ctxt->cpuid->feat.movdiri) #define vcpu_has_movdir64b() (ctxt->cpuid->feat.movdir64b) +#define vcpu_has_enqcmd() (ctxt->cpuid->feat.enqcmd) #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw) #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps) #define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16) @@ -10101,6 +10104,36 @@ x86_emulate( state->simd_size = simd_none; break; + case X86EMUL_OPC_F2(0x0f38, 0xf8): /* enqcmd r,m512 */ + case X86EMUL_OPC_F3(0x0f38, 0xf8): /* enqcmds r,m512 */ + host_and_vcpu_must_have(enqcmd); + generate_exception_if(ea.type != OP_MEM, EXC_UD); + generate_exception_if(vex.pfx != vex_f2 && !mode_ring0(), EXC_GP, 0); + src.val = truncate_ea(*dst.reg); + generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops), + EXC_GP, 0); + fail_if(!ops->blk); + BUILD_BUG_ON(sizeof(*mmvalp) < 64); + if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64, + ctxt)) != X86EMUL_OKAY ) + goto done; + if ( vex.pfx == vex_f2 ) /* enqcmd */ + { + fail_if(!ops->read_msr); + if ( (rc = ops->read_msr(MSR_IA32_PASID, + &msr_val, ctxt)) != X86EMUL_OKAY ) + goto done; + generate_exception_if(!(msr_val & PASID_VALID), EXC_GP, 0); + mmvalp->data32[0] = MASK_EXTR(msr_val, PASID_PASID_MASK); + } + mmvalp->data32[0] &= ~0x7ff00000; + state->blk = blk_enqcmd; + if ( (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags, + state, ctxt)) != X86EMUL_OKAY ) + goto done; + state->simd_size = simd_none; + break; + case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */ vcpu_must_have(movdiri); generate_exception_if(dst.type != OP_MEM, EXC_UD); @@ -11378,11 +11411,36 @@ int x86_emul_blk( { switch ( state->blk ) { + bool zf; + /* * Throughout this switch(), memory clobbers are used to compensate * that other operands may not properly express the (full) memory * ranges covered. */ + case blk_enqcmd: + ASSERT(bytes == 64); + if ( ((unsigned long)ptr & 0x3f) ) + { + ASSERT_UNREACHABLE(); + return X86EMUL_UNHANDLEABLE; + } + *eflags &= ~EFLAGS_MASK; +#ifdef HAVE_AS_ENQCMD + asm ( "enqcmds (%[src]), %[dst]" ASM_FLAG_OUT(, "; setz %0") + : [zf] ASM_FLAG_OUT("=@ccz", "=qm") (zf) + : [src] "r" (data), [dst] "r" (ptr) : "memory" ); +#else + /* enqcmds (%rsi), %rdi */ + asm ( ".byte 0xf3, 0x0f, 0x38, 0xf8, 0x3e" + ASM_FLAG_OUT(, "; setz %[zf]") + : [zf] ASM_FLAG_OUT("=@ccz", "=qm") (zf) + : "S" (data), "D" (ptr) : "memory" ); +#endif + if ( zf ) + *eflags |= X86_EFLAGS_ZF; + break; + case blk_movdir: ASSERT(bytes == 64); if ( ((unsigned long)ptr & 0x3f) ) --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -121,6 +121,7 @@ #define cpu_has_avx512_vpopcntdq boot_cpu_has(X86_FEATURE_AVX512_VPOPCNTDQ) #define cpu_has_rdpid boot_cpu_has(X86_FEATURE_RDPID) #define cpu_has_movdir64b boot_cpu_has(X86_FEATURE_MOVDIR64B) +#define cpu_has_enqcmd boot_cpu_has(X86_FEATURE_ENQCMD) /* CPUID level 0x80000007.edx */ #define cpu_has_itsc boot_cpu_has(X86_FEATURE_ITSC) --- a/xen/include/asm-x86/msr-index.h +++ b/xen/include/asm-x86/msr-index.h @@ -412,6 +412,10 @@ #define MSR_IA32_TSC_DEADLINE 0x000006E0 #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 +#define MSR_IA32_PASID 0x00000d93 +#define PASID_PASID_MASK 0x000fffff +#define PASID_VALID 0x80000000 + /* Platform Shared Resource MSRs */ #define MSR_IA32_CMT_EVTSEL 0x00000c8d #define MSR_IA32_CMT_EVTSEL_UE_MASK 0x0000ffff --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -239,6 +239,7 @@ XEN_CPUFEATURE(RDPID, 6*32+22) / XEN_CPUFEATURE(CLDEMOTE, 6*32+25) /*A CLDEMOTE instruction */ XEN_CPUFEATURE(MOVDIRI, 6*32+27) /*A MOVDIRI instruction */ XEN_CPUFEATURE(MOVDIR64B, 6*32+28) /*A MOVDIR64B instruction */ +XEN_CPUFEATURE(ENQCMD, 6*32+29) /* ENQCMD{,S} instructions */ /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */ XEN_CPUFEATURE(ITSC, 7*32+ 8) /* Invariant TSC */ From patchwork Tue Mar 24 12:35:15 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 11455327 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8ECDD913 for ; Tue, 24 Mar 2020 12:36:38 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 7542D2070A for ; Tue, 24 Mar 2020 12:36:38 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 7542D2070A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=suse.com Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=xen-devel-bounces@lists.xenproject.org Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGim7-0006tC-Om; Tue, 24 Mar 2020 12:35:19 +0000 Received: from all-amaz-eas1.inumbo.com ([34.197.232.57] helo=us1-amaz-eas2.inumbo.com) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGim6-0006t3-H8 for xen-devel@lists.xenproject.org; Tue, 24 Mar 2020 12:35:18 +0000 X-Inumbo-ID: eab21dc0-6dcb-11ea-83e0-12813bfff9fa Received: from mx2.suse.de (unknown [195.135.220.15]) by us1-amaz-eas2.inumbo.com (Halon) with ESMTPS id eab21dc0-6dcb-11ea-83e0-12813bfff9fa; Tue, 24 Mar 2020 12:35:17 +0000 (UTC) X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 9F0C5AFE4; Tue, 24 Mar 2020 12:35:16 +0000 (UTC) From: Jan Beulich To: "xen-devel@lists.xenproject.org" References: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Message-ID: <930cd2b2-544d-a1c5-2245-24087769b9e6@suse.com> Date: Tue, 24 Mar 2020 13:35:15 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0 MIME-Version: 1.0 In-Reply-To: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Content-Language: en-US Subject: [Xen-devel] [PATCH v5 07/10] x86/HVM: scale MPERF values reported to guests (on AMD) X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: Andrew Cooper , Paul Durrant , Wei Liu , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" AMD's PM specifies that MPERF (and its r/o counterpart) reads are affected by the TSC ratio. Hence when processing such reads in software we too should scale the values. While we don't currently (yet) expose the underlying feature flags, besides us allowing the MSRs to be read nevertheless, RDPRU is going to expose the values even to user space. Furthermore, due to the not exposed feature flags, this change has the effect of making properly inaccessible (for reads) the two MSRs. Note that writes to MPERF (and APERF) continue to be unsupported. Signed-off-by: Jan Beulich --- v3: New. --- I did consider whether to put the code in guest_rdmsr() instead, but decided that it's better to have it next to TSC handling. --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -3456,6 +3456,22 @@ int hvm_msr_read_intercept(unsigned int *msr_content = v->arch.hvm.msr_tsc_adjust; break; + case MSR_MPERF_RD_ONLY: + if ( !d->arch.cpuid->extd.efro ) + { + goto gp_fault; + + case MSR_IA32_MPERF: + if ( !(d->arch.cpuid->basic.raw[6].c & + CPUID6_ECX_APERFMPERF_CAPABILITY) ) + goto gp_fault; + } + if ( rdmsr_safe(msr, *msr_content) ) + goto gp_fault; + if ( d->arch.cpuid->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) ) + *msr_content = hvm_get_guest_tsc_fixed(v, *msr_content); + break; + case MSR_APIC_BASE: *msr_content = vcpu_vlapic(v)->hw.apic_base_msr; break; --- a/xen/include/asm-x86/msr-index.h +++ b/xen/include/asm-x86/msr-index.h @@ -397,6 +397,9 @@ #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 +#define MSR_MPERF_RD_ONLY 0xc00000e7 +#define MSR_APERF_RD_ONLY 0xc00000e8 + #define MSR_IA32_THERM_CONTROL 0x0000019a #define MSR_IA32_THERM_INTERRUPT 0x0000019b #define MSR_IA32_THERM_STATUS 0x0000019c From patchwork Tue Mar 24 12:36:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 11455329 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B58846CA for ; Tue, 24 Mar 2020 12:37:35 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 9CCDF2070A for ; Tue, 24 Mar 2020 12:37:35 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 9CCDF2070A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=suse.com Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=xen-devel-bounces@lists.xenproject.org Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGin8-00071V-3P; Tue, 24 Mar 2020 12:36:22 +0000 Received: from us1-rack-iad1.inumbo.com ([172.99.69.81]) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGin7-00071M-1H for xen-devel@lists.xenproject.org; Tue, 24 Mar 2020 12:36:21 +0000 X-Inumbo-ID: 0ffa5836-6dcc-11ea-92cf-bc764e2007e4 Received: from mx2.suse.de (unknown [195.135.220.15]) by us1-rack-iad1.inumbo.com (Halon) with ESMTPS id 0ffa5836-6dcc-11ea-92cf-bc764e2007e4; Tue, 24 Mar 2020 12:36:20 +0000 (UTC) X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 73AB7AC7C; Tue, 24 Mar 2020 12:36:19 +0000 (UTC) From: Jan Beulich To: "xen-devel@lists.xenproject.org" References: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Message-ID: Date: Tue, 24 Mar 2020 13:36:18 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0 MIME-Version: 1.0 In-Reply-To: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Content-Language: en-US Subject: [Xen-devel] [PATCH v5 08/10] x86emul: support RDPRU X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: Andrew Cooper , Paul Durrant , Wei Liu , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" While the PM doesn't say so, this assumes that the MPERF value read this way gets scaled similarly to its reading through RDMSR. Also introduce the SVM related constants at this occasion. Signed-off-by: Jan Beulich --- v5: The CPUID field used is just 8 bits wide. v4: Add GENERAL2_INTERCEPT_RDPRU and VMEXIT_RDPRU enumerators. Fold handling of out of bounds indexes into switch(). Avoid recalculate_misc() clobbering what recalculate_cpu_policy() has done. Re-base. v3: New. --- RFC: Andrew promised to take care of the CPUID side of this; re-base over his work once available. --- a/tools/libxl/libxl_cpuid.c +++ b/tools/libxl/libxl_cpuid.c @@ -260,6 +260,7 @@ int libxl_cpuid_parse_config(libxl_cpuid {"clzero", 0x80000008, NA, CPUID_REG_EBX, 0, 1}, {"rstr-fp-err-ptrs", 0x80000008, NA, CPUID_REG_EBX, 2, 1}, + {"rdpru", 0x80000008, NA, CPUID_REG_EBX, 4, 1}, {"wbnoinvd", 0x80000008, NA, CPUID_REG_EBX, 9, 1}, {"ibpb", 0x80000008, NA, CPUID_REG_EBX, 12, 1}, {"ppin", 0x80000008, NA, CPUID_REG_EBX, 23, 1}, --- a/tools/misc/xen-cpuid.c +++ b/tools/misc/xen-cpuid.c @@ -147,6 +147,8 @@ static const char *const str_e8b[32] = [ 0] = "clzero", [ 2] = "rstr-fp-err-ptrs", + [ 4] = "rdpru", + /* [ 8] */ [ 9] = "wbnoinvd", [12] = "ibpb", --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -683,6 +683,13 @@ static int read_msr( { switch ( reg ) { + case 0x000000e8: /* APERF */ + case 0xc00000e8: /* APERF_RD_ONLY */ +#define APERF_LO_VALUE 0xAEAEAEAE +#define APERF_HI_VALUE 0xEAEAEAEA + *val = ((uint64_t)APERF_HI_VALUE << 32) | APERF_LO_VALUE; + return X86EMUL_OKAY; + case 0xc0000080: /* EFER */ *val = ctxt->addr_size > 32 ? 0x500 /* LME|LMA */ : 0; return X86EMUL_OKAY; @@ -2249,6 +2256,30 @@ int main(int argc, char **argv) else printf("skipped\n"); + printf("%-40s", "Testing rdpru..."); + instr[0] = 0x0f; instr[1] = 0x01; instr[2] = 0xfd; + regs.eip = (unsigned long)&instr[0]; + regs.ecx = 1; + regs.eflags = EFLAGS_ALWAYS_SET; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (regs.eax != APERF_LO_VALUE) || (regs.edx != APERF_HI_VALUE) || + !(regs.eflags & X86_EFLAGS_CF) || + (regs.eip != (unsigned long)&instr[3]) ) + goto fail; + if ( ctxt.cpuid->extd.rdpru_max < 0xffff ) + { + regs.eip = (unsigned long)&instr[0]; + regs.ecx = ctxt.cpuid->extd.rdpru_max + 1; + regs.eflags = EFLAGS_ALWAYS_SET | X86_EFLAGS_CF; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || regs.eax || regs.edx || + (regs.eflags & X86_EFLAGS_CF) || + (regs.eip != (unsigned long)&instr[3]) ) + goto fail; + } + printf("okay\n"); + printf("%-40s", "Testing movq %mm3,(%ecx)..."); if ( stack_exec && cpu_has_mmx ) { --- a/tools/tests/x86_emulator/x86-emulate.c +++ b/tools/tests/x86_emulator/x86-emulate.c @@ -78,6 +78,8 @@ bool emul_test_init(void) cp.feat.rdpid = true; cp.feat.movdiri = true; cp.extd.clzero = true; + cp.extd.rdpru = true; + cp.extd.rdpru_max = 1; if ( cpu_has_xsave ) { @@ -150,11 +152,11 @@ int emul_test_cpuid( } /* - * The emulator doesn't itself use CLZERO, so we can always run the + * The emulator doesn't itself use CLZERO/RDPRU, so we can always run the * respective test(s). */ if ( leaf == 0x80000008 ) - res->b |= 1U << 0; + res->b |= (1U << 0) | (1U << 4); return X86EMUL_OKAY; } --- a/xen/arch/x86/cpuid.c +++ b/xen/arch/x86/cpuid.c @@ -243,8 +243,6 @@ static void recalculate_misc(struct cpui /* Most of Power/RAS hidden from guests. */ p->extd.raw[0x7].a = p->extd.raw[0x7].b = p->extd.raw[0x7].c = 0; - p->extd.raw[0x8].d = 0; - switch ( p->x86_vendor ) { case X86_VENDOR_INTEL: @@ -263,6 +261,7 @@ static void recalculate_misc(struct cpui p->extd.raw[0x8].a &= 0x0000ffff; p->extd.raw[0x8].c = 0; + p->extd.raw[0x8].d = 0; break; case X86_VENDOR_AMD: @@ -281,6 +280,7 @@ static void recalculate_misc(struct cpui p->extd.raw[0x8].a &= 0x0000ffff; /* GuestMaxPhysAddr hidden. */ p->extd.raw[0x8].c &= 0x0003f0ff; + p->extd.raw[0x8].d &= 0xffff0000; p->extd.raw[0x9] = EMPTY_LEAF; @@ -643,6 +643,11 @@ void recalculate_cpuid_policy(struct dom p->extd.maxlinaddr = p->extd.lm ? 48 : 32; + if ( p->extd.rdpru ) + p->extd.rdpru_max = min(p->extd.rdpru_max, max->extd.rdpru_max); + else + p->extd.rdpru_max = 0; + recalculate_xstate(p); recalculate_misc(p); --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1877,6 +1877,7 @@ in_protmode( #define vcpu_has_fma4() (ctxt->cpuid->extd.fma4) #define vcpu_has_tbm() (ctxt->cpuid->extd.tbm) #define vcpu_has_clzero() (ctxt->cpuid->extd.clzero) +#define vcpu_has_rdpru() (ctxt->cpuid->extd.rdpru) #define vcpu_has_wbnoinvd() (ctxt->cpuid->extd.wbnoinvd) #define vcpu_has_bmi1() (ctxt->cpuid->feat.bmi1) @@ -5711,6 +5712,50 @@ x86_emulate( limit -= sizeof(zero); } break; + + case 0xfd: /* rdpru */ + vcpu_must_have(rdpru); + + if ( !mode_ring0() ) + { + fail_if(!ops->read_cr); + if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY ) + goto done; + generate_exception_if(cr4 & X86_CR4_TSD, EXC_UD); + } + + switch ( _regs.ecx | -(_regs.ecx > ctxt->cpuid->extd.rdpru_max) ) + { + case 0: n = MSR_IA32_MPERF; break; + case 1: n = MSR_IA32_APERF; break; + default: n = 0; break; + } + + _regs.eflags &= ~EFLAGS_MASK; + if ( n ) + { + fail_if(!ops->read_msr); + switch ( rc = ops->read_msr(n, &msr_val, ctxt) ) + { + case X86EMUL_OKAY: + _regs.eflags |= X86_EFLAGS_CF; + break; + + case X86EMUL_EXCEPTION: + x86_emul_reset_event(ctxt); + rc = X86EMUL_OKAY; + break; + + default: + goto done; + } + } + + if ( !(_regs.eflags & X86_EFLAGS_CF) ) + msr_val = 0; + _regs.r(dx) = msr_val >> 32; + _regs.r(ax) = (uint32_t)msr_val; + break; } #define _GRP7(mod, reg) \ --- a/xen/include/asm-x86/hvm/svm/vmcb.h +++ b/xen/include/asm-x86/hvm/svm/vmcb.h @@ -74,7 +74,8 @@ enum GenericIntercept2bits GENERAL2_INTERCEPT_MONITOR = 1 << 10, GENERAL2_INTERCEPT_MWAIT = 1 << 11, GENERAL2_INTERCEPT_MWAIT_CONDITIONAL = 1 << 12, - GENERAL2_INTERCEPT_XSETBV = 1 << 13 + GENERAL2_INTERCEPT_XSETBV = 1 << 13, + GENERAL2_INTERCEPT_RDPRU = 1 << 14, }; @@ -298,6 +299,7 @@ enum VMEXIT_EXITCODE VMEXIT_MWAIT = 139, /* 0x8b */ VMEXIT_MWAIT_CONDITIONAL= 140, /* 0x8c */ VMEXIT_XSETBV = 141, /* 0x8d */ + VMEXIT_RDPRU = 142, /* 0x8e */ VMEXIT_NPF = 1024, /* 0x400, nested paging fault */ VMEXIT_INVALID = -1 }; --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -248,6 +248,7 @@ XEN_CPUFEATURE(EFRO, 7*32+10) / /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */ XEN_CPUFEATURE(CLZERO, 8*32+ 0) /*A CLZERO instruction */ XEN_CPUFEATURE(RSTR_FP_ERR_PTRS, 8*32+ 2) /*A (F)X{SAVE,RSTOR} always saves/restores FPU Error pointers */ +XEN_CPUFEATURE(RDPRU, 8*32+ 4) /*A RDPRU instruction */ XEN_CPUFEATURE(WBNOINVD, 8*32+ 9) /* WBNOINVD instruction */ XEN_CPUFEATURE(IBPB, 8*32+12) /*A IBPB support only (no IBRS, used by AMD) */ XEN_CPUFEATURE(AMD_PPIN, 8*32+23) /* Protected Processor Inventory Number */ --- a/xen/include/xen/lib/x86/cpuid.h +++ b/xen/include/xen/lib/x86/cpuid.h @@ -264,7 +264,7 @@ struct cpuid_policy struct { DECL_BITFIELD(e8b); }; }; uint32_t nc:8, :4, apic_id_size:4, :16; - uint32_t /* d */:32; + uint8_t :8, :8, rdpru_max, :8; }; } extd; From patchwork Tue Mar 24 12:37:24 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 11455331 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 93992913 for ; Tue, 24 Mar 2020 12:38:16 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 79F6D2070A for ; Tue, 24 Mar 2020 12:38:16 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 79F6D2070A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=suse.com Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=xen-devel-bounces@lists.xenproject.org Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGioD-0007AP-OE; Tue, 24 Mar 2020 12:37:29 +0000 Received: from all-amaz-eas1.inumbo.com ([34.197.232.57] helo=us1-amaz-eas2.inumbo.com) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGioC-0007AD-DB for xen-devel@lists.xenproject.org; Tue, 24 Mar 2020 12:37:28 +0000 X-Inumbo-ID: 38055524-6dcc-11ea-83e0-12813bfff9fa Received: from mx2.suse.de (unknown [195.135.220.15]) by us1-amaz-eas2.inumbo.com (Halon) with ESMTPS id 38055524-6dcc-11ea-83e0-12813bfff9fa; Tue, 24 Mar 2020 12:37:27 +0000 (UTC) X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 67E96AFF2; Tue, 24 Mar 2020 12:37:26 +0000 (UTC) From: Jan Beulich To: "xen-devel@lists.xenproject.org" References: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Message-ID: Date: Tue, 24 Mar 2020 13:37:24 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0 MIME-Version: 1.0 In-Reply-To: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Content-Language: en-US Subject: [Xen-devel] [PATCH v5 09/10] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: Kevin Tian , Wei Liu , Andrew Cooper , Paul Durrant , Jun Nakajima , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" If the hardware can handle accesses, we should allow it to do so. This way we can expose EFRO to HVM guests, and "all" that's left for exposing APERF/MPERF is to figure out how to handle writes to these MSRs. (Note that the leaf 6 guest CPUID checks will evaluate to false for now, as recalculate_misc() zaps the entire leaf for now.) For TSC the intercepts are made mirror the RDTSC ones. Signed-off-by: Jan Beulich Reviewed-by: Kevin Tian --- v4: Make TSC intercepts mirror RDTSC ones. Re-base. v3: New. --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -595,6 +595,7 @@ static void svm_cpuid_policy_changed(str struct vmcb_struct *vmcb = svm->vmcb; const struct cpuid_policy *cp = v->domain->arch.cpuid; u32 bitmap = vmcb_get_exception_intercepts(vmcb); + unsigned int mode; if ( opt_hvm_fep || (v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) ) @@ -607,6 +608,17 @@ static void svm_cpuid_policy_changed(str /* Give access to MSR_PRED_CMD if the guest has been told about it. */ svm_intercept_msr(v, MSR_PRED_CMD, cp->extd.ibpb ? MSR_INTERCEPT_NONE : MSR_INTERCEPT_RW); + + /* Allow direct reads from APERF/MPERF if permitted by the policy. */ + mode = cp->basic.raw[6].c & CPUID6_ECX_APERFMPERF_CAPABILITY + ? MSR_INTERCEPT_WRITE : MSR_INTERCEPT_RW; + svm_intercept_msr(v, MSR_IA32_APERF, mode); + svm_intercept_msr(v, MSR_IA32_MPERF, mode); + + /* Allow direct access to their r/o counterparts if permitted. */ + mode = cp->extd.efro ? MSR_INTERCEPT_NONE : MSR_INTERCEPT_RW; + svm_intercept_msr(v, MSR_APERF_RD_ONLY, mode); + svm_intercept_msr(v, MSR_MPERF_RD_ONLY, mode); } void svm_sync_vmcb(struct vcpu *v, enum vmcb_sync_state new_state) @@ -860,7 +872,10 @@ static void svm_set_rdtsc_exiting(struct { general1_intercepts |= GENERAL1_INTERCEPT_RDTSC; general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP; + svm_enable_intercept_for_msr(v, MSR_IA32_TSC); } + else + svm_intercept_msr(v, MSR_IA32_TSC, MSR_INTERCEPT_WRITE); vmcb_set_general1_intercepts(vmcb, general1_intercepts); vmcb_set_general2_intercepts(vmcb, general2_intercepts); --- a/xen/arch/x86/hvm/svm/vmcb.c +++ b/xen/arch/x86/hvm/svm/vmcb.c @@ -108,6 +108,7 @@ static int construct_vmcb(struct vcpu *v { vmcb->_general1_intercepts |= GENERAL1_INTERCEPT_RDTSC; vmcb->_general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP; + svm_intercept_msr(v, MSR_IA32_TSC, MSR_INTERCEPT_WRITE); } /* Guest segment limits. */ --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -1141,8 +1141,13 @@ static int construct_vmcs(struct vcpu *v vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_CS, VMX_MSR_RW); vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_ESP, VMX_MSR_RW); vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_EIP, VMX_MSR_RW); + + if ( !(v->arch.hvm.vmx.exec_control & CPU_BASED_RDTSC_EXITING) ) + vmx_clear_msr_intercept(v, MSR_IA32_TSC, VMX_MSR_R); + if ( paging_mode_hap(d) && (!is_iommu_enabled(d) || iommu_snoop) ) vmx_clear_msr_intercept(v, MSR_IA32_CR_PAT, VMX_MSR_RW); + if ( (vmexit_ctl & VM_EXIT_CLEAR_BNDCFGS) && (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) ) vmx_clear_msr_intercept(v, MSR_IA32_BNDCFGS, VMX_MSR_RW); --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -585,6 +585,18 @@ static void vmx_cpuid_policy_changed(str vmx_clear_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW); else vmx_set_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW); + + /* Allow direct reads from APERF/MPERF if permitted by the policy. */ + if ( cp->basic.raw[6].c & CPUID6_ECX_APERFMPERF_CAPABILITY ) + { + vmx_clear_msr_intercept(v, MSR_IA32_APERF, VMX_MSR_R); + vmx_clear_msr_intercept(v, MSR_IA32_MPERF, VMX_MSR_R); + } + else + { + vmx_set_msr_intercept(v, MSR_IA32_APERF, VMX_MSR_R); + vmx_set_msr_intercept(v, MSR_IA32_MPERF, VMX_MSR_R); + } } int vmx_guest_x86_mode(struct vcpu *v) @@ -1250,7 +1262,12 @@ static void vmx_set_rdtsc_exiting(struct vmx_vmcs_enter(v); v->arch.hvm.vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING; if ( enable ) + { v->arch.hvm.vmx.exec_control |= CPU_BASED_RDTSC_EXITING; + vmx_set_msr_intercept(v, MSR_IA32_TSC, VMX_MSR_R); + } + else + vmx_clear_msr_intercept(v, MSR_IA32_TSC, VMX_MSR_R); vmx_update_cpu_exec_control(v); vmx_vmcs_exit(v); } --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -243,7 +243,7 @@ XEN_CPUFEATURE(ENQCMD, 6*32+29) / /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */ XEN_CPUFEATURE(ITSC, 7*32+ 8) /* Invariant TSC */ -XEN_CPUFEATURE(EFRO, 7*32+10) /* APERF/MPERF Read Only interface */ +XEN_CPUFEATURE(EFRO, 7*32+10) /*S APERF/MPERF Read Only interface */ /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */ XEN_CPUFEATURE(CLZERO, 8*32+ 0) /*A CLZERO instruction */ From patchwork Tue Mar 24 12:37:48 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 11455333 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 27D3E6CA for ; Tue, 24 Mar 2020 12:38:39 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 0B7392070A for ; Tue, 24 Mar 2020 12:38:39 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 0B7392070A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=suse.com Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=xen-devel-bounces@lists.xenproject.org Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGiob-0007H7-5z; Tue, 24 Mar 2020 12:37:53 +0000 Received: from all-amaz-eas1.inumbo.com ([34.197.232.57] helo=us1-amaz-eas2.inumbo.com) by lists.xenproject.org with esmtp (Exim 4.89) (envelope-from ) id 1jGioZ-0007Gq-TB for xen-devel@lists.xenproject.org; Tue, 24 Mar 2020 12:37:51 +0000 X-Inumbo-ID: 45411445-6dcc-11ea-83e0-12813bfff9fa Received: from mx2.suse.de (unknown [195.135.220.15]) by us1-amaz-eas2.inumbo.com (Halon) with ESMTPS id 45411445-6dcc-11ea-83e0-12813bfff9fa; Tue, 24 Mar 2020 12:37:51 +0000 (UTC) X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 5D160ACED; Tue, 24 Mar 2020 12:37:50 +0000 (UTC) From: Jan Beulich To: "xen-devel@lists.xenproject.org" References: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Message-ID: Date: Tue, 24 Mar 2020 13:37:48 +0100 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0 MIME-Version: 1.0 In-Reply-To: <6fa81b4d-528d-5c33-50c5-a18396b4383a@suse.com> Content-Language: en-US Subject: [Xen-devel] [PATCH v5 10/10] x86emul: support MCOMMIT X-BeenThere: xen-devel@lists.xenproject.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Cc: Andrew Cooper , Paul Durrant , Wei Liu , Roger Pau Monne Errors-To: xen-devel-bounces@lists.xenproject.org Sender: "Xen-devel" The dependency on a new EFER bit implies that we need to set that bit ourselves in order to be able to successfully invoke the insn. Also once again introduce the SVM related constants at this occasion. Signed-off-by: Jan Beulich --- RFC: The exact meaning of the PM stating "any errors encountered by those stores have been signaled to associated error logging resources" is unclear. Depending on what this entails, blindly enabling EFER.MCOMMIT may not be a good idea. Hence the RFC. --- v5: Re-base. v4: New. --- a/tools/libxl/libxl_cpuid.c +++ b/tools/libxl/libxl_cpuid.c @@ -261,6 +261,7 @@ int libxl_cpuid_parse_config(libxl_cpuid {"clzero", 0x80000008, NA, CPUID_REG_EBX, 0, 1}, {"rstr-fp-err-ptrs", 0x80000008, NA, CPUID_REG_EBX, 2, 1}, {"rdpru", 0x80000008, NA, CPUID_REG_EBX, 4, 1}, + {"mcommit", 0x80000008, NA, CPUID_REG_EBX, 8, 1}, {"wbnoinvd", 0x80000008, NA, CPUID_REG_EBX, 9, 1}, {"ibpb", 0x80000008, NA, CPUID_REG_EBX, 12, 1}, {"ppin", 0x80000008, NA, CPUID_REG_EBX, 23, 1}, --- a/tools/misc/xen-cpuid.c +++ b/tools/misc/xen-cpuid.c @@ -149,7 +149,7 @@ static const char *const str_e8b[32] = [ 4] = "rdpru", - /* [ 8] */ [ 9] = "wbnoinvd", + [ 8] = "mcommit", [ 9] = "wbnoinvd", [12] = "ibpb", --- a/xen/arch/x86/cpu/amd.c +++ b/xen/arch/x86/cpu/amd.c @@ -798,6 +798,9 @@ static void init_amd(struct cpuinfo_x86 wrmsr(MSR_K7_HWCR, l, h); } + if (cpu_has(c, X86_FEATURE_MCOMMIT)) + write_efer(read_efer() | EFER_MCOMMIT); + /* Prevent TSC drift in non single-processor, single-core platforms. */ if ((smp_processor_id() == 1) && !cpu_has(c, X86_FEATURE_ITSC)) disable_c1_ramping(); --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1877,6 +1877,7 @@ in_protmode( #define vcpu_has_fma4() (ctxt->cpuid->extd.fma4) #define vcpu_has_tbm() (ctxt->cpuid->extd.tbm) #define vcpu_has_clzero() (ctxt->cpuid->extd.clzero) +#define vcpu_has_mcommit() (ctxt->cpuid->extd.mcommit) #define vcpu_has_rdpru() (ctxt->cpuid->extd.rdpru) #define vcpu_has_wbnoinvd() (ctxt->cpuid->extd.wbnoinvd) @@ -5676,6 +5677,28 @@ x86_emulate( _regs.r(cx) = (uint32_t)msr_val; goto rdtsc; + case 0xfa: /* monitorx / mcommit */ + if ( vex.pfx == vex_f3 ) + { + bool cf; + + host_and_vcpu_must_have(mcommit); + if ( !ops->read_msr || + ops->read_msr(MSR_EFER, &msr_val, ctxt) != X86EMUL_OKAY ) + msr_val = 0; + generate_exception_if(!(msr_val & EFER_MCOMMIT), EXC_UD); + memcpy(get_stub(stub), + ((uint8_t[]){ 0xf3, 0x0f, 0x01, 0xfa, 0xc3 }), 5); + _regs.eflags &= ~EFLAGS_MASK; + invoke_stub("", ASM_FLAG_OUT(, "setc %[cf]"), + [cf] ASM_FLAG_OUT("=@ccc", "=qm") (cf) : "i" (0)); + if ( cf ) + _regs.eflags |= X86_EFLAGS_CF; + put_stub(stub); + goto done; + } + goto unrecognized_insn; + case 0xfc: /* clzero */ { unsigned long zero = 0; --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -131,6 +131,9 @@ #define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS) #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) +/* CPUID level 0x80000008.ebx */ +#define cpu_has_mcommit boot_cpu_has(X86_FEATURE_MCOMMIT) + /* CPUID level 0x00000007:1.eax */ #define cpu_has_avx512_bf16 boot_cpu_has(X86_FEATURE_AVX512_BF16) --- a/xen/include/asm-x86/hvm/svm/vmcb.h +++ b/xen/include/asm-x86/hvm/svm/vmcb.h @@ -78,6 +78,11 @@ enum GenericIntercept2bits GENERAL2_INTERCEPT_RDPRU = 1 << 14, }; +/* general 3 intercepts */ +enum GenericIntercept3bits +{ + GENERAL3_INTERCEPT_MCOMMIT = 1 << 3, +}; /* control register intercepts */ enum CRInterceptBits @@ -300,6 +305,7 @@ enum VMEXIT_EXITCODE VMEXIT_MWAIT_CONDITIONAL= 140, /* 0x8c */ VMEXIT_XSETBV = 141, /* 0x8d */ VMEXIT_RDPRU = 142, /* 0x8e */ + VMEXIT_MCOMMIT = 163, /* 0xa3 */ VMEXIT_NPF = 1024, /* 0x400, nested paging fault */ VMEXIT_INVALID = -1 }; @@ -406,7 +412,8 @@ struct vmcb_struct { u32 _exception_intercepts; /* offset 0x08 - cleanbit 0 */ u32 _general1_intercepts; /* offset 0x0C - cleanbit 0 */ u32 _general2_intercepts; /* offset 0x10 - cleanbit 0 */ - u32 res01[10]; + u32 _general3_intercepts; /* offset 0x14 - cleanbit 0 */ + u32 res01[9]; u16 _pause_filter_thresh; /* offset 0x3C - cleanbit 0 */ u16 _pause_filter_count; /* offset 0x3E - cleanbit 0 */ u64 _iopm_base_pa; /* offset 0x40 - cleanbit 1 */ @@ -590,6 +597,7 @@ VMCB_ACCESSORS(dr_intercepts, intercepts VMCB_ACCESSORS(exception_intercepts, intercepts) VMCB_ACCESSORS(general1_intercepts, intercepts) VMCB_ACCESSORS(general2_intercepts, intercepts) +VMCB_ACCESSORS(general3_intercepts, intercepts) VMCB_ACCESSORS(pause_filter_count, intercepts) VMCB_ACCESSORS(pause_filter_thresh, intercepts) VMCB_ACCESSORS(tsc_offset, intercepts) --- a/xen/include/asm-x86/msr-index.h +++ b/xen/include/asm-x86/msr-index.h @@ -88,6 +88,7 @@ #define _EFER_NX 11 /* No execute enable */ #define _EFER_SVME 12 /* AMD: SVM enable */ #define _EFER_FFXSE 14 /* AMD: Fast FXSAVE/FXRSTOR enable */ +#define _EFER_MCOMMIT 17 /* AMD: MCOMMIT insn enable */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) @@ -95,9 +96,10 @@ #define EFER_NX (1<<_EFER_NX) #define EFER_SVME (1<<_EFER_SVME) #define EFER_FFXSE (1<<_EFER_FFXSE) +#define EFER_MCOMMIT (1<<_EFER_MCOMMIT) #define EFER_KNOWN_MASK (EFER_SCE | EFER_LME | EFER_LMA | EFER_NX | \ - EFER_SVME | EFER_FFXSE) + EFER_SVME | EFER_FFXSE | EFER_MCOMMIT) /* Intel MSRs. Some also available on other CPUs */ #define MSR_IA32_PERFCTR0 0x000000c1 --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -249,6 +249,7 @@ XEN_CPUFEATURE(EFRO, 7*32+10) / XEN_CPUFEATURE(CLZERO, 8*32+ 0) /*A CLZERO instruction */ XEN_CPUFEATURE(RSTR_FP_ERR_PTRS, 8*32+ 2) /*A (F)X{SAVE,RSTOR} always saves/restores FPU Error pointers */ XEN_CPUFEATURE(RDPRU, 8*32+ 4) /*A RDPRU instruction */ +XEN_CPUFEATURE(MCOMMIT, 8*32+ 8) /*A MCOMMIT instruction */ XEN_CPUFEATURE(WBNOINVD, 8*32+ 9) /* WBNOINVD instruction */ XEN_CPUFEATURE(IBPB, 8*32+12) /*A IBPB support only (no IBRS, used by AMD) */ XEN_CPUFEATURE(AMD_PPIN, 8*32+23) /* Protected Processor Inventory Number */