diff mbox

KVM: SVM: fix trashing of MSR_TSC_AUX

Message ID 20160707160146.GH13648@pd.tnic (mailing list archive)
State New, archived
Headers show

Commit Message

Borislav Petkov July 7, 2016, 4:01 p.m. UTC
On Thu, Jul 07, 2016 at 03:16:21PM +0200, Paolo Bonzini wrote:
> Eduardo is the one to answer, but usually we add features to QEMU 
> before the processors are released (typically as soon as KVM supports 
> them).  So with a new enough QEMU this in theory should not be 
> necessary.
> 
> Adding a new feature that's not in a CPU model and that's not 
> associated to new state is really trivial:

Cool.

Btw, how about something like this?

Specifically, I'd like to test RAS features on the new upcoming AMD
Zen CPU and I've defined one from the stuff we know so far from kernel
patches.

The "filter=off" thing I've added in case I want to disable
x86_cpu_filter_features() but it works just fine without it when I boot
with -cpu Zen. So I can remove it too.

Would something like that be acceptable?

We can continue improving on this as features become known and even
implement some functionality in qemu/kvm as time allows.

---
From: Borislav Petkov <bp@suse.de>
Date: Tue, 5 Jul 2016 16:12:18 +0200
Subject: [PATCH] Zen emu: first working version

Boot with "-cpu Zen,filter=off" to disable CPUID bits filtering.

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 target-i386/cpu.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 target-i386/cpu.h |  7 +++++++
 2 files changed, 66 insertions(+), 1 deletion(-)

Comments

Paolo Bonzini July 7, 2016, 4:17 p.m. UTC | #1
> On Thu, Jul 07, 2016 at 03:16:21PM +0200, Paolo Bonzini wrote:
> > Eduardo is the one to answer, but usually we add features to QEMU
> > before the processors are released (typically as soon as KVM supports
> > them).  So with a new enough QEMU this in theory should not be
> > necessary.
> > 
> > Adding a new feature that's not in a CPU model and that's not
> > associated to new state is really trivial:
> 
> Cool.
> 
> Btw, how about something like this?
> 
> Specifically, I'd like to test RAS features on the new upcoming AMD
> Zen CPU and I've defined one from the stuff we know so far from kernel
> patches.

It looks good from skimming it---but again this isn't quite my
territory.

Paolo

> The "filter=off" thing I've added in case I want to disable
> x86_cpu_filter_features() but it works just fine without it when I boot
> with -cpu Zen. So I can remove it too.
> 
> Would something like that be acceptable?
> 
> We can continue improving on this as features become known and even
> implement some functionality in qemu/kvm as time allows.
> 
> ---
> From: Borislav Petkov <bp@suse.de>
> Date: Tue, 5 Jul 2016 16:12:18 +0200
> Subject: [PATCH] Zen emu: first working version
> 
> Boot with "-c Zen,filter=off" to disable CPUID bits filtering.
> 
> Signed-off-by: Borislav Petkov <bp@suse.de>
> ---
>  target-i386/cpu.c | 60
>  ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  target-i386/cpu.h |  7 +++++++
>  2 files changed, 66 insertions(+), 1 deletion(-)
> 
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index 3bd3cfc3ad16..cc9c97457387 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -307,6 +307,17 @@ static const char *cpuid_6_feature_name[] = {
>      NULL, NULL, NULL, NULL,
>  };
>  
> +static const char *smca_feature_name[] = {
> +    "overflow_recov", "succor", NULL, "smca",
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +};
> +
>  #define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE)
>  #define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \
>            CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC)
> @@ -449,6 +460,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS]
> = {
>          .cpuid_eax = 6, .cpuid_reg = R_EAX,
>          .tcg_features = TCG_6_EAX_FEATURES,
>      },
> +    [FEAT_8000_0007_EBX] = {
> +	.feat_names = smca_feature_name,
> +	.cpuid_eax = 0x80000007,
> +	.cpuid_reg = R_EBX,
> +    },
>  };
>  
>  typedef struct X86RegisterInfo32 {
> @@ -1449,6 +1465,44 @@ static X86CPUDefinition builtin_x86_defs[] = {
>          .xlevel = 0x8000001A,
>          .model_id = "AMD Opteron 63xx class CPU",
>      },
> +    {
> +        .name = "Zen",
> +        .level = 0xd,
> +        .vendor = CPUID_VENDOR_AMD,
> +        .family = 23,
> +        .model = 0,
> +        .stepping = 0,
> +        .features[FEAT_1_EDX] =
> +            CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
> +            CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA
> |
> +            CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
> +            CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
> +            CPUID_DE | CPUID_FP87,
> +        .features[FEAT_1_ECX] =
> +            CPUID_EXT_F16C | CPUID_EXT_AVX | CPUID_EXT_XSAVE |
> +            CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 |
> +            CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_FMA |
> +            CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
> +        .features[FEAT_8000_0001_EDX] =
> +            CPUID_EXT2_LM | CPUID_EXT2_RDTSCP |
> +            CPUID_EXT2_PDPE1GB | CPUID_EXT2_FXSR | CPUID_EXT2_MMX |
> +            CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT |
> +            CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE |
> +            CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | CPUID_EXT2_APIC |
> +            CPUID_EXT2_CX8 | CPUID_EXT2_MCE | CPUID_EXT2_PAE |
> CPUID_EXT2_MSR |
> +            CPUID_EXT2_TSC | CPUID_EXT2_PSE | CPUID_EXT2_DE |
> CPUID_EXT2_FPU,
> +        .features[FEAT_8000_0001_ECX] =
> +            CPUID_EXT3_TBM | CPUID_EXT3_FMA4 | CPUID_EXT3_XOP |
> +            CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE |
> +            CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM |
> +            CPUID_EXT3_LAHF_LM,
> +        /* no xsaveopt! */
> +        .features[FEAT_8000_0007_EBX] =
> +            CPUID_OVERFLOW_RECOV | CPUID_SUCCOR | CPUID_SMCA,
> +        .xlevel = 0x8000001A,
> +        .model_id = "AMD Zen CPU",
> +    },
> +
>  };
>  
>  typedef struct PropValue {
> @@ -2118,6 +2172,9 @@ static int x86_cpu_filter_features(X86CPU *cpu)
>      FeatureWord w;
>      int rv = 0;
>  
> +    if (!cpu->filter_cpuid)
> +        return 0;
> +
>      for (w = 0; w < FEATURE_WORDS; w++) {
>          uint32_t host_feat =
>              x86_cpu_get_supported_feature_word(w, cpu->migratable);
> @@ -2596,7 +2653,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index,
> uint32_t count,
>          break;
>      case 0x80000007:
>          *eax = 0;
> -        *ebx = 0;
> +        *ebx = env->features[FEAT_8000_0007_EBX];
>          *ecx = 0;
>          *edx = env->features[FEAT_8000_0007_EDX];
>          break;
> @@ -3256,6 +3313,7 @@ static Property x86_cpu_properties[] = {
>      DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false),
>      DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
>      DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
> +    DEFINE_PROP_BOOL("filter", X86CPU, filter_cpuid, false),
>      DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
>      DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, 0),
>      DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, 0),
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 474b0b937d71..258c1b261cd2 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -443,6 +443,7 @@ typedef enum FeatureWord {
>      FEAT_SVM,           /* CPUID[8000_000A].EDX */
>      FEAT_XSAVE,         /* CPUID[EAX=0xd,ECX=1].EAX */
>      FEAT_6_EAX,         /* CPUID[6].EAX */
> +    FEAT_8000_0007_EBX, /* CPUID[8000_0007].EBX */
>      FEATURE_WORDS,
>  } FeatureWord;
>  
> @@ -620,6 +621,11 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
>  #define CPUID_APM_INVTSC       (1U << 8)
>  
>  #define CPUID_VENDOR_SZ      12
> +/* CPUID[0x80000007].EBX flags: */
> +#define CPUID_OVERFLOW_RECOV   (1U << 0) /* MCA overflow recovery support */
> +#define CPUID_SUCCOR	       (1U << 1) /* Uncorrectable error containment and
> recovery */
> +#define CPUID_SMCA	       (1U << 3) /* Scalable MCA */
> +
>  
>  #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
>  #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
> @@ -1160,6 +1166,7 @@ struct X86CPU {
>      bool hyperv_stimer;
>      bool check_cpuid;
>      bool enforce_cpuid;
> +    bool filter_cpuid;
>      bool expose_kvm;
>      bool migratable;
>      bool host_features;
> --
> 2.7.3
> 
> --
> Regards/Gruss,
>     Boris.
> 
> ECO tip #101: Trim your mails when you reply.
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eduardo Habkost July 7, 2016, 4:27 p.m. UTC | #2
On Thu, Jul 07, 2016 at 06:01:46PM +0200, Borislav Petkov wrote:
> On Thu, Jul 07, 2016 at 03:16:21PM +0200, Paolo Bonzini wrote:
> > Eduardo is the one to answer, but usually we add features to QEMU 
> > before the processors are released (typically as soon as KVM supports 
> > them).  So with a new enough QEMU this in theory should not be 
> > necessary.
> > 
> > Adding a new feature that's not in a CPU model and that's not 
> > associated to new state is really trivial:
> 
> Cool.
> 
> Btw, how about something like this?
> 
> Specifically, I'd like to test RAS features on the new upcoming AMD
> Zen CPU and I've defined one from the stuff we know so far from kernel
> patches.

You mean KVM kernel patches? I assume the features require
additional KVM code to support them in guests. In that case, why
wouldn't the kernel return them in GET_SUPPORTED_CPUID? Then you
won't need filter=off.

> 
> The "filter=off" thing I've added in case I want to disable
> x86_cpu_filter_features() but it works just fine without it when I boot
> with -cpu Zen. So I can remove it too.
> 
> Would something like that be acceptable?

About filter=off: not sure. Do we really have valid use cases to
enable a feature even if the kernel reports it as unsupported in
GET_SUPPORTED_CPUID?

Specifically about the feature names, I have a question below:

> 
> We can continue improving on this as features become known and even
> implement some functionality in qemu/kvm as time allows.
> 
> ---
> From: Borislav Petkov <bp@suse.de>
> Date: Tue, 5 Jul 2016 16:12:18 +0200
> Subject: [PATCH] Zen emu: first working version
> 
> Boot with "-c Zen,filter=off" to disable CPUID bits filtering.
> 
> Signed-off-by: Borislav Petkov <bp@suse.de>
> ---
>  target-i386/cpu.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  target-i386/cpu.h |  7 +++++++
>  2 files changed, 66 insertions(+), 1 deletion(-)
> 
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index 3bd3cfc3ad16..cc9c97457387 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -307,6 +307,17 @@ static const char *cpuid_6_feature_name[] = {
>      NULL, NULL, NULL, NULL,
>  };
>  
> +static const char *smca_feature_name[] = {
> +    "overflow_recov", "succor", NULL, "smca",

Do those features introduce additional state that needs migration
support? If they do, you need to add them to
feature_word_info[FEAT_8000_0007_EBX].unmigratable_flags until
migration support is implemented.


> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +    NULL, NULL, NULL, NULL,
> +};
> +
>  #define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE)
>  #define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \
>            CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC)
> @@ -449,6 +460,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
>          .cpuid_eax = 6, .cpuid_reg = R_EAX,
>          .tcg_features = TCG_6_EAX_FEATURES,
>      },
> +    [FEAT_8000_0007_EBX] = {
> +	.feat_names = smca_feature_name,
> +	.cpuid_eax = 0x80000007,
> +	.cpuid_reg = R_EBX,
> +    },
>  };
>  
>  typedef struct X86RegisterInfo32 {
[...]
Borislav Petkov July 7, 2016, 5:04 p.m. UTC | #3
On Thu, Jul 07, 2016 at 01:27:55PM -0300, Eduardo Habkost wrote:
> You mean KVM kernel patches?

No, other ones. Here's one example:

https://lkml.kernel.org/r/1467633035-32080-2-git-send-email-Yazen.Ghannam@amd.com

> I assume the features require additional KVM code to support them
> in guests. In that case, why wouldn't the kernel return them in
> GET_SUPPORTED_CPUID? Then you won't need filter=off.

Yeah, so in most cases they will need additional KVM code to enable
them. More often than not, that is not at the top of people's TODO
list so ...

That's why I did the quick thing of smoke-testing them by enabling only
CPUID bits and the filter=off thing.

Would it be nicer to see them actually implemented in qemu/kvm?
Definitely.

> About filter=off: not sure. Do we really have valid use cases to
> enable a feature even if the kernel reports it as unsupported in
> GET_SUPPORTED_CPUID?

Yeah, as said above, the filter=off thing was a dirty hack just to stop
x86_cpu_filter_features() from checking whether the host supports them
or not.

> Do those features introduce additional state that need migration
> support? If they do, you need to add them to
> feature_word_info[FEAT_8000_0007_EBX].unmigratable_flags until
> migration support is implemented.

I'm afraid you'd need to explain migration support to me: is the
question whether the features would still work after migrating the
guest to an Intel platform?

Because those three above are AMD-only and they won't work on an Intel
platform.

And if so, I'm guessing they should always remain unmigratable.

Which is not a problem as there are Intel features which are not present
on AMD so...

Thanks!
Eduardo Habkost July 7, 2016, 5:43 p.m. UTC | #4
On Thu, Jul 07, 2016 at 07:04:42PM +0200, Borislav Petkov wrote:
> On Thu, Jul 07, 2016 at 01:27:55PM -0300, Eduardo Habkost wrote:
> > You mean KVM kernel patches?
> 
> No, other ones. Here's one example:
> 
> https://lkml.kernel.org/r/1467633035-32080-2-git-send-email-Yazen.Ghannam@amd.com
> 
> > I assume the features require additional KVM code to support them
> > in guests. In that case, why wouldn't the kernel return them in
> > GET_SUPPORTED_CPUID? Then you won't need filter=off.
> 
> Yeah, so in most cases they will need additional KVM code to enable
> them. More often than not, this is not always at the top of the TODO
> list of people so ...
> 
> That's why I did the quick thing of smoke-testing them by enabling only
> CPUID bits and the filter=off thing.
> 
> Would it be nicer to see them actually implemented in qemu/kvm?
> Definitely.
> 
> > About filter=off: not sure. Do we really have valid use cases to
> > enable a feature even if the kernel reports it as unsupported in
> > GET_SUPPORTED_CPUID?
> 
> Yeah, as said above, the filter=off thing was a dirty hack just to stop
> x86_cpu_filter_features() from checking whether the host supports them
> or not.

I see. If you have a useful use case for it, we may consider
that. But first I would like to see an actual case where a
feature was not added to GET_SUPPORTED_CPUID yet, but would not
crash and burn if forcibly enabled by QEMU.

> 
> > Do those features introduce additional state that need migration
> > support? If they do, you need to add them to
> > feature_word_info[FEAT_8000_0007_EBX].unmigratable_flags until
> > migration support is implemented.
> 
> I'm afraid you'd need to explain migration support to me: is the
> question whether migrating the guest to an Intel platform and whether
> the features would still work?
> 
> Because those three above are AMD-only and they won't work on an Intel
> platform.
> 
> And if so, I'm guessing they should always remain unmigratable.
> 
> Which is not a problem as there are Intel features which are not present
> on AMD so...

I mean live migration to a different host (that normally has the
same CPU vendor). When you live-migrate or use savevm, you need
to send the machine state to the other host. This is implemented
using VMStateDescription structs describing the data to be
migrated. See vmstate_x86_cpu in target-i386/machine.c, for
example.

You need additional migration sections if the feature introduces
additional state (e.g. CPU registers) that needs to be migrated
too, to keep the feature working. If there's new state but no
migration support is implemented yet, you need to add the feature
to unmigratable_flags.

For an example where no additional state is introduced by new
features, see:

Author: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Date:   Thu Oct 29 15:31:39 2015 +0800

    target-i386: Enable clflushopt/clwb/pcommit instructions
    
    These instructions are used by NVDIMM drivers and the specification is
    located at:
    https://software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf
    
    These instructions are available on Skylake Server.
    
    Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
    Reviewed-by: Richard Henderson <rth@twiddle.net>
    Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>

For an example where additional state is introduced by a CPU
feature and migration support was implemented, see:

commit f74eefe0b98cd7e13825de8e8d9f32e22aed102c
Author: Huaitong Han <huaitong.han@intel.com>
Date:   Wed Nov 18 10:20:15 2015 +0800

    target-i386: Add PKU and and OSPKE support
    
    Add PKU and OSPKE CPUID features, including xsave state and
    migration support.
    
    Signed-off-by: Huaitong Han <huaitong.han@intel.com>
    Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
    [ehabkost: squashed 3 patches together, edited patch description]
    Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>

For an example where a feature was added without required
migration code and was added to unmigratable_flags, see:

commit 0bb0b2d2fe7f645ddaf1f0ff40ac669c9feb4aa1
Author: Paolo Bonzini <pbonzini@redhat.com>
Date:   Mon Nov 24 15:54:43 2014 +0100

    target-i386: add feature flags for CPUID[EAX=0xd,ECX=1]
    
    These represent xsave-related capabilities of the processor, and KVM may
    or may not support them.
    
    Add feature bits so that they are considered by "-cpu ...,enforce", and use
    the new feature word instead of calling kvm_arch_get_supported_cpuid.
    
    Bit 3 (XSAVES) is not migratable because it requires saving MSR_IA32_XSS.
    Neither KVM nor any commonly available hardware supports it anyway.
    
    Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Borislav Petkov July 8, 2016, 11:09 a.m. UTC | #5
On Thu, Jul 07, 2016 at 02:43:49PM -0300, Eduardo Habkost wrote:
> I see. If you have an useful use case for it, we may consider
> that. But first I would like to see an actual case where a
> feature was not added to GET_SUPPORTED_CPUID yet, but would not
> crash and burn if forcibly enabled by QEMU.

Ok.

> I mean live migration to a different host (that normally has the
> same CPU vendor). When you live-migrate or use savevm, you need
> to send the machine state to the other host. This is implemented
> using VMStateDescription structs describing the data to be
> migrated. See vmstate_x86_cpu in target-i386/machine.c, for
> example.
> 
> You need additional migration sections if the feature introduces
> additional state (e.g. CPU registers) that need to be migrated
> too, to keep the feature working. If there's new state but no
> migration support is implemented yet, you need to add the feature
> to unmigratable_flags.
> 
> For an example where no additional state is introduced by new
> features, see:

Thanks for the examples and the explanation - I see the deal now.

Ok, I'll go through the features and see what kind of state the kernel
programs in there and add them to a VMStateDescription thing. Hohumm,
makes sense to me.

Thanks.
Paolo Bonzini July 8, 2016, 11:15 a.m. UTC | #6
> Ok, I'll go through the features and see what kind of state the kernel
> programs in there and add them to a VMStateDescription thing. Hohumm,
> makes sense to me.

It does sometimes happen that there is no state.  For example it could be
an MSR that we are already getting in and out of KVM.  However, it is way
more common that you have to add support for reading/writing the MSR in
KVM as well, and then teach QEMU's target-i386/kvm.c about it too.

It's hard to say without knowing exactly what the feature is about...
Is there an architecture manual out there that documents it?

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Borislav Petkov July 8, 2016, 12:55 p.m. UTC | #7
On Fri, Jul 08, 2016 at 07:15:39AM -0400, Paolo Bonzini wrote:
> It does sometimes happen that there is no state.  For example it could be
> an MSR that we are already getting in and out of KVM.

Right.

> However, it is way more common that you have to add support for
> reading/writing the MSR in KVM as well, and then teach QEMU's
> target-i386/kvm.c about it as well.
>
> It's hard to say without knowing exactly what the feature is about...
> Is there an architecture manual out there that documents it?

Maybe section 2.16 here:
http://support.amd.com/TechDocs/50742_15h_Models_60h-6Fh_BKDG.pdf

In any case, here are two bit definitions:

1	SUCCOR: Software uncorrectable error containment and recovery
	capability. Value: 1. 1=The processor supports software containment of
	uncorrectable errors through context synchronizing data poisoning
	and deferred error interrupts; see 2.16.1.10 [Deferred Errors and Data
	Poisoning]; MSR MSRC000_0410 [Machine Check Deferred Error Configuration
	(CU_DEFER_ERR)] exists.

0	McaOverflowRecov: MCA overflow recovery support. Value: 1. 1=MCA
	overflow conditions (MCi_STATUS[Overflow]=1) are not fatal; software
	may safely ignore such conditions. 0=MCA overflow conditions require
	software to shut down the system. See 2.16.1.6 [Handling Machine Check
	Exceptions].

So AFAICT the McaOverflowRecov thing should be the easiest by making
sure MCi_STATUS[Overflow]=1 is set properly when MCEs happen.

The SUCCOR thing needs data poisoning and deferred error interrupts and
that's a lot more involved than the overflow handling. And we'll need to
touch a lot more places. But it doesn't hurt to start looking at them at
least.

Bottom line is, the more RAS features we could test with qemu/kvm the
better because generating those error conditions on a real system is
very very hard and sometimes even impossible. Especially if you try to
inject an error but then the BIOS facility which does that is b0rked
because vendor forgot it. Crap like that.

I'll do some looking into all that when I get free moments, who knows,
we might get something going...

Thanks.
diff mbox

Patch

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 3bd3cfc3ad16..cc9c97457387 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -307,6 +307,17 @@  static const char *cpuid_6_feature_name[] = {
     NULL, NULL, NULL, NULL,
 };
 
+static const char *smca_feature_name[] = {
+    "overflow_recov", "succor", NULL, "smca",
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+};
+
 #define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE)
 #define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \
           CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC)
@@ -449,6 +460,11 @@  static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
         .cpuid_eax = 6, .cpuid_reg = R_EAX,
         .tcg_features = TCG_6_EAX_FEATURES,
     },
+    [FEAT_8000_0007_EBX] = {
+	.feat_names = smca_feature_name,
+	.cpuid_eax = 0x80000007,
+	.cpuid_reg = R_EBX,
+    },
 };
 
 typedef struct X86RegisterInfo32 {
@@ -1449,6 +1465,44 @@  static X86CPUDefinition builtin_x86_defs[] = {
         .xlevel = 0x8000001A,
         .model_id = "AMD Opteron 63xx class CPU",
     },
+    {
+        .name = "Zen",
+        .level = 0xd,
+        .vendor = CPUID_VENDOR_AMD,
+        .family = 23,
+        .model = 0,
+        .stepping = 0,
+        .features[FEAT_1_EDX] =
+            CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+            CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+            CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+            CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+            CPUID_DE | CPUID_FP87,
+        .features[FEAT_1_ECX] =
+            CPUID_EXT_F16C | CPUID_EXT_AVX | CPUID_EXT_XSAVE |
+            CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 |
+            CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_FMA |
+            CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
+        .features[FEAT_8000_0001_EDX] =
+            CPUID_EXT2_LM | CPUID_EXT2_RDTSCP |
+            CPUID_EXT2_PDPE1GB | CPUID_EXT2_FXSR | CPUID_EXT2_MMX |
+            CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT |
+            CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE |
+            CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | CPUID_EXT2_APIC |
+            CPUID_EXT2_CX8 | CPUID_EXT2_MCE | CPUID_EXT2_PAE | CPUID_EXT2_MSR |
+            CPUID_EXT2_TSC | CPUID_EXT2_PSE | CPUID_EXT2_DE | CPUID_EXT2_FPU,
+        .features[FEAT_8000_0001_ECX] =
+            CPUID_EXT3_TBM | CPUID_EXT3_FMA4 | CPUID_EXT3_XOP |
+            CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE |
+            CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM |
+            CPUID_EXT3_LAHF_LM,
+        /* no xsaveopt! */
+        .features[FEAT_8000_0007_EBX] =
+            CPUID_OVERFLOW_RECOV | CPUID_SUCCOR | CPUID_SMCA,
+        .xlevel = 0x8000001A,
+        .model_id = "AMD Zen CPU",
+    },
+
 };
 
 typedef struct PropValue {
@@ -2118,6 +2172,9 @@  static int x86_cpu_filter_features(X86CPU *cpu)
     FeatureWord w;
     int rv = 0;
 
+    if (!cpu->filter_cpuid)
+        return 0;
+
     for (w = 0; w < FEATURE_WORDS; w++) {
         uint32_t host_feat =
             x86_cpu_get_supported_feature_word(w, cpu->migratable);
@@ -2596,7 +2653,7 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 0x80000007:
         *eax = 0;
-        *ebx = 0;
+        *ebx = env->features[FEAT_8000_0007_EBX];
         *ecx = 0;
         *edx = env->features[FEAT_8000_0007_EDX];
         break;
@@ -3256,6 +3313,7 @@  static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false),
     DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
+    DEFINE_PROP_BOOL("filter", X86CPU, filter_cpuid, false),
     DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
     DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, 0),
     DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, 0),
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 474b0b937d71..258c1b261cd2 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -443,6 +443,7 @@  typedef enum FeatureWord {
     FEAT_SVM,           /* CPUID[8000_000A].EDX */
     FEAT_XSAVE,         /* CPUID[EAX=0xd,ECX=1].EAX */
     FEAT_6_EAX,         /* CPUID[6].EAX */
+    FEAT_8000_0007_EBX, /* CPUID[8000_0007].EBX */
     FEATURE_WORDS,
 } FeatureWord;
 
@@ -620,6 +621,11 @@  typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_APM_INVTSC       (1U << 8)
 
 #define CPUID_VENDOR_SZ      12
+/* CPUID[0x80000007].EBX flags: */
+#define CPUID_OVERFLOW_RECOV   (1U << 0) /* MCA overflow recovery support */
+#define CPUID_SUCCOR	       (1U << 1) /* Uncorrectable error containment and recovery */
+#define CPUID_SMCA	       (1U << 3) /* Scalable MCA */
+
 
 #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
 #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
@@ -1160,6 +1166,7 @@  struct X86CPU {
     bool hyperv_stimer;
     bool check_cpuid;
     bool enforce_cpuid;
+    bool filter_cpuid;
     bool expose_kvm;
     bool migratable;
     bool host_features;