diff mbox

[v4,2/3] target-i386: add migration support for Intel LMCE

Message ID 20160616060621.30422-3-haozhong.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Haozhong Zhang June 16, 2016, 6:06 a.m. UTC
Migration is only allowed between VCPUs with the same lmce option.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
 target-i386/machine.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

Comments

Paolo Bonzini June 16, 2016, 9:51 a.m. UTC | #1
On 16/06/2016 08:06, Haozhong Zhang wrote:
> Migration is only allowed between VCPUs with the same lmce option.
> 
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> ---
>  target-i386/machine.c | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
> 
> diff --git a/target-i386/machine.c b/target-i386/machine.c
> index cb9adf2..00375a3 100644
> --- a/target-i386/machine.c
> +++ b/target-i386/machine.c
> @@ -347,6 +347,12 @@ static int cpu_post_load(void *opaque, int version_id)
>          return -EINVAL;
>      }
>  
> +    if (!cpu->enable_lmce && (env->mcg_cap & MCG_LMCE_P)) {
> +        error_report("Config mismatch: VCPU has LMCE enabled, "
> +                     "but \"lmce\" option is disabled");
> +        return -EINVAL;
> +    }
> +

I think this is unnecessary.  Apart from this, the patch is good and can
be squashed in patch 1 for v5.

Paolo

>      /*
>       * Real mode guest segments register DPL should be zero.
>       * Older KVM version were setting it wrongly.
> @@ -896,6 +902,24 @@ static const VMStateDescription vmstate_tsc_khz = {
>      }
>  };
>  
> +static bool mcg_ext_ctl_needed(void *opaque)
> +{
> +    X86CPU *cpu = opaque;
> +    CPUX86State *env = &cpu->env;
> +    return cpu->enable_lmce && env->mcg_ext_ctl;
> +}
> +
> +static const VMStateDescription vmstate_mcg_ext_ctl = {
> +    .name = "cpu/mcg_ext_ctl",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = mcg_ext_ctl_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT64(env.mcg_ext_ctl, X86CPU),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  VMStateDescription vmstate_x86_cpu = {
>      .name = "cpu",
>      .version_id = 12,
> @@ -1022,6 +1046,7 @@ VMStateDescription vmstate_x86_cpu = {
>  #ifdef TARGET_X86_64
>          &vmstate_pkru,
>  #endif
> +        &vmstate_mcg_ext_ctl,
>          NULL
>      }
>  };
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Haozhong Zhang June 16, 2016, 10:29 a.m. UTC | #2
On 06/16/16 11:51, Paolo Bonzini wrote:
> 
> 
> On 16/06/2016 08:06, Haozhong Zhang wrote:
> > Migration is only allowed between VCPUs with the same lmce option.
> > 
> > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> > ---
> >  target-i386/machine.c | 25 +++++++++++++++++++++++++
> >  1 file changed, 25 insertions(+)
> > 
> > diff --git a/target-i386/machine.c b/target-i386/machine.c
> > index cb9adf2..00375a3 100644
> > --- a/target-i386/machine.c
> > +++ b/target-i386/machine.c
> > @@ -347,6 +347,12 @@ static int cpu_post_load(void *opaque, int version_id)
> >          return -EINVAL;
> >      }
> >  
> > +    if (!cpu->enable_lmce && (env->mcg_cap & MCG_LMCE_P)) {
> > +        error_report("Config mismatch: VCPU has LMCE enabled, "
> > +                     "but \"lmce\" option is disabled");
> > +        return -EINVAL;
> > +    }
> > +
> 
> I think this is unnecessary.  Apart from this, the patch is good and can
> be squashed in patch 1 for v5.
>

Without this check, migration from an LMCE-enabled QEMU to an
LMCE-disabled QEMU will not fail. Is such a configuration change
considered to be an error? If not, I will remove the error report and
return, but add a fix to remove MCG_LMCE_P from env->mcg_cap in this check.

Haozhong

> 
> >      /*
> >       * Real mode guest segments register DPL should be zero.
> >       * Older KVM version were setting it wrongly.
> > @@ -896,6 +902,24 @@ static const VMStateDescription vmstate_tsc_khz = {
> >      }
> >  };
> >  
> > +static bool mcg_ext_ctl_needed(void *opaque)
> > +{
> > +    X86CPU *cpu = opaque;
> > +    CPUX86State *env = &cpu->env;
> > +    return cpu->enable_lmce && env->mcg_ext_ctl;
> > +}
> > +
> > +static const VMStateDescription vmstate_mcg_ext_ctl = {
> > +    .name = "cpu/mcg_ext_ctl",
> > +    .version_id = 1,
> > +    .minimum_version_id = 1,
> > +    .needed = mcg_ext_ctl_needed,
> > +    .fields = (VMStateField[]) {
> > +        VMSTATE_UINT64(env.mcg_ext_ctl, X86CPU),
> > +        VMSTATE_END_OF_LIST()
> > +    }
> > +};
> > +
> >  VMStateDescription vmstate_x86_cpu = {
> >      .name = "cpu",
> >      .version_id = 12,
> > @@ -1022,6 +1046,7 @@ VMStateDescription vmstate_x86_cpu = {
> >  #ifdef TARGET_X86_64
> >          &vmstate_pkru,
> >  #endif
> > +        &vmstate_mcg_ext_ctl,
> >          NULL
> >      }
> >  };
> > 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 16, 2016, 10:41 a.m. UTC | #3
On 16/06/2016 12:29, Haozhong Zhang wrote:
> On 06/16/16 11:51, Paolo Bonzini wrote:
>>
>>
>> On 16/06/2016 08:06, Haozhong Zhang wrote:
>>> Migration is only allowed between VCPUs with the same lmce option.
>>>
>>> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
>>> ---
>>>  target-i386/machine.c | 25 +++++++++++++++++++++++++
>>>  1 file changed, 25 insertions(+)
>>>
>>> diff --git a/target-i386/machine.c b/target-i386/machine.c
>>> index cb9adf2..00375a3 100644
>>> --- a/target-i386/machine.c
>>> +++ b/target-i386/machine.c
>>> @@ -347,6 +347,12 @@ static int cpu_post_load(void *opaque, int version_id)
>>>          return -EINVAL;
>>>      }
>>>  
>>> +    if (!cpu->enable_lmce && (env->mcg_cap & MCG_LMCE_P)) {
>>> +        error_report("Config mismatch: VCPU has LMCE enabled, "
>>> +                     "but \"lmce\" option is disabled");
>>> +        return -EINVAL;
>>> +    }
>>> +
>>
>> I think this is unnecessary.  Apart from this, the patch is good and can
>> be squashed in patch 1 for v5.
>>
> 
> Without this check, the migration from LMCE enabled QEMU to LMCE
> disabled QEMU will not fail. Is such configuration change considered
> be error? If not, I will remove the error report and return, but add a
> fix to remove MCG_LMCE_P from env->mcg_cap in this check.

It's considered a user error.  You can skip the "if" completely.

Paolo

> Haozhong
> 
>>
>>>      /*
>>>       * Real mode guest segments register DPL should be zero.
>>>       * Older KVM version were setting it wrongly.
>>> @@ -896,6 +902,24 @@ static const VMStateDescription vmstate_tsc_khz = {
>>>      }
>>>  };
>>>  
>>> +static bool mcg_ext_ctl_needed(void *opaque)
>>> +{
>>> +    X86CPU *cpu = opaque;
>>> +    CPUX86State *env = &cpu->env;
>>> +    return cpu->enable_lmce && env->mcg_ext_ctl;
>>> +}
>>> +
>>> +static const VMStateDescription vmstate_mcg_ext_ctl = {
>>> +    .name = "cpu/mcg_ext_ctl",
>>> +    .version_id = 1,
>>> +    .minimum_version_id = 1,
>>> +    .needed = mcg_ext_ctl_needed,
>>> +    .fields = (VMStateField[]) {
>>> +        VMSTATE_UINT64(env.mcg_ext_ctl, X86CPU),
>>> +        VMSTATE_END_OF_LIST()
>>> +    }
>>> +};
>>> +
>>>  VMStateDescription vmstate_x86_cpu = {
>>>      .name = "cpu",
>>>      .version_id = 12,
>>> @@ -1022,6 +1046,7 @@ VMStateDescription vmstate_x86_cpu = {
>>>  #ifdef TARGET_X86_64
>>>          &vmstate_pkru,
>>>  #endif
>>> +        &vmstate_mcg_ext_ctl,
>>>          NULL
>>>      }
>>>  };
>>>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Haozhong Zhang June 16, 2016, 10:55 a.m. UTC | #4
On 06/16/16 12:41, Paolo Bonzini wrote:
> 
> 
> On 16/06/2016 12:29, Haozhong Zhang wrote:
> > On 06/16/16 11:51, Paolo Bonzini wrote:
> >>
> >>
> >> On 16/06/2016 08:06, Haozhong Zhang wrote:
> >>> Migration is only allowed between VCPUs with the same lmce option.
> >>>
> >>> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> >>> ---
> >>>  target-i386/machine.c | 25 +++++++++++++++++++++++++
> >>>  1 file changed, 25 insertions(+)
> >>>
> >>> diff --git a/target-i386/machine.c b/target-i386/machine.c
> >>> index cb9adf2..00375a3 100644
> >>> --- a/target-i386/machine.c
> >>> +++ b/target-i386/machine.c
> >>> @@ -347,6 +347,12 @@ static int cpu_post_load(void *opaque, int version_id)
> >>>          return -EINVAL;
> >>>      }
> >>>  
> >>> +    if (!cpu->enable_lmce && (env->mcg_cap & MCG_LMCE_P)) {
> >>> +        error_report("Config mismatch: VCPU has LMCE enabled, "
> >>> +                     "but \"lmce\" option is disabled");
> >>> +        return -EINVAL;
> >>> +    }
> >>> +
> >>
> >> I think this is unnecessary.  Apart from this, the patch is good and can
> >> be squashed in patch 1 for v5.
> >>
> > 
> > Without this check, the migration from LMCE enabled QEMU to LMCE
> > disabled QEMU will not fail. Is such configuration change considered
> > be error? If not, I will remove the error report and return, but add a
> > fix to remove MCG_LMCE_P from env->mcg_cap in this check.
> 
> It's considered a user error.  You can skip the "if" completely.
>

Eduardo said this part was nice in the previous version [1], so maybe we
should wait for his comments?

[1] http://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg01992.html

Thanks,
Haozhong
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eduardo Habkost June 16, 2016, 5:36 p.m. UTC | #5
On Thu, Jun 16, 2016 at 06:55:29PM +0800, Haozhong Zhang wrote:
> On 06/16/16 12:41, Paolo Bonzini wrote:
> > 
> > 
> > On 16/06/2016 12:29, Haozhong Zhang wrote:
> > > On 06/16/16 11:51, Paolo Bonzini wrote:
> > >>
> > >>
> > >> On 16/06/2016 08:06, Haozhong Zhang wrote:
> > >>> Migration is only allowed between VCPUs with the same lmce option.
> > >>>
> > >>> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> > >>> ---
> > >>>  target-i386/machine.c | 25 +++++++++++++++++++++++++
> > >>>  1 file changed, 25 insertions(+)
> > >>>
> > >>> diff --git a/target-i386/machine.c b/target-i386/machine.c
> > >>> index cb9adf2..00375a3 100644
> > >>> --- a/target-i386/machine.c
> > >>> +++ b/target-i386/machine.c
> > >>> @@ -347,6 +347,12 @@ static int cpu_post_load(void *opaque, int version_id)
> > >>>          return -EINVAL;
> > >>>      }
> > >>>  
> > >>> +    if (!cpu->enable_lmce && (env->mcg_cap & MCG_LMCE_P)) {
> > >>> +        error_report("Config mismatch: VCPU has LMCE enabled, "
> > >>> +                     "but \"lmce\" option is disabled");
> > >>> +        return -EINVAL;
> > >>> +    }
> > >>> +
> > >>
> > >> I think this is unnecessary.  Apart from this, the patch is good and can
> > >> be squashed in patch 1 for v5.
> > >>
> > > 
> > > Without this check, the migration from LMCE enabled QEMU to LMCE
> > > disabled QEMU will not fail. Is such configuration change considered
> > > be error? If not, I will remove the error report and return, but add a
> > > fix to remove MCG_LMCE_P from env->mcg_cap in this check.
> > 
> > It's considered a user error.  You can skip the "if" completely.
> >
> 
> Eduardo said nice for this part in previous version [1], so we may wait
> for his comments?
> 
> [1] http://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg01992.html

I agree we don't need this check, but I still believe it is a
nice thing to have.

In addition to detecting user errors, such checks don't hurt and are
useful for things like "-cpu host", that don't guarantee
live-migration compatibility but still allow migration if you
ensure host capabilities are the same on both sides.

(I was going to suggest enabling lmce automatically on "-cpu
host" as a follow-up patch, BTW.)
Paolo Bonzini June 16, 2016, 5:40 p.m. UTC | #6
On 16/06/2016 19:36, Eduardo Habkost wrote:
>> > 
>> > Eduardo said nice for this part in previous version [1], so we may wait
>> > for his comments?
>> > 
>> > [1] http://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg01992.html
> I agree we don't need this check, but I still believe it is a
> nice thing to have.
> 
> In addition to detecting user errors, they don't hurt and are
> useful for things like "-cpu host", that don't guarantee
> live-migration compatibility but still allow migration if you
> ensure host capabilities are the same on both sides.

On the other hand we don't check for this on any other property, either
CPU or device, do we?  Considering "lmce=on" always breaks on an old
kernel (i.e. there's no need for an explicit ",enforce" on the -cpu
flag), I think it's unnecessary and makes things inconsistent.

> (I was going to suggest enabling lmce automatically on "-cpu
> host" as a follow-up patch, BTW.)

Interesting.  Technically it comes from the host kernel, not from the
host CPU.  But it does sound like a good idea; -cpu host pretty much
implies the same kernel (in addition to the same processor) on both
sides of the migration.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eduardo Habkost June 16, 2016, 5:58 p.m. UTC | #7
On Thu, Jun 16, 2016 at 07:40:20PM +0200, Paolo Bonzini wrote:
> 
> 
> On 16/06/2016 19:36, Eduardo Habkost wrote:
> >> > 
> >> > Eduardo said nice for this part in previous version [1], so we may wait
> >> > for his comments?
> >> > 
> >> > [1] http://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg01992.html
> > I agree we don't need this check, but I still believe it is a
> > nice thing to have.
> > 
> > In addition to detecting user errors, they don't hurt and are
> > useful for things like "-cpu host", that don't guarantee
> > live-migration compatibility but still allow migration if you
> > ensure host capabilities are the same on both sides.
> 
> On the other hand we don't check for this on any other property, either
> CPU or device, do we?  Considering "lmce=on" always breaks on an old
> kernel (i.e. there's no need for an explicit ",enforce" on the -cpu
> flag), I think it's unnecessary and makes things inconsistent.

We don't check that because we normally can't: we usually don't
send any configuration data (or anything that could be used to
detect configuration mismatches) to the destination. When we do,
it's often by accident.

In this case, it looks like we never needed to send mcg_cap in
the migration stream. But we already send it, so let's use it for
something useful.

I believe we should have more checks like these, when possible. I
have been planning for a while to send CPUID data in the
migration stream, to detect migration compatibility errors
(either user errors or QEMU bugs).

In theory, those checks should never be necessary. In practice I
believe they would be very useful.

> 
> > (I was going to suggest enabling lmce automatically on "-cpu
> > host" as a follow-up patch, BTW.)
> 
> Interesting.  Technically it comes from the host kernel, not from the
> host CPU.  But it does sounds like a good idea; -cpu host pretty much
> implies the same kernel (in addition to the same processor) on both
> sides of the migration.

"-cpu host" already means "whatever is allowed by the host [CPU
and/or kernel]", not just "host CPU". It enables x2apic on all
hosts, for example.
Eduardo Habkost June 16, 2016, 7:53 p.m. UTC | #8
On Thu, Jun 16, 2016 at 07:40:20PM +0200, Paolo Bonzini wrote:
> 
> 
> On 16/06/2016 19:36, Eduardo Habkost wrote:
> >> > 
> >> > Eduardo said nice for this part in previous version [1], so we may wait
> >> > for his comments?
> >> > 
> >> > [1] http://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg01992.html
> > I agree we don't need this check, but I still believe it is a
> > nice thing to have.
> > 
> > In addition to detecting user errors, they don't hurt and are
> > useful for things like "-cpu host", that don't guarantee
> > live-migration compatibility but still allow migration if you
> > ensure host capabilities are the same on both sides.
> 
> On the other hand we don't check for this on any other property, either
> CPU or device, do we?  Considering "lmce=on" always breaks on an old
> kernel (i.e. there's no need for an explicit ",enforce" on the -cpu
> flag), I think it's unnecessary and makes things inconsistent.

BTW, just found another case where we check for migration mismatches: TSC
frequency.

    if (env->tsc_khz && env->user_tsc_khz &&
        env->tsc_khz != env->user_tsc_khz) {
        error_report("Mismatch between user-specified TSC frequency and "
                     "migrated TSC frequency");
        return -EINVAL;
    }

We can do that because tsc_khz is unusual like mcg_cap: it can be
configured by the user but is also included in the migration
stream.
Haozhong Zhang June 17, 2016, 2:01 a.m. UTC | #9
On 06/16/16 14:58, Eduardo Habkost wrote:
> On Thu, Jun 16, 2016 at 07:40:20PM +0200, Paolo Bonzini wrote:
> > 
> > 
> > On 16/06/2016 19:36, Eduardo Habkost wrote:
> > >> > 
> > >> > Eduardo said nice for this part in previous version [1], so we may wait
> > >> > for his comments?
> > >> > 
> > >> > [1] http://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg01992.html
> > > I agree we don't need this check, but I still believe it is a
> > > nice thing to have.
> > > 
> > > In addition to detecting user errors, they don't hurt and are
> > > useful for things like "-cpu host", that don't guarantee
> > > live-migration compatibility but still allow migration if you
> > > ensure host capabilities are the same on both sides.
> > 
> > On the other hand we don't check for this on any other property, either
> > CPU or device, do we?  Considering "lmce=on" always breaks on an old
> > kernel (i.e. there's no need for an explicit ",enforce" on the -cpu
> > flag), I think it's unnecessary and makes things inconsistent.
> 
> We don't check that because we normally can't: we usually don't
> send any configuration data (or anything that could be used to
> detect configuration mismatches) to the destination. When we do,
> it's often by accident.
> 
> In this case, it looks like we never needed to send mcg_cap in
> the migration stream. But we already send it, so let's use it for
> something useful.
> 
> I believe we should have more checks like these, when possible. I
> have been planning for a while to send CPUID data in the
> migration stream, to detect migration compatibility errors
> (either user errors or QEMU bugs).
> 
> In theory, those checks should never be necessary. In practice I
> believe they would be very useful.
>

Hi Eduardo and Paolo,

What will be the conclusion? Do we still need this check?

I'm fine with removing this check if we normally don't make this kind of
check and instead require users to avoid configuration mismatches.

> > 
> > > (I was going to suggest enabling lmce automatically on "-cpu
> > > host" as a follow-up patch, BTW.)
> > 
> > Interesting.  Technically it comes from the host kernel, not from the
> > host CPU.  But it does sounds like a good idea; -cpu host pretty much
> > implies the same kernel (in addition to the same processor) on both
> > sides of the migration.
> 
> "-cpu host" already means "whatever is allowed by the host [CPU
> and/or kernel]", not just "host CPU". It enables x2apic on all
> hosts, for example.
>

Does that mean we can automatically enable LMCE for "-cpu host"?

Thanks,
Haozhong
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eduardo Habkost June 17, 2016, 5:20 p.m. UTC | #10
On Fri, Jun 17, 2016 at 10:01:05AM +0800, Haozhong Zhang wrote:
> On 06/16/16 14:58, Eduardo Habkost wrote:
> > On Thu, Jun 16, 2016 at 07:40:20PM +0200, Paolo Bonzini wrote:
> > > 
> > > 
> > > On 16/06/2016 19:36, Eduardo Habkost wrote:
> > > >> > 
> > > >> > Eduardo said nice for this part in previous version [1], so we may wait
> > > >> > for his comments?
> > > >> > 
> > > >> > [1] http://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg01992.html
> > > > I agree we don't need this check, but I still believe it is a
> > > > nice thing to have.
> > > > 
> > > > In addition to detecting user errors, they don't hurt and are
> > > > useful for things like "-cpu host", that don't guarantee
> > > > live-migration compatibility but still allow migration if you
> > > > ensure host capabilities are the same on both sides.
> > > 
> > > On the other hand we don't check for this on any other property, either
> > > CPU or device, do we?  Considering "lmce=on" always breaks on an old
> > > kernel (i.e. there's no need for an explicit ",enforce" on the -cpu
> > > flag), I think it's unnecessary and makes things inconsistent.
> > 
> > We don't check that because we normally can't: we usually don't
> > send any configuration data (or anything that could be used to
> > detect configuration mismatches) to the destination. When we do,
> > it's often by accident.
> > 
> > In this case, it looks like we never needed to send mcg_cap in
> > the migration stream. But we already send it, so let's use it for
> > something useful.
> > 
> > I believe we should have more checks like these, when possible. I
> > have been planning for a while to send CPUID data in the
> > migration stream, to detect migration compatibility errors
> > (either user errors or QEMU bugs).
> > 
> > In theory, those checks should never be necessary. In practice I
> > believe they would be very useful.
> >
> 
> Hi Eduardo and Paolo,
> 
> What will be the conclusion? Do we still need this check?
> 
> I'm fine to remove this check if we normally didn't make such kind of
> checks and require users to avoid configuration mismatch.

I don't know yet if Paolo is convinced that the check is still
useful. :)

I suggest doing it as a separate patch, so we can apply the rest
of the series now and discuss/apply the check later.

> 
> > > 
> > > > (I was going to suggest enabling lmce automatically on "-cpu
> > > > host" as a follow-up patch, BTW.)
> > > 
> > > Interesting.  Technically it comes from the host kernel, not from the
> > > host CPU.  But it does sounds like a good idea; -cpu host pretty much
> > > implies the same kernel (in addition to the same processor) on both
> > > sides of the migration.
> > 
> > "-cpu host" already means "whatever is allowed by the host [CPU
> > and/or kernel]", not just "host CPU". It enables x2apic on all
> > hosts, for example.
> >
> 
> Does that mean we can automatically enable LMCE for "-cpu host"?

We can automatically enable LMCE for "-cpu host" if and only if
the host kernel supports LMCE.
Paolo Bonzini June 17, 2016, 5:26 p.m. UTC | #11
On 17/06/2016 19:20, Eduardo Habkost wrote:
>> > 
>> > What will be the conclusion? Do we still need this check?
>> > 
>> > I'm fine to remove this check if we normally didn't make such kind of
>> > checks and require users to avoid configuration mismatch.
> 
> I don't know yet if Paolo is convinced that the check is still
> useful. :)

I'm not. :)

> > Does that mean we can automatically enable LMCE for "-cpu host"?
>
> We can automatically enable LMCE for "-cpu host" if and only if
> the host kernel supports LMCE.

Yes, I agree here.  It's a start.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Haozhong Zhang June 20, 2016, 2:11 a.m. UTC | #12
On 06/17/16 14:20, Eduardo Habkost wrote:
> On Fri, Jun 17, 2016 at 10:01:05AM +0800, Haozhong Zhang wrote:
> > On 06/16/16 14:58, Eduardo Habkost wrote:
> > > On Thu, Jun 16, 2016 at 07:40:20PM +0200, Paolo Bonzini wrote:
> > > > 
> > > > 
> > > > On 16/06/2016 19:36, Eduardo Habkost wrote:
> > > > >> > 
> > > > >> > Eduardo said nice for this part in previous version [1], so we may wait
> > > > >> > for his comments?
> > > > >> > 
> > > > >> > [1] http://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg01992.html
> > > > > I agree we don't need this check, but I still believe it is a
> > > > > nice thing to have.
> > > > > 
> > > > > In addition to detecting user errors, they don't hurt and are
> > > > > useful for things like "-cpu host", that don't guarantee
> > > > > live-migration compatibility but still allow migration if you
> > > > > ensure host capabilities are the same on both sides.
> > > > 
> > > > On the other hand we don't check for this on any other property, either
> > > > CPU or device, do we?  Considering "lmce=on" always breaks on an old
> > > > kernel (i.e. there's no need for an explicit ",enforce" on the -cpu
> > > > flag), I think it's unnecessary and makes things inconsistent.
> > > 
> > > We don't check that because we normally can't: we usually don't
> > > send any configuration data (or anything that could be used to
> > > detect configuration mismatches) to the destination. When we do,
> > > it's often by accident.
> > > 
> > > In this case, it looks like we never needed to send mcg_cap in
> > > the migration stream. But we already send it, so let's use it for
> > > something useful.
> > > 
> > > I believe we should have more checks like these, when possible. I
> > > have been planning for a while to send CPUID data in the
> > > migration stream, to detect migration compatibility errors
> > > (either user errors or QEMU bugs).
> > > 
> > > In theory, those checks should never be necessary. In practice I
> > > believe they would be very useful.
> > >
> > 
> > Hi Eduardo and Paolo,
> > 
> > What will be the conclusion? Do we still need this check?
> > 
> > I'm fine to remove this check if we normally didn't make such kind of
> > checks and require users to avoid configuration mismatch.
> 
> I don't know yet if Paolo is convinced that the check is still
> useful. :)
> 
> I suggest doing it as a separate patch, so we can apply the rest
> of the series now and discuss/apply the check later.
>

Yes, I'll move the check to a separate patch so that we can easily
drop it if not necessary. Thanks for the suggestion!

> > 
> > > > 
> > > > > (I was going to suggest enabling lmce automatically on "-cpu
> > > > > host" as a follow-up patch, BTW.)
> > > > 
> > > > Interesting.  Technically it comes from the host kernel, not from the
> > > > host CPU.  But it does sounds like a good idea; -cpu host pretty much
> > > > implies the same kernel (in addition to the same processor) on both
> > > > sides of the migration.
> > > 
> > > "-cpu host" already means "whatever is allowed by the host [CPU
> > > and/or kernel]", not just "host CPU". It enables x2apic on all
> > > hosts, for example.
> > >
> > 
> > Does that mean we can automatically enable LMCE for "-cpu host"?
> 
> We can automatically enable LMCE for "-cpu host" if and only if
> the host kernel supports LMCE.
>

According to our discussion for KVM Patch 3, we may have to disable it
by default for -cpu host, so that pc-2.7 will not require new kernels
unless LMCE is explicitly requested by users.

Thanks,
Haozhong
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 20, 2016, 6:58 a.m. UTC | #13
On 20/06/2016 04:11, Haozhong Zhang wrote:
>>> > > Does that mean we can automatically enable LMCE for "-cpu host"?
>> > 
>> > We can automatically enable LMCE for "-cpu host" if and only if
>> > the host kernel supports LMCE.
>> >
> According to our discussion for KVM Patch 3, we may have to disable it
> by default by -cpu host, so that pc-2.7 will not require new kernels
> unless LMCE is required explicitly by users.

-cpu host is a bit special, it requires the same processor and kernel on
both sides of a migration.  So it can enable LMCE.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Haozhong Zhang June 20, 2016, 7:26 a.m. UTC | #14
On 06/20/16 08:58, Paolo Bonzini wrote:
> 
> 
> On 20/06/2016 04:11, Haozhong Zhang wrote:
> >>> > > Does that mean we can automatically enable LMCE for "-cpu host"?
> >> > 
> >> > We can automatically enable LMCE for "-cpu host" if and only if
> >> > the host kernel supports LMCE.
> >> >
> > According to our discussion for KVM Patch 3, we may have to disable it
> > by default by -cpu host, so that pc-2.7 will not require new kernels
> > unless LMCE is required explicitly by users.
> 
> -cpu host is a bit special, it requires the same processor and kernel on
> both sides of a migration.  So it can enable LMCE.
> 

OK, I'll make a separate patch in the next version to enable LMCE for
-cpu host.

Thanks,
Haozhong
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/target-i386/machine.c b/target-i386/machine.c
index cb9adf2..00375a3 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -347,6 +347,12 @@  static int cpu_post_load(void *opaque, int version_id)
         return -EINVAL;
     }
 
+    if (!cpu->enable_lmce && (env->mcg_cap & MCG_LMCE_P)) {
+        error_report("Config mismatch: VCPU has LMCE enabled, "
+                     "but \"lmce\" option is disabled");
+        return -EINVAL;
+    }
+
     /*
      * Real mode guest segments register DPL should be zero.
      * Older KVM version were setting it wrongly.
@@ -896,6 +902,24 @@  static const VMStateDescription vmstate_tsc_khz = {
     }
 };
 
+static bool mcg_ext_ctl_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+    return cpu->enable_lmce && env->mcg_ext_ctl;
+}
+
+static const VMStateDescription vmstate_mcg_ext_ctl = {
+    .name = "cpu/mcg_ext_ctl",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = mcg_ext_ctl_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(env.mcg_ext_ctl, X86CPU),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 VMStateDescription vmstate_x86_cpu = {
     .name = "cpu",
     .version_id = 12,
@@ -1022,6 +1046,7 @@  VMStateDescription vmstate_x86_cpu = {
 #ifdef TARGET_X86_64
         &vmstate_pkru,
 #endif
+        &vmstate_mcg_ext_ctl,
         NULL
     }
 };