diff mbox series

[v4,1/5] x86/HVM: split restore state checking from state loading

Message ID 05e69eea-79cc-4b3c-861b-855fa67c878a@suse.com (mailing list archive)
State Superseded
Headers show
Series x86/HVM: load state checking | expand

Commit Message

Jan Beulich Dec. 18, 2023, 2:39 p.m. UTC
..., at least as reasonably feasible without making a check hook
mandatory (in particular strict vs relaxed/zero-extend length checking
can't be done early this way).

Note that only one of the two uses of "real" hvm_load() is accompanied
by a "checking" one. The other directly consumes hvm_save() output,
which ought to be well-formed. This means that while input data related
checks don't need repeating in the "load" function when already done by
the "check" one (albeit assertions to this effect may be desirable),
domain state related checks (e.g. has_xyz(d)) will be required in both
places.

Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Now that this re-arranges hvm_load() anyway, wouldn't it be better to
down the vCPU-s ahead of calling arch_hvm_load() (which is now easy to
arrange for)?

Do we really need all the copying involved in use of _hvm_read_entry()
(backing hvm_load_entry())? Zero-extending loads are likely easier to
handle that way, but for strict loads all we gain is a reduced risk of
unaligned accesses (compared to simply pointing into h->data[]).
---
v4: Fold hvm_check() into hvm_load().
v2: New.

Comments

Roger Pau Monné Dec. 19, 2023, 2:36 p.m. UTC | #1
On Mon, Dec 18, 2023 at 03:39:55PM +0100, Jan Beulich wrote:
> ..., at least as reasonably feasible without making a check hook
> mandatory (in particular strict vs relaxed/zero-extend length checking
> can't be done early this way).
> 
> Note that only one of the two uses of "real" hvm_load() is accompanied
> with a "checking" one. The other directly consumes hvm_save() output,
> which ought to be well-formed. This means that while input data related
> checks don't need repeating in the "load" function when already done by
> the "check" one (albeit assertions to this effect may be desirable),
> domain state related checks (e.g. has_xyz(d)) will be required in both
> places.
> 
> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> Now that this re-arranges hvm_load() anyway, wouldn't it be better to
> down the vCPU-s ahead of calling arch_hvm_load() (which is now easy to
> arrange for)?

Seems OK to me.

> Do we really need all the copying involved in use of _hvm_read_entry()
> (backing hvm_load_entry()? Zero-extending loads are likely easier to
> handle that way, but for strict loads all we gain is a reduced risk of
> unaligned accesses (compared to simply pointing into h->data[]).

I do feel it's safer to copy the data so the checks are done against
what's loaded.  Albeit hvm_load() is already using hvm_get_entry().

> ---
> v4: Fold hvm_check() into hvm_load().
> v2: New.
> 
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -379,8 +379,12 @@ long arch_do_domctl(
>          if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0 )
>              goto sethvmcontext_out;
>  
> +        ret = hvm_load(d, false, &c);
> +        if ( ret )
> +            goto sethvmcontext_out;
> +
>          domain_pause(d);
> -        ret = hvm_load(d, &c);
> +        ret = hvm_load(d, true, &c);

Now that the check has been done ahead, do we want to somehow assert
that this cannot fail?  AIUI that's the expectation.

>          domain_unpause(d);
>  
>      sethvmcontext_out:
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -5397,7 +5397,7 @@ int hvm_copy_context_and_params(struct d
>      }
>  
>      c.cur = 0;
> -    rc = hvm_load(dst, &c);
> +    rc = hvm_load(dst, true, &c);
>  
>   out:
>      vfree(c.data);
> --- a/xen/arch/x86/hvm/save.c
> +++ b/xen/arch/x86/hvm/save.c
> @@ -30,7 +30,8 @@ static void arch_hvm_save(struct domain
>      d->arch.hvm.sync_tsc = rdtsc();
>  }
>  
> -static int arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
> +static int arch_hvm_check(const struct domain *d,
> +                          const struct hvm_save_header *hdr)
>  {
>      uint32_t eax, ebx, ecx, edx;
>  
> @@ -55,6 +56,11 @@ static int arch_hvm_load(struct domain *
>                 "(%#"PRIx32") and restored on another (%#"PRIx32").\n",
>                 d->domain_id, hdr->cpuid, eax);
>  
> +    return 0;
> +}
> +
> +static void arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
> +{
>      /* Restore guest's preferred TSC frequency. */
>      if ( hdr->gtsc_khz )
>          d->arch.tsc_khz = hdr->gtsc_khz;
> @@ -66,13 +72,12 @@ static int arch_hvm_load(struct domain *
>  
>      /* VGA state is not saved/restored, so we nobble the cache. */
>      d->arch.hvm.stdvga.cache = STDVGA_CACHE_DISABLED;
> -
> -    return 0;
>  }
>  
>  /* List of handlers for various HVM save and restore types */
>  static struct {
>      hvm_save_handler save;
> +    hvm_check_handler check;
>      hvm_load_handler load;
>      const char *name;
>      size_t size;
> @@ -88,6 +93,7 @@ void __init hvm_register_savevm(uint16_t
>  {
>      ASSERT(typecode <= HVM_SAVE_CODE_MAX);
>      ASSERT(hvm_sr_handlers[typecode].save == NULL);
> +    ASSERT(hvm_sr_handlers[typecode].check == NULL);
>      ASSERT(hvm_sr_handlers[typecode].load == NULL);
>      hvm_sr_handlers[typecode].save = save_state;
>      hvm_sr_handlers[typecode].load = load_state;
> @@ -275,12 +281,10 @@ int hvm_save(struct domain *d, hvm_domai
>      return 0;
>  }
>  
> -int hvm_load(struct domain *d, hvm_domain_context_t *h)
> +int hvm_load(struct domain *d, bool real, hvm_domain_context_t *h)

Maybe the 'real' parameter should instead be an enum:

enum hvm_load_action {
    CHECK,
    LOAD,
};
int hvm_load(struct domain *d, enum hvm_load_action action,
             hvm_domain_context_t *h);

Otherwise a comment might be warranted about how 'real' affects the
logic in the function.

>  {
>      const struct hvm_save_header *hdr;
>      struct hvm_save_descriptor *desc;
> -    hvm_load_handler handler;
> -    struct vcpu *v;
>      int rc;
>  
>      if ( d->is_dying )
> @@ -291,50 +295,91 @@ int hvm_load(struct domain *d, hvm_domai
>      if ( !hdr )
>          return -ENODATA;
>  
> -    rc = arch_hvm_load(d, hdr);
> -    if ( rc )
> -        return rc;
> +    rc = arch_hvm_check(d, hdr);

Shouldn't this _check function only be called when real == false?

> +    if ( real )
> +    {
> +        struct vcpu *v;
> +
> +        ASSERT(!rc);
> +        arch_hvm_load(d, hdr);
>  
> -    /* Down all the vcpus: we only re-enable the ones that had state saved. */
> -    for_each_vcpu(d, v)
> -        if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
> -            vcpu_sleep_nosync(v);
> +        /*
> +         * Down all the vcpus: we only re-enable the ones that had state
> +         * saved.
> +         */
> +        for_each_vcpu(d, v)
> +            if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
> +                vcpu_sleep_nosync(v);
> +    }
> +    else if ( rc )
> +        return rc;
>  
>      for ( ; ; )
>      {
> +        const char *name;
> +        hvm_load_handler load;
> +
>          if ( h->size - h->cur < sizeof(struct hvm_save_descriptor) )
>          {
>              /* Run out of data */
>              printk(XENLOG_G_ERR
>                     "HVM%d restore: save did not end with a null entry\n",
>                     d->domain_id);
> +            ASSERT(!real);
>              return -ENODATA;
>          }
>  
>          /* Read the typecode of the next entry  and check for the end-marker */
>          desc = (struct hvm_save_descriptor *)(&h->data[h->cur]);
> -        if ( desc->typecode == 0 )
> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
> +        {
> +            /* Reset cursor for hvm_load(, true, ). */
> +            if ( !real )
> +                h->cur = 0;
>              return 0;
> +        }
>  
>          /* Find the handler for this entry */
> -        if ( (desc->typecode > HVM_SAVE_CODE_MAX) ||
> -             ((handler = hvm_sr_handlers[desc->typecode].load) == NULL) )
> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
> +             !(name = hvm_sr_handlers[desc->typecode].name) ||
> +             !(load = hvm_sr_handlers[desc->typecode].load) )
>          {
>              printk(XENLOG_G_ERR "HVM%d restore: unknown entry typecode %u\n",
>                     d->domain_id, desc->typecode);

The message is not very accurate here, it does fail when the typecode
is unknown, but also fails when such typecode has no name or load
function setup.

Thanks, Roger.
Jan Beulich Dec. 19, 2023, 3:24 p.m. UTC | #2
On 19.12.2023 15:36, Roger Pau Monné wrote:
> On Mon, Dec 18, 2023 at 03:39:55PM +0100, Jan Beulich wrote:
>> ..., at least as reasonably feasible without making a check hook
>> mandatory (in particular strict vs relaxed/zero-extend length checking
>> can't be done early this way).
>>
>> Note that only one of the two uses of "real" hvm_load() is accompanied
>> with a "checking" one. The other directly consumes hvm_save() output,
>> which ought to be well-formed. This means that while input data related
>> checks don't need repeating in the "load" function when already done by
>> the "check" one (albeit assertions to this effect may be desirable),
>> domain state related checks (e.g. has_xyz(d)) will be required in both
>> places.
>>
>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> Now that this re-arranges hvm_load() anyway, wouldn't it be better to
>> down the vCPU-s ahead of calling arch_hvm_load() (which is now easy to
>> arrange for)?
> 
> Seems OK to me.

As is, or with the suggested adjustment, or either way?

>> Do we really need all the copying involved in use of _hvm_read_entry()
>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
>> handle that way, but for strict loads all we gain is a reduced risk of
>> unaligned accesses (compared to simply pointing into h->data[]).
> 
> I do feel it's safer to copy the data so the checks are done against
> what's loaded.  Albeit hvm_load() is already using hvm_get_entry().

The comment is about individual handlers, not hvm_load() itself. In some
cases - when copying directly into guest state structures - the copying
makes sense. In other cases, where there is separate copying anyway,
things could be done with less duplication of data (see hpet_load(),
which was already converted; along those lines hvm_load() itself also
was already switched away from the original copying).

Checking in any event can't be done against what _is_ loaded (as during
checking we want to avoid altering guest state); it'll always be only
against what is going to be loaded. The difference would be whether we
check data in the incoming buffer or in a copy of that data in a local
variable on the stack. But that applies to checking done in the load
hooks only anyway; the cases with split off check handlers should never
need to do any copying.

>> --- a/xen/arch/x86/domctl.c
>> +++ b/xen/arch/x86/domctl.c
>> @@ -379,8 +379,12 @@ long arch_do_domctl(
>>          if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0 )
>>              goto sethvmcontext_out;
>>  
>> +        ret = hvm_load(d, false, &c);
>> +        if ( ret )
>> +            goto sethvmcontext_out;
>> +
>>          domain_pause(d);
>> -        ret = hvm_load(d, &c);
>> +        ret = hvm_load(d, true, &c);
> 
> Now that the check has been done ahead, do we want to somehow assert
> that this cannot fail?  AIUI that's the expectation.

We certainly can't until all checking has been moved out of the load
handlers. And even then I think there are still cases where load might
produce an error. (In fact I would have objected a little more strongly to
folding the prior hvm_check() into hvm_load() if indeed a separate hvm_load()
could have ended up returning void in the long run.)

>> @@ -275,12 +281,10 @@ int hvm_save(struct domain *d, hvm_domai
>>      return 0;
>>  }
>>  
>> -int hvm_load(struct domain *d, hvm_domain_context_t *h)
>> +int hvm_load(struct domain *d, bool real, hvm_domain_context_t *h)
> 
> Maybe the 'real' parameter should instead be an enum:
> 
> enum hvm_load_action {
>     CHECK,
>     LOAD,
> };
> int hvm_load(struct domain *d, enum hvm_load_action action,
>              hvm_domain_context_t *h);

Hmm, yes, it could. I'm not a fan of enums for boolean-like things,
though.

> Otherwise a comment might be warranted about how 'real' affects the
> logic in the function.

I can certainly add a comment immediately ahead of the function:

/*
 * @real = false requests checking of the incoming state, while @real = true
 * requests actual loading, which will then assume that checking was already
 * done or is unnecessary.
 */

>> @@ -291,50 +295,91 @@ int hvm_load(struct domain *d, hvm_domai
>>      if ( !hdr )
>>          return -ENODATA;
>>  
>> -    rc = arch_hvm_load(d, hdr);
>> -    if ( rc )
>> -        return rc;
>> +    rc = arch_hvm_check(d, hdr);
> 
> Shouldn't this _check function only be called when real == false?

Possibly. In v4 I directly transformed what I had in v3:

    ASSERT(!arch_hvm_check(d, hdr));

I.e. it is now the call above plus ...

>> +    if ( real )
>> +    {
>> +        struct vcpu *v;
>> +
>> +        ASSERT(!rc);

... this assertion. Really the little brother of the call site assertion
you're asking for (see above).

>> +        arch_hvm_load(d, hdr);
>>  
>> -    /* Down all the vcpus: we only re-enable the ones that had state saved. */
>> -    for_each_vcpu(d, v)
>> -        if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
>> -            vcpu_sleep_nosync(v);
>> +        /*
>> +         * Down all the vcpus: we only re-enable the ones that had state
>> +         * saved.
>> +         */
>> +        for_each_vcpu(d, v)
>> +            if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
>> +                vcpu_sleep_nosync(v);
>> +    }
>> +    else if ( rc )
>> +        return rc;
>>  
>>      for ( ; ; )
>>      {
>> +        const char *name;
>> +        hvm_load_handler load;
>> +
>>          if ( h->size - h->cur < sizeof(struct hvm_save_descriptor) )
>>          {
>>              /* Run out of data */
>>              printk(XENLOG_G_ERR
>>                     "HVM%d restore: save did not end with a null entry\n",
>>                     d->domain_id);
>> +            ASSERT(!real);
>>              return -ENODATA;
>>          }
>>  
>>          /* Read the typecode of the next entry  and check for the end-marker */
>>          desc = (struct hvm_save_descriptor *)(&h->data[h->cur]);
>> -        if ( desc->typecode == 0 )
>> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
>> +        {
>> +            /* Reset cursor for hvm_load(, true, ). */
>> +            if ( !real )
>> +                h->cur = 0;
>>              return 0;
>> +        }
>>  
>>          /* Find the handler for this entry */
>> -        if ( (desc->typecode > HVM_SAVE_CODE_MAX) ||
>> -             ((handler = hvm_sr_handlers[desc->typecode].load) == NULL) )
>> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
>> +             !(name = hvm_sr_handlers[desc->typecode].name) ||
>> +             !(load = hvm_sr_handlers[desc->typecode].load) )
>>          {
>>              printk(XENLOG_G_ERR "HVM%d restore: unknown entry typecode %u\n",
>>                     d->domain_id, desc->typecode);
> 
> The message is not very accurate here, it does fail when the typecode
> is unknown, but also fails when such typecode has no name or load
> function setup.

Yes and no, and it's not changing in this patch. Are you suggesting I should
change it despite being unrelated? If so, there not being a name (which is
the new check I'm adding) still suggests the code is unknown. There not being
a load handler really indicates a bug in Xen (yet no reason to e.g. BUG() in
that case, the failed loading will hopefully be noticeable enough).

Jan
Roger Pau Monné Jan. 9, 2024, 10:26 a.m. UTC | #3
On Tue, Dec 19, 2023 at 04:24:02PM +0100, Jan Beulich wrote:
> On 19.12.2023 15:36, Roger Pau Monné wrote:
> > On Mon, Dec 18, 2023 at 03:39:55PM +0100, Jan Beulich wrote:
> >> ..., at least as reasonably feasible without making a check hook
> >> mandatory (in particular strict vs relaxed/zero-extend length checking
> >> can't be done early this way).
> >>
> >> Note that only one of the two uses of "real" hvm_load() is accompanied
> >> with a "checking" one. The other directly consumes hvm_save() output,
> >> which ought to be well-formed. This means that while input data related
> >> checks don't need repeating in the "load" function when already done by
> >> the "check" one (albeit assertions to this effect may be desirable),
> >> domain state related checks (e.g. has_xyz(d)) will be required in both
> >> places.
> >>
> >> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> ---
> >> Now that this re-arranges hvm_load() anyway, wouldn't it be better to
> >> down the vCPU-s ahead of calling arch_hvm_load() (which is now easy to
> >> arrange for)?
> > 
> > Seems OK to me.
> 
> As is, or with the suggested adjustment, or either way?

I'm fine either way if you don't want to do it as part of this
patch.

> >> --- a/xen/arch/x86/domctl.c
> >> +++ b/xen/arch/x86/domctl.c
> >> @@ -379,8 +379,12 @@ long arch_do_domctl(
> >>          if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0 )
> >>              goto sethvmcontext_out;
> >>  
> >> +        ret = hvm_load(d, false, &c);
> >> +        if ( ret )
> >> +            goto sethvmcontext_out;
> >> +
> >>          domain_pause(d);
> >> -        ret = hvm_load(d, &c);
> >> +        ret = hvm_load(d, true, &c);
> > 
> > Now that the check has been done ahead, do we want to somehow assert
> > that this cannot fail?  AIUI that's the expectation.
> 
> We certainly can't until all checking was moved out of the load handlers.
> And even then I think there are still cases where load might produce an
> error. (In fact I would have refused a little more strongly to folding
> the prior hvm_check() into hvm_load() if indeed a separate hvm_load()
> could have ended up returning void in the long run.)

I see, _load could fail even if all the data provided was correct, for
example because the hypervisor is OoM?

> >> @@ -275,12 +281,10 @@ int hvm_save(struct domain *d, hvm_domai
> >>      return 0;
> >>  }
> >>  
> >> -int hvm_load(struct domain *d, hvm_domain_context_t *h)
> >> +int hvm_load(struct domain *d, bool real, hvm_domain_context_t *h)
> > 
> > Maybe the 'real' parameter should instead be an enum:
> > 
> > enum hvm_load_action {
> >     CHECK,
> >     LOAD,
> > };
> > int hvm_load(struct domain *d, enum hvm_load_action action,
> >              hvm_domain_context_t *h);
> 
> Hmm, yes, it could. I'm not a fan of enums for boolean-like things,
> though.
> 
> > Otherwise a comment might be warranted about how 'real' affects the
> > logic in the function.
> 
> I can certainly add a comment immediately ahead of the function:
> 
> /*
>  * @real = false requests checking of the incoming state, while @real = true
>  * requests actual loading, which will then assume that checking was already
>  * done or is unnecessary.
>  */

Seems good to me.  I do think the usage of an action enum is clearer,
but I'm fine with the comment and the usage of a boolean.

> >> @@ -291,50 +295,91 @@ int hvm_load(struct domain *d, hvm_domai
> >>      if ( !hdr )
> >>          return -ENODATA;
> >>  
> >> -    rc = arch_hvm_load(d, hdr);
> >> -    if ( rc )
> >> -        return rc;
> >> +    rc = arch_hvm_check(d, hdr);
> > 
> > Shouldn't this _check function only be called when real == false?
> 
> Possibly. In v4 I directly transformed what I had in v3:
> 
>     ASSERT(!arch_hvm_check(d, hdr));
> 
> I.e. it is now the call above plus ...
> 
> >> +    if ( real )
> >> +    {
> >> +        struct vcpu *v;
> >> +
> >> +        ASSERT(!rc);
> 
> ... this assertion. Really the little brother of the call site assertion
> you're asking for (see above).
> 
> >> +        arch_hvm_load(d, hdr);
> >>  
> >> -    /* Down all the vcpus: we only re-enable the ones that had state saved. */
> >> -    for_each_vcpu(d, v)
> >> -        if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
> >> -            vcpu_sleep_nosync(v);
> >> +        /*
> >> +         * Down all the vcpus: we only re-enable the ones that had state
> >> +         * saved.
> >> +         */
> >> +        for_each_vcpu(d, v)
> >> +            if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
> >> +                vcpu_sleep_nosync(v);
> >> +    }
> >> +    else if ( rc )
> >> +        return rc;

The issue I see with this is that when built with debug=n the call to
arch_hvm_check() with real == true is useless, as the result is never
evaluated - IOW: would be clearer to just avoid the call altogether.

> >>      for ( ; ; )
> >>      {
> >> +        const char *name;
> >> +        hvm_load_handler load;
> >> +
> >>          if ( h->size - h->cur < sizeof(struct hvm_save_descriptor) )
> >>          {
> >>              /* Run out of data */
> >>              printk(XENLOG_G_ERR
> >>                     "HVM%d restore: save did not end with a null entry\n",
> >>                     d->domain_id);
> >> +            ASSERT(!real);
> >>              return -ENODATA;
> >>          }
> >>  
> >>          /* Read the typecode of the next entry  and check for the end-marker */
> >>          desc = (struct hvm_save_descriptor *)(&h->data[h->cur]);
> >> -        if ( desc->typecode == 0 )
> >> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
> >> +        {
> >> +            /* Reset cursor for hvm_load(, true, ). */
> >> +            if ( !real )
> >> +                h->cur = 0;
> >>              return 0;
> >> +        }
> >>  
> >>          /* Find the handler for this entry */
> >> -        if ( (desc->typecode > HVM_SAVE_CODE_MAX) ||
> >> -             ((handler = hvm_sr_handlers[desc->typecode].load) == NULL) )
> >> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
> >> +             !(name = hvm_sr_handlers[desc->typecode].name) ||
> >> +             !(load = hvm_sr_handlers[desc->typecode].load) )
> >>          {
> >>              printk(XENLOG_G_ERR "HVM%d restore: unknown entry typecode %u\n",
> >>                     d->domain_id, desc->typecode);
> > 
> > The message is not very accurate here, it does fail when the typecode
> > is unknown, but also fails when such typecode has no name or load
> > function setup.
> 
> Yes and no, and it's not changing in this patch. Are you suggesting I should
> change it despite being unrelated? If so, there not being a name (which is
> the new check I'm adding) still suggests the code is unknown. There not being
> a load handler really indicates a bug in Xen (yet no reason to e.g. BUG() in
> that case, the failed loading will hopefully be noticeable enough).

Right, so not a lot of room for improvement anyway.  Let's leave as-is
then.

Thanks, Roger.
Jan Beulich Jan. 9, 2024, 10:58 a.m. UTC | #4
On 09.01.2024 11:26, Roger Pau Monné wrote:
> On Tue, Dec 19, 2023 at 04:24:02PM +0100, Jan Beulich wrote:
>> On 19.12.2023 15:36, Roger Pau Monné wrote:
>>> On Mon, Dec 18, 2023 at 03:39:55PM +0100, Jan Beulich wrote:
>>>> --- a/xen/arch/x86/domctl.c
>>>> +++ b/xen/arch/x86/domctl.c
>>>> @@ -379,8 +379,12 @@ long arch_do_domctl(
>>>>          if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0 )
>>>>              goto sethvmcontext_out;
>>>>  
>>>> +        ret = hvm_load(d, false, &c);
>>>> +        if ( ret )
>>>> +            goto sethvmcontext_out;
>>>> +
>>>>          domain_pause(d);
>>>> -        ret = hvm_load(d, &c);
>>>> +        ret = hvm_load(d, true, &c);
>>>
>>> Now that the check has been done ahead, do we want to somehow assert
>>> that this cannot fail?  AIUI that's the expectation.
>>
>> We certainly can't until all checking was moved out of the load handlers.
>> And even then I think there are still cases where load might produce an
>> error. (In fact I would have refused a little more strongly to folding
>> the prior hvm_check() into hvm_load() if indeed a separate hvm_load()
>> could have ended up returning void in the long run.)
> 
> I see, _load could fail even if all the data provided was correct, for
> example because the hypervisor is OoM?

That's the primary hypothetical cause for such a failure, yes.

>>>> @@ -291,50 +295,91 @@ int hvm_load(struct domain *d, hvm_domai
>>>>      if ( !hdr )
>>>>          return -ENODATA;
>>>>  
>>>> -    rc = arch_hvm_load(d, hdr);
>>>> -    if ( rc )
>>>> -        return rc;
>>>> +    rc = arch_hvm_check(d, hdr);
>>>
>>> Shouldn't this _check function only be called when real == false?
>>
>> Possibly. In v4 I directly transformed what I had in v3:
>>
>>     ASSERT(!arch_hvm_check(d, hdr));
>>
>> I.e. it is now the call above plus ...
>>
>>>> +    if ( real )
>>>> +    {
>>>> +        struct vcpu *v;
>>>> +
>>>> +        ASSERT(!rc);
>>
>> ... this assertion. Really the little brother of the call site assertion
>> you're asking for (see above).
>>
>>>> +        arch_hvm_load(d, hdr);
>>>>  
>>>> -    /* Down all the vcpus: we only re-enable the ones that had state saved. */
>>>> -    for_each_vcpu(d, v)
>>>> -        if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
>>>> -            vcpu_sleep_nosync(v);
>>>> +        /*
>>>> +         * Down all the vcpus: we only re-enable the ones that had state
>>>> +         * saved.
>>>> +         */
>>>> +        for_each_vcpu(d, v)
>>>> +            if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
>>>> +                vcpu_sleep_nosync(v);
>>>> +    }
>>>> +    else if ( rc )
>>>> +        return rc;
> 
> The issue I see with this is that when built with debug=n the call to
> arch_hvm_check() with real == true is useless, as the result is never
> evaluated - IOW: would be clearer to just avoid the call altogether.

Which, besides being imo slightly worse for then having two call sites,
puts me in a difficult position: It may not have been here, but on
another patch (but I think it was an earlier version of this one)
where Andrew commented on

    ASSERT(func());

as generally being a disliked pattern, for having a "side effect" in
the expression of an assertion. Plus the call isn't pointless even in
release builds, because of the log messages issued: Them appearing
twice in close succession might be a good hint of something fishy
going on.

Jan
Roger Pau Monné Jan. 9, 2024, 12:33 p.m. UTC | #5
On Tue, Jan 09, 2024 at 11:58:48AM +0100, Jan Beulich wrote:
> On 09.01.2024 11:26, Roger Pau Monné wrote:
> > On Tue, Dec 19, 2023 at 04:24:02PM +0100, Jan Beulich wrote:
> >> On 19.12.2023 15:36, Roger Pau Monné wrote:
> >>> On Mon, Dec 18, 2023 at 03:39:55PM +0100, Jan Beulich wrote:
> >>>> @@ -291,50 +295,91 @@ int hvm_load(struct domain *d, hvm_domai
> >>>>      if ( !hdr )
> >>>>          return -ENODATA;
> >>>>  
> >>>> -    rc = arch_hvm_load(d, hdr);
> >>>> -    if ( rc )
> >>>> -        return rc;
> >>>> +    rc = arch_hvm_check(d, hdr);
> >>>
> >>> Shouldn't this _check function only be called when real == false?
> >>
> >> Possibly. In v4 I directly transformed what I had in v3:
> >>
> >>     ASSERT(!arch_hvm_check(d, hdr));
> >>
> >> I.e. it is now the call above plus ...
> >>
> >>>> +    if ( real )
> >>>> +    {
> >>>> +        struct vcpu *v;
> >>>> +
> >>>> +        ASSERT(!rc);
> >>
> >> ... this assertion. Really the little brother of the call site assertion
> >> you're asking for (see above).
> >>
> >>>> +        arch_hvm_load(d, hdr);
> >>>>  
> >>>> -    /* Down all the vcpus: we only re-enable the ones that had state saved. */
> >>>> -    for_each_vcpu(d, v)
> >>>> -        if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
> >>>> -            vcpu_sleep_nosync(v);
> >>>> +        /*
> >>>> +         * Down all the vcpus: we only re-enable the ones that had state
> >>>> +         * saved.
> >>>> +         */
> >>>> +        for_each_vcpu(d, v)
> >>>> +            if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
> >>>> +                vcpu_sleep_nosync(v);
> >>>> +    }
> >>>> +    else if ( rc )
> >>>> +        return rc;
> > 
> > The issue I see with this is that when built with debug=n the call to
> > arch_hvm_check() with real == true is useless, as the result is never
> > evaluated - IOW: would be clearer to just avoid the call altogether.
> 
> Which, besides being imo slightly worse for then having two call sites,
> puts me in a difficult position: It may not have been here, but on
> another patch (but I think it was an earlier version of this one)
> where Andrew commented on
> 
>     ASSERT(func());
> 
> as generally being a disliked pattern, for having a "side effect" in
> the expression of an assertion.

I was going to suggest to add the pure attribute to the function, but
it does have side effects as it prints to the console.

> Plus the call isn't pointless even in
> release builds, because of the log messages issued: Them appearing
> twice in close succession might be a good hint of something fishy
> going on.

Why do you mention the messages appearing twice?  Won't the first
check call return error and thus should prevent the caller from
attempting to load the state?

Thanks, Roger.
Jan Beulich Jan. 9, 2024, 12:38 p.m. UTC | #6
On 09.01.2024 13:33, Roger Pau Monné wrote:
> On Tue, Jan 09, 2024 at 11:58:48AM +0100, Jan Beulich wrote:
>> Plus the call isn't pointless even in
>> release builds, because of the log messages issued: Them appearing
>> twice in close succession might be a good hint of something fishy
>> going on.
> 
> Why do you mention the messages appearing twice?  Won't the first
> check call return error and thus should prevent the caller from
> attempting to load the state?

Well, what exactly is going to be the reason for a failure on the
"real" invocation when the "dry-run" one supposedly succeeded is
unknown. But you're right, messages occurring twice indeed may not
be very likely. It's more like these messages appearing and then
loading still continuing which might make it obvious that something
went wrong.

Jan
diff mbox series

Patch

--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -379,8 +379,12 @@  long arch_do_domctl(
         if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0 )
             goto sethvmcontext_out;
 
+        ret = hvm_load(d, false, &c);
+        if ( ret )
+            goto sethvmcontext_out;
+
         domain_pause(d);
-        ret = hvm_load(d, &c);
+        ret = hvm_load(d, true, &c);
         domain_unpause(d);
 
     sethvmcontext_out:
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -5397,7 +5397,7 @@  int hvm_copy_context_and_params(struct d
     }
 
     c.cur = 0;
-    rc = hvm_load(dst, &c);
+    rc = hvm_load(dst, true, &c);
 
  out:
     vfree(c.data);
--- a/xen/arch/x86/hvm/save.c
+++ b/xen/arch/x86/hvm/save.c
@@ -30,7 +30,8 @@  static void arch_hvm_save(struct domain
     d->arch.hvm.sync_tsc = rdtsc();
 }
 
-static int arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
+static int arch_hvm_check(const struct domain *d,
+                          const struct hvm_save_header *hdr)
 {
     uint32_t eax, ebx, ecx, edx;
 
@@ -55,6 +56,11 @@  static int arch_hvm_load(struct domain *
                "(%#"PRIx32") and restored on another (%#"PRIx32").\n",
                d->domain_id, hdr->cpuid, eax);
 
+    return 0;
+}
+
+static void arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
+{
     /* Restore guest's preferred TSC frequency. */
     if ( hdr->gtsc_khz )
         d->arch.tsc_khz = hdr->gtsc_khz;
@@ -66,13 +72,12 @@  static int arch_hvm_load(struct domain *
 
     /* VGA state is not saved/restored, so we nobble the cache. */
     d->arch.hvm.stdvga.cache = STDVGA_CACHE_DISABLED;
-
-    return 0;
 }
 
 /* List of handlers for various HVM save and restore types */
 static struct {
     hvm_save_handler save;
+    hvm_check_handler check;
     hvm_load_handler load;
     const char *name;
     size_t size;
@@ -88,6 +93,7 @@  void __init hvm_register_savevm(uint16_t
 {
     ASSERT(typecode <= HVM_SAVE_CODE_MAX);
     ASSERT(hvm_sr_handlers[typecode].save == NULL);
+    ASSERT(hvm_sr_handlers[typecode].check == NULL);
     ASSERT(hvm_sr_handlers[typecode].load == NULL);
     hvm_sr_handlers[typecode].save = save_state;
     hvm_sr_handlers[typecode].load = load_state;
@@ -275,12 +281,10 @@  int hvm_save(struct domain *d, hvm_domai
     return 0;
 }
 
-int hvm_load(struct domain *d, hvm_domain_context_t *h)
+int hvm_load(struct domain *d, bool real, hvm_domain_context_t *h)
 {
     const struct hvm_save_header *hdr;
     struct hvm_save_descriptor *desc;
-    hvm_load_handler handler;
-    struct vcpu *v;
     int rc;
 
     if ( d->is_dying )
@@ -291,50 +295,91 @@  int hvm_load(struct domain *d, hvm_domai
     if ( !hdr )
         return -ENODATA;
 
-    rc = arch_hvm_load(d, hdr);
-    if ( rc )
-        return rc;
+    rc = arch_hvm_check(d, hdr);
+    if ( real )
+    {
+        struct vcpu *v;
+
+        ASSERT(!rc);
+        arch_hvm_load(d, hdr);
 
-    /* Down all the vcpus: we only re-enable the ones that had state saved. */
-    for_each_vcpu(d, v)
-        if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
-            vcpu_sleep_nosync(v);
+        /*
+         * Down all the vcpus: we only re-enable the ones that had state
+         * saved.
+         */
+        for_each_vcpu(d, v)
+            if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
+                vcpu_sleep_nosync(v);
+    }
+    else if ( rc )
+        return rc;
 
     for ( ; ; )
     {
+        const char *name;
+        hvm_load_handler load;
+
         if ( h->size - h->cur < sizeof(struct hvm_save_descriptor) )
         {
             /* Run out of data */
             printk(XENLOG_G_ERR
                    "HVM%d restore: save did not end with a null entry\n",
                    d->domain_id);
+            ASSERT(!real);
             return -ENODATA;
         }
 
         /* Read the typecode of the next entry  and check for the end-marker */
         desc = (struct hvm_save_descriptor *)(&h->data[h->cur]);
-        if ( desc->typecode == 0 )
+        if ( desc->typecode == HVM_SAVE_CODE(END) )
+        {
+            /* Reset cursor for hvm_load(, true, ). */
+            if ( !real )
+                h->cur = 0;
             return 0;
+        }
 
         /* Find the handler for this entry */
-        if ( (desc->typecode > HVM_SAVE_CODE_MAX) ||
-             ((handler = hvm_sr_handlers[desc->typecode].load) == NULL) )
+        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
+             !(name = hvm_sr_handlers[desc->typecode].name) ||
+             !(load = hvm_sr_handlers[desc->typecode].load) )
         {
             printk(XENLOG_G_ERR "HVM%d restore: unknown entry typecode %u\n",
                    d->domain_id, desc->typecode);
+            ASSERT(!real);
             return -EINVAL;
         }
 
-        /* Load the entry */
-        printk(XENLOG_G_INFO "HVM%d restore: %s %"PRIu16"\n", d->domain_id,
-               hvm_sr_handlers[desc->typecode].name, desc->instance);
-        rc = handler(d, h);
+        if ( real )
+        {
+            /* Load the entry */
+            printk(XENLOG_G_INFO "HVM restore %pd: %s %"PRIu16"\n", d,
+                   name, desc->instance);
+            rc = load(d, h);
+        }
+        else
+        {
+            /* Check the entry. */
+            hvm_check_handler check = hvm_sr_handlers[desc->typecode].check;
+
+            if ( !check )
+            {
+                if ( desc->length > h->size - h->cur - sizeof(*desc) )
+                    return -ENODATA;
+                h->cur += sizeof(*desc) + desc->length;
+                rc = 0;
+            }
+            else
+                rc = check(d, h);
+        }
+
         if ( rc )
         {
-            printk(XENLOG_G_ERR "HVM%d restore: failed to load entry %u/%u rc %d\n",
-                   d->domain_id, desc->typecode, desc->instance, rc);
+            printk(XENLOG_G_ERR "HVM restore %pd: failed to %s %s:%u rc %d\n",
+                   d, real ? "load" : "check", name, desc->instance, rc);
             return rc;
         }
+
         process_pending_softirqs();
     }
 
--- a/xen/arch/x86/include/asm/hvm/save.h
+++ b/xen/arch/x86/include/asm/hvm/save.h
@@ -103,6 +103,8 @@  static inline unsigned int hvm_load_inst
  * restoring.  Both return non-zero on error. */
 typedef int (*hvm_save_handler) (struct vcpu *v,
                                  hvm_domain_context_t *h);
+typedef int (*hvm_check_handler)(const struct domain *d,
+                                 hvm_domain_context_t *h);
 typedef int (*hvm_load_handler) (struct domain *d,
                                  hvm_domain_context_t *h);
 
@@ -140,6 +142,6 @@  size_t hvm_save_size(struct domain *d);
 int hvm_save(struct domain *d, hvm_domain_context_t *h);
 int hvm_save_one(struct domain *d, unsigned int typecode, unsigned int instance,
                  XEN_GUEST_HANDLE_64(uint8) handle, uint64_t *bufsz);
-int hvm_load(struct domain *d, hvm_domain_context_t *h);
+int hvm_load(struct domain *d, bool real, hvm_domain_context_t *h);
 
 #endif /* __XEN_HVM_SAVE_H__ */