diff mbox series

[V2,1/2] x86/altp2m: Add hypercall to set a range of sve bits

Message ID 20191106153442.12776-1-aisaila@bitdefender.com (mailing list archive)
State Superseded
Headers show
Series [V2,1/2] x86/altp2m: Add hypercall to set a range of sve bits | expand

Commit Message

Alexandru Stefan ISAILA Nov. 6, 2019, 3:35 p.m. UTC
By default the sve bits are not set.
This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
to set a range of sve bits.
The core function, p2m_set_suppress_ve_multi(), does not brake in case
of a error and it is doing a best effort for setting the bits in the
given range. A check for continuation is made in order to have
preemption on big ranges.

Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>

---
Changes since V1:
	- Remove "continue"
	- Add a new field in xen_hvm_altp2m_suppress_ve to store the
continuation value
	- Have p2m_set_suppress_ve_multi() take
xen_hvm_altp2m_suppress_ve as a param.
---
 tools/libxc/include/xenctrl.h   |  3 ++
 tools/libxc/xc_altp2m.c         | 25 ++++++++++++++
 xen/arch/x86/hvm/hvm.c          | 20 ++++++++++--
 xen/arch/x86/mm/p2m.c           | 58 +++++++++++++++++++++++++++++++++
 xen/include/public/hvm/hvm_op.h |  5 ++-
 xen/include/xen/mem_access.h    |  3 ++
 6 files changed, 111 insertions(+), 3 deletions(-)

Comments

Tamas K Lengyel Nov. 6, 2019, 9:06 p.m. UTC | #1
On Wed, Nov 6, 2019 at 7:35 AM Alexandru Stefan ISAILA
<aisaila@bitdefender.com> wrote:
>
> By default the sve bits are not set.
> This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
> to set a range of sve bits.
> The core function, p2m_set_suppress_ve_multi(), does not brake in case
> of a error and it is doing a best effort for setting the bits in the
> given range. A check for continuation is made in order to have
> preemption on big ranges.
>
> Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>
>
> ---
> Changes since V1:
>         - Remove "continue"
>         - Add a new field in xen_hvm_altp2m_suppress_ve to store the
> continuation value
>         - Have p2m_set_suppress_ve_multi() take
> xen_hvm_altp2m_suppress_ve as a param.
> ---
>  tools/libxc/include/xenctrl.h   |  3 ++
>  tools/libxc/xc_altp2m.c         | 25 ++++++++++++++
>  xen/arch/x86/hvm/hvm.c          | 20 ++++++++++--
>  xen/arch/x86/mm/p2m.c           | 58 +++++++++++++++++++++++++++++++++
>  xen/include/public/hvm/hvm_op.h |  5 ++-
>  xen/include/xen/mem_access.h    |  3 ++
>  6 files changed, 111 insertions(+), 3 deletions(-)
>
> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> index f4431687b3..21b644f459 100644
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -1923,6 +1923,9 @@ int xc_altp2m_switch_to_view(xc_interface *handle, uint32_t domid,
>                               uint16_t view_id);
>  int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
>                                uint16_t view_id, xen_pfn_t gfn, bool sve);
> +int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
> +                                   uint16_t view_id, xen_pfn_t start_gfn,
> +                                   uint32_t nr, bool sve);
>  int xc_altp2m_get_suppress_ve(xc_interface *handle, uint32_t domid,
>                                uint16_t view_id, xen_pfn_t gfn, bool *sve);
>  int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
> diff --git a/tools/libxc/xc_altp2m.c b/tools/libxc/xc_altp2m.c
> index 09dad0355e..6605d9abbe 100644
> --- a/tools/libxc/xc_altp2m.c
> +++ b/tools/libxc/xc_altp2m.c
> @@ -234,6 +234,31 @@ int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
>      return rc;
>  }
>
> +int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
> +                                   uint16_t view_id, xen_pfn_t start_gfn,
> +                                   uint32_t nr, bool sve)
> +{
> +    int rc;
> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_altp2m_op_t, arg);
> +
> +    arg = xc_hypercall_buffer_alloc(handle, arg, sizeof(*arg));

Does xc_hypercall_buffer_alloc null-initialize the structure?
Alexandru Stefan ISAILA Nov. 7, 2019, 7:46 a.m. UTC | #2
On 06.11.2019 23:06, Tamas K Lengyel wrote:
> On Wed, Nov 6, 2019 at 7:35 AM Alexandru Stefan ISAILA
> <aisaila@bitdefender.com> wrote:
>>
>> By default the sve bits are not set.
>> This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
>> to set a range of sve bits.
>> The core function, p2m_set_suppress_ve_multi(), does not brake in case
>> of a error and it is doing a best effort for setting the bits in the
>> given range. A check for continuation is made in order to have
>> preemption on big ranges.
>>
>> Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>
>>
>> ---
>> Changes since V1:
>>          - Remove "continue"
>>          - Add a new field in xen_hvm_altp2m_suppress_ve to store the
>> continuation value
>>          - Have p2m_set_suppress_ve_multi() take
>> xen_hvm_altp2m_suppress_ve as a param.
>> ---
>>   tools/libxc/include/xenctrl.h   |  3 ++
>>   tools/libxc/xc_altp2m.c         | 25 ++++++++++++++
>>   xen/arch/x86/hvm/hvm.c          | 20 ++++++++++--
>>   xen/arch/x86/mm/p2m.c           | 58 +++++++++++++++++++++++++++++++++
>>   xen/include/public/hvm/hvm_op.h |  5 ++-
>>   xen/include/xen/mem_access.h    |  3 ++
>>   6 files changed, 111 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
>> index f4431687b3..21b644f459 100644
>> --- a/tools/libxc/include/xenctrl.h
>> +++ b/tools/libxc/include/xenctrl.h
>> @@ -1923,6 +1923,9 @@ int xc_altp2m_switch_to_view(xc_interface *handle, uint32_t domid,
>>                                uint16_t view_id);
>>   int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
>>                                 uint16_t view_id, xen_pfn_t gfn, bool sve);
>> +int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
>> +                                   uint16_t view_id, xen_pfn_t start_gfn,
>> +                                   uint32_t nr, bool sve);
>>   int xc_altp2m_get_suppress_ve(xc_interface *handle, uint32_t domid,
>>                                 uint16_t view_id, xen_pfn_t gfn, bool *sve);
>>   int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
>> diff --git a/tools/libxc/xc_altp2m.c b/tools/libxc/xc_altp2m.c
>> index 09dad0355e..6605d9abbe 100644
>> --- a/tools/libxc/xc_altp2m.c
>> +++ b/tools/libxc/xc_altp2m.c
>> @@ -234,6 +234,31 @@ int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
>>       return rc;
>>   }
>>
>> +int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
>> +                                   uint16_t view_id, xen_pfn_t start_gfn,
>> +                                   uint32_t nr, bool sve)
>> +{
>> +    int rc;
>> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_altp2m_op_t, arg);
>> +
>> +    arg = xc_hypercall_buffer_alloc(handle, arg, sizeof(*arg));
> 
> Does xc_hypercall_buffer_alloc null-initialize the structure?
> 

It calls xencall_alloc_buffer_pages() which calls memset(p, 0, nr_pages 
* PAGE_SIZE) before returning.

Alex
Tamas K Lengyel Nov. 7, 2019, 3 p.m. UTC | #3
On Wed, Nov 6, 2019 at 11:46 PM Alexandru Stefan ISAILA
<aisaila@bitdefender.com> wrote:
>
>
>
> On 06.11.2019 23:06, Tamas K Lengyel wrote:
> > On Wed, Nov 6, 2019 at 7:35 AM Alexandru Stefan ISAILA
> > <aisaila@bitdefender.com> wrote:
> >>
> >> By default the sve bits are not set.
> >> This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
> >> to set a range of sve bits.
> >> The core function, p2m_set_suppress_ve_multi(), does not brake in case
> >> of a error and it is doing a best effort for setting the bits in the
> >> given range. A check for continuation is made in order to have
> >> preemption on big ranges.
> >>
> >> Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>
> >>
> >> ---
> >> Changes since V1:
> >>          - Remove "continue"
> >>          - Add a new field in xen_hvm_altp2m_suppress_ve to store the
> >> continuation value
> >>          - Have p2m_set_suppress_ve_multi() take
> >> xen_hvm_altp2m_suppress_ve as a param.
> >> ---
> >>   tools/libxc/include/xenctrl.h   |  3 ++
> >>   tools/libxc/xc_altp2m.c         | 25 ++++++++++++++
> >>   xen/arch/x86/hvm/hvm.c          | 20 ++++++++++--
> >>   xen/arch/x86/mm/p2m.c           | 58 +++++++++++++++++++++++++++++++++
> >>   xen/include/public/hvm/hvm_op.h |  5 ++-
> >>   xen/include/xen/mem_access.h    |  3 ++
> >>   6 files changed, 111 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> >> index f4431687b3..21b644f459 100644
> >> --- a/tools/libxc/include/xenctrl.h
> >> +++ b/tools/libxc/include/xenctrl.h
> >> @@ -1923,6 +1923,9 @@ int xc_altp2m_switch_to_view(xc_interface *handle, uint32_t domid,
> >>                                uint16_t view_id);
> >>   int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
> >>                                 uint16_t view_id, xen_pfn_t gfn, bool sve);
> >> +int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
> >> +                                   uint16_t view_id, xen_pfn_t start_gfn,
> >> +                                   uint32_t nr, bool sve);
> >>   int xc_altp2m_get_suppress_ve(xc_interface *handle, uint32_t domid,
> >>                                 uint16_t view_id, xen_pfn_t gfn, bool *sve);
> >>   int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
> >> diff --git a/tools/libxc/xc_altp2m.c b/tools/libxc/xc_altp2m.c
> >> index 09dad0355e..6605d9abbe 100644
> >> --- a/tools/libxc/xc_altp2m.c
> >> +++ b/tools/libxc/xc_altp2m.c
> >> @@ -234,6 +234,31 @@ int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
> >>       return rc;
> >>   }
> >>
> >> +int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
> >> +                                   uint16_t view_id, xen_pfn_t start_gfn,
> >> +                                   uint32_t nr, bool sve)
> >> +{
> >> +    int rc;
> >> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_altp2m_op_t, arg);
> >> +
> >> +    arg = xc_hypercall_buffer_alloc(handle, arg, sizeof(*arg));
> >
> > Does xc_hypercall_buffer_alloc null-initialize the structure?
> >
>
> It calls xencall_alloc_buffer_pages() which calls memset(p, 0, nr_pages
> * PAGE_SIZE) before returning.

Thanks!

Reviewed-by: Tamas K Lengyel <tamas@tklengyel.com>
Alexandru Stefan ISAILA Nov. 8, 2019, 8:31 a.m. UTC | #4
Hi George,

Sorry for the early reminder but v1 you said "Everything else looks OK 
to me." and you did not give a specific ACK. Can you take a look at the 
changes when you have the time?

Thanks,
Alex

On 06.11.2019 17:35, Alexandru Stefan ISAILA wrote:
> By default the sve bits are not set.
> This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
> to set a range of sve bits.
> The core function, p2m_set_suppress_ve_multi(), does not brake in case
> of a error and it is doing a best effort for setting the bits in the
> given range. A check for continuation is made in order to have
> preemption on big ranges.
> 
> Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>
> 
> ---
> Changes since V1:
> 	- Remove "continue"
> 	- Add a new field in xen_hvm_altp2m_suppress_ve to store the
> continuation value
> 	- Have p2m_set_suppress_ve_multi() take
> xen_hvm_altp2m_suppress_ve as a param.
> ---
>   tools/libxc/include/xenctrl.h   |  3 ++
>   tools/libxc/xc_altp2m.c         | 25 ++++++++++++++
>   xen/arch/x86/hvm/hvm.c          | 20 ++++++++++--
>   xen/arch/x86/mm/p2m.c           | 58 +++++++++++++++++++++++++++++++++
>   xen/include/public/hvm/hvm_op.h |  5 ++-
>   xen/include/xen/mem_access.h    |  3 ++
>   6 files changed, 111 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> index f4431687b3..21b644f459 100644
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -1923,6 +1923,9 @@ int xc_altp2m_switch_to_view(xc_interface *handle, uint32_t domid,
>                                uint16_t view_id);
>   int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
>                                 uint16_t view_id, xen_pfn_t gfn, bool sve);
> +int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
> +                                   uint16_t view_id, xen_pfn_t start_gfn,
> +                                   uint32_t nr, bool sve);
>   int xc_altp2m_get_suppress_ve(xc_interface *handle, uint32_t domid,
>                                 uint16_t view_id, xen_pfn_t gfn, bool *sve);
>   int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
> diff --git a/tools/libxc/xc_altp2m.c b/tools/libxc/xc_altp2m.c
> index 09dad0355e..6605d9abbe 100644
> --- a/tools/libxc/xc_altp2m.c
> +++ b/tools/libxc/xc_altp2m.c
> @@ -234,6 +234,31 @@ int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
>       return rc;
>   }
>   
> +int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
> +                                   uint16_t view_id, xen_pfn_t start_gfn,
> +                                   uint32_t nr, bool sve)
> +{
> +    int rc;
> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_altp2m_op_t, arg);
> +
> +    arg = xc_hypercall_buffer_alloc(handle, arg, sizeof(*arg));
> +    if ( arg == NULL )
> +        return -1;
> +
> +    arg->version = HVMOP_ALTP2M_INTERFACE_VERSION;
> +    arg->cmd = HVMOP_altp2m_set_suppress_ve_multi;
> +    arg->domain = domid;
> +    arg->u.suppress_ve.view = view_id;
> +    arg->u.suppress_ve.gfn = start_gfn;
> +    arg->u.suppress_ve.suppress_ve = sve;
> +    arg->u.suppress_ve.nr = nr;
> +
> +    rc = xencall2(handle->xcall, __HYPERVISOR_hvm_op, HVMOP_altp2m,
> +                  HYPERCALL_BUFFER_AS_ARG(arg));
> +    xc_hypercall_buffer_free(handle, arg);
> +    return rc;
> +}
> +
>   int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
>                                uint16_t view_id, xen_pfn_t gfn,
>                                xenmem_access_t access)
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 06a7b40107..66ed8b8e3e 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -4535,6 +4535,7 @@ static int do_altp2m_op(
>       case HVMOP_altp2m_destroy_p2m:
>       case HVMOP_altp2m_switch_p2m:
>       case HVMOP_altp2m_set_suppress_ve:
> +    case HVMOP_altp2m_set_suppress_ve_multi:
>       case HVMOP_altp2m_get_suppress_ve:
>       case HVMOP_altp2m_set_mem_access:
>       case HVMOP_altp2m_set_mem_access_multi:
> @@ -4681,7 +4682,7 @@ static int do_altp2m_op(
>           break;
>   
>       case HVMOP_altp2m_set_suppress_ve:
> -        if ( a.u.suppress_ve.pad1 || a.u.suppress_ve.pad2 )
> +        if ( a.u.suppress_ve.pad1 )
>               rc = -EINVAL;
>           else
>           {
> @@ -4693,8 +4694,23 @@ static int do_altp2m_op(
>           }
>           break;
>   
> +    case HVMOP_altp2m_set_suppress_ve_multi:
> +        if ( a.u.suppress_ve.pad1 || !a.u.suppress_ve.nr )
> +            rc = -EINVAL;
> +        else
> +        {
> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
> +
> +            if ( rc == -ERESTART )
> +                if ( __copy_field_to_guest(guest_handle_cast(arg,
> +                                           xen_hvm_altp2m_op_t),
> +                                           &a, u.suppress_ve.opaque) )
> +                    rc = -EFAULT;
> +        }
> +        break;
> +
>       case HVMOP_altp2m_get_suppress_ve:
> -        if ( a.u.suppress_ve.pad1 || a.u.suppress_ve.pad2 )
> +        if ( a.u.suppress_ve.pad1 )
>               rc = -EINVAL;
>           else
>           {
> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> index e5e4349dea..9e1335065d 100644
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -3054,6 +3054,64 @@ out:
>       return rc;
>   }
>   
> +/*
> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
> + */
> +int p2m_set_suppress_ve_multi(struct domain *d,
> +                              struct xen_hvm_altp2m_suppress_ve* sve)
> +{
> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
> +    struct p2m_domain *ap2m = NULL;
> +    struct p2m_domain *p2m;
> +    uint64_t start = sve->opaque ?: sve->gfn;
> +    int rc = 0;
> +
> +    if ( sve->view > 0 )
> +    {
> +        if ( sve->view >= MAX_ALTP2M ||
> +             d->arch.altp2m_eptp[sve->view] == mfn_x(INVALID_MFN) )
> +            return -EINVAL;
> +
> +        p2m = ap2m = d->arch.altp2m_p2m[sve->view];
> +    }
> +    else
> +        p2m = host_p2m;
> +
> +    p2m_lock(host_p2m);
> +
> +    if ( ap2m )
> +        p2m_lock(ap2m);
> +
> +
> +    while ( start < sve->nr )
> +    {
> +        p2m_access_t a;
> +        p2m_type_t t;
> +        mfn_t mfn;
> +
> +        if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, AP2MGET_query) )
> +            a = p2m->default_access;
> +
> +        p2m->set_entry(p2m, _gfn(start), mfn, PAGE_ORDER_4K, t, a, sve->suppress_ve);
> +
> +        /* Check for continuation if it's not the last iteration. */
> +        if ( sve->nr > ++start && hypercall_preempt_check() )
> +        {
> +            rc = -ERESTART;
> +            break;
> +        }
> +    }
> +
> +    sve->opaque = start;
> +
> +    if ( ap2m )
> +        p2m_unlock(ap2m);
> +
> +    p2m_unlock(host_p2m);
> +
> +    return rc;
> +}
> +
>   int p2m_get_suppress_ve(struct domain *d, gfn_t gfn, bool *suppress_ve,
>                           unsigned int altp2m_idx)
>   {
> diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
> index 353f8034d9..9834ce0aea 100644
> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -42,8 +42,9 @@ struct xen_hvm_altp2m_suppress_ve {
>       uint16_t view;
>       uint8_t suppress_ve; /* Boolean type. */
>       uint8_t pad1;
> -    uint32_t pad2;
> +    uint32_t nr;
>       uint64_t gfn;
> +    uint64_t opaque;
>   };
>   
>   #if __XEN_INTERFACE_VERSION__ < 0x00040900
> @@ -339,6 +340,8 @@ struct xen_hvm_altp2m_op {
>   #define HVMOP_altp2m_vcpu_disable_notify  13
>   /* Get the active vcpu p2m index */
>   #define HVMOP_altp2m_get_p2m_idx          14
> +/* Set the "Supress #VE" bit for a range of pages */
> +#define HVMOP_altp2m_set_suppress_ve_multi 15
>       domid_t domain;
>       uint16_t pad1;
>       uint32_t pad2;
> diff --git a/xen/include/xen/mem_access.h b/xen/include/xen/mem_access.h
> index e4d24502e0..ffecd2650e 100644
> --- a/xen/include/xen/mem_access.h
> +++ b/xen/include/xen/mem_access.h
> @@ -75,6 +75,9 @@ long p2m_set_mem_access_multi(struct domain *d,
>   int p2m_set_suppress_ve(struct domain *d, gfn_t gfn, bool suppress_ve,
>                           unsigned int altp2m_idx);
>   
> +int p2m_set_suppress_ve_multi(struct domain *d,
> +                              struct xen_hvm_altp2m_suppress_ve* suppress_ve);
> +
>   int p2m_get_suppress_ve(struct domain *d, gfn_t gfn, bool *suppress_ve,
>                           unsigned int altp2m_idx);
>   
>
Jan Beulich Nov. 12, 2019, 11:54 a.m. UTC | #5
On 06.11.2019 16:35, Alexandru Stefan ISAILA wrote:
> @@ -4681,7 +4682,7 @@ static int do_altp2m_op(
>          break;
>  
>      case HVMOP_altp2m_set_suppress_ve:
> -        if ( a.u.suppress_ve.pad1 || a.u.suppress_ve.pad2 )
> +        if ( a.u.suppress_ve.pad1 )

Just because the field changes its name doesn't mean you can
drop the check. You even add a new field not used (yet) by
this sub-function, which then also would need checking here.

> @@ -4693,8 +4694,23 @@ static int do_altp2m_op(
>          }
>          break;
>  
> +    case HVMOP_altp2m_set_suppress_ve_multi:
> +        if ( a.u.suppress_ve.pad1 || !a.u.suppress_ve.nr )

A count of zero typically is taken as a no-op, not an error.

> +            rc = -EINVAL;
> +        else
> +        {
> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
> +
> +            if ( rc == -ERESTART )
> +                if ( __copy_field_to_guest(guest_handle_cast(arg,
> +                                           xen_hvm_altp2m_op_t),
> +                                           &a, u.suppress_ve.opaque) )
> +                    rc = -EFAULT;

If the operation is best effort, _some_ indication of failure should
still be handed back to the caller. Whether that's through the opaque
field or by some other means is secondary. If not via that field
(which would make the outer of the two if()-s disappear), please fold
the if()-s.

> +        }
> +        break;
> +
>      case HVMOP_altp2m_get_suppress_ve:
> -        if ( a.u.suppress_ve.pad1 || a.u.suppress_ve.pad2 )
> +        if ( a.u.suppress_ve.pad1 )

See above.

> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -3054,6 +3054,64 @@ out:
>      return rc;
>  }
>  
> +/*
> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
> + */
> +int p2m_set_suppress_ve_multi(struct domain *d,
> +                              struct xen_hvm_altp2m_suppress_ve* sve)

Misplaced *.

> +{
> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
> +    struct p2m_domain *ap2m = NULL;
> +    struct p2m_domain *p2m;
> +    uint64_t start = sve->opaque ?: sve->gfn;

According to this start (and hence ->opaque) are GFNs.

> +    int rc = 0;
> +
> +    if ( sve->view > 0 )
> +    {
> +        if ( sve->view >= MAX_ALTP2M ||
> +             d->arch.altp2m_eptp[sve->view] == mfn_x(INVALID_MFN) )
> +            return -EINVAL;
> +
> +        p2m = ap2m = d->arch.altp2m_p2m[sve->view];
> +    }
> +    else
> +        p2m = host_p2m;
> +
> +    p2m_lock(host_p2m);
> +
> +    if ( ap2m )
> +        p2m_lock(ap2m);
> +
> +
> +    while ( start < sve->nr )

According to this, start is an index. Which of the two do you
mean?

> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -42,8 +42,9 @@ struct xen_hvm_altp2m_suppress_ve {
>      uint16_t view;
>      uint8_t suppress_ve; /* Boolean type. */
>      uint8_t pad1;
> -    uint32_t pad2;
> +    uint32_t nr;
>      uint64_t gfn;
> +    uint64_t opaque;
>  };

How is this addition of a field going to work compatibly with old
and new callers on old and new hypervisors? Recall in particular
that these operations are (almost?) all potentially usable by the
guest itself.

Jan
Tamas K Lengyel Nov. 12, 2019, 2:05 p.m. UTC | #6
On Tue, Nov 12, 2019 at 4:54 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 06.11.2019 16:35, Alexandru Stefan ISAILA wrote:
> > @@ -4681,7 +4682,7 @@ static int do_altp2m_op(
> >          break;
> >
> >      case HVMOP_altp2m_set_suppress_ve:
> > -        if ( a.u.suppress_ve.pad1 || a.u.suppress_ve.pad2 )
> > +        if ( a.u.suppress_ve.pad1 )
>
> Just because the field changes its name doesn't mean you can
> drop the check. You even add a new field not used (yet) by
> this sub-function, which then also would need checking here.
>
> > @@ -4693,8 +4694,23 @@ static int do_altp2m_op(
> >          }
> >          break;
> >
> > +    case HVMOP_altp2m_set_suppress_ve_multi:
> > +        if ( a.u.suppress_ve.pad1 || !a.u.suppress_ve.nr )
>
> A count of zero typically is taken as a no-op, not an error.
>
> > +            rc = -EINVAL;
> > +        else
> > +        {
> > +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
> > +
> > +            if ( rc == -ERESTART )
> > +                if ( __copy_field_to_guest(guest_handle_cast(arg,
> > +                                           xen_hvm_altp2m_op_t),
> > +                                           &a, u.suppress_ve.opaque) )
> > +                    rc = -EFAULT;
>
> If the operation is best effort, _some_ indication of failure should
> still be handed back to the caller. Whether that's through the opaque
> field or by some other means is secondary. If not via that field
> (which would make the outer of the two if()-s disappear), please fold
> the if()-s.

At least for mem_sharing_range_op we also do a best-effort and don't
return an error for pages where it wasn't possible to share. So I
don't think it's absolutely necessary to do that, especially if the
caller can't do anything about those errors anyway.

Tamas
Jan Beulich Nov. 12, 2019, 2:31 p.m. UTC | #7
On 12.11.2019 15:05, Tamas K Lengyel wrote:
> On Tue, Nov 12, 2019 at 4:54 AM Jan Beulich <jbeulich@suse.com> wrote:
>> On 06.11.2019 16:35, Alexandru Stefan ISAILA wrote:
>>> +        else
>>> +        {
>>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
>>> +
>>> +            if ( rc == -ERESTART )
>>> +                if ( __copy_field_to_guest(guest_handle_cast(arg,
>>> +                                           xen_hvm_altp2m_op_t),
>>> +                                           &a, u.suppress_ve.opaque) )
>>> +                    rc = -EFAULT;
>>
>> If the operation is best effort, _some_ indication of failure should
>> still be handed back to the caller. Whether that's through the opaque
>> field or by some other means is secondary. If not via that field
>> (which would make the outer of the two if()-s disappear), please fold
>> the if()-s.
> 
> At least for mem_sharing_range_op we also do a best-effort and don't
> return an error for pages where it wasn't possible to share. So I
> don't think it's absolutely necessary to do that, especially if the
> caller can't do anything about those errors anyway.

mem-sharing is a little different in nature, isn't it? If you
can't share a page, both involved guests will continue to run
with their own instances. If you want to suppress #VE delivery
and it fails, behavior won't be transparently correct, as
there'll potentially be #VE when there should be none. Whether
that's benign to the guest very much depends on its handler.

Jan
Tamas K Lengyel Nov. 13, 2019, 2:51 p.m. UTC | #8
On Tue, Nov 12, 2019 at 7:31 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 12.11.2019 15:05, Tamas K Lengyel wrote:
> > On Tue, Nov 12, 2019 at 4:54 AM Jan Beulich <jbeulich@suse.com> wrote:
> >> On 06.11.2019 16:35, Alexandru Stefan ISAILA wrote:
> >>> +        else
> >>> +        {
> >>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
> >>> +
> >>> +            if ( rc == -ERESTART )
> >>> +                if ( __copy_field_to_guest(guest_handle_cast(arg,
> >>> +                                           xen_hvm_altp2m_op_t),
> >>> +                                           &a, u.suppress_ve.opaque) )
> >>> +                    rc = -EFAULT;
> >>
> >> If the operation is best effort, _some_ indication of failure should
> >> still be handed back to the caller. Whether that's through the opaque
> >> field or by some other means is secondary. If not via that field
> >> (which would make the outer of the two if()-s disappear), please fold
> >> the if()-s.
> >
> > At least for mem_sharing_range_op we also do a best-effort and don't
> > return an error for pages where it wasn't possible to share. So I
> > don't think it's absolutely necessary to do that, especially if the
> > caller can't do anything about those errors anyway.
>
> mem-sharing is a little different in nature, isn't it? If you
> can't share a page, both involved guests will continue to run
> with their own instances. If you want to suppress #VE delivery
> and it fails, behavior won't be transparently correct, as
> there'll potentially be #VE when there should be none. Whether
> that's benign to the guest very much depends on its handler.

Makes me wonder whether it would make more sense to flip this thing on
its head and have supress_ve be set by default (since its ignored by
default) and then have pages for which the EPT violation should be
convertible to #VE be specifically enabled by turning suppress_ve off.
That would eliminate the possibility of having the in-guest handler
getting #VE for pages it is not ready to handle. The hypervisor (and
the external VMI toolstack) OTOH should always be in a position to
handle EPT violations it itself causes by changing the page
permissions.

Tamas
Tamas K Lengyel Nov. 13, 2019, 2:57 p.m. UTC | #9
On Wed, Nov 13, 2019 at 7:51 AM Tamas K Lengyel <tamas@tklengyel.com> wrote:
>
> On Tue, Nov 12, 2019 at 7:31 AM Jan Beulich <jbeulich@suse.com> wrote:
> >
> > On 12.11.2019 15:05, Tamas K Lengyel wrote:
> > > On Tue, Nov 12, 2019 at 4:54 AM Jan Beulich <jbeulich@suse.com> wrote:
> > >> On 06.11.2019 16:35, Alexandru Stefan ISAILA wrote:
> > >>> +        else
> > >>> +        {
> > >>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
> > >>> +
> > >>> +            if ( rc == -ERESTART )
> > >>> +                if ( __copy_field_to_guest(guest_handle_cast(arg,
> > >>> +                                           xen_hvm_altp2m_op_t),
> > >>> +                                           &a, u.suppress_ve.opaque) )
> > >>> +                    rc = -EFAULT;
> > >>
> > >> If the operation is best effort, _some_ indication of failure should
> > >> still be handed back to the caller. Whether that's through the opaque
> > >> field or by some other means is secondary. If not via that field
> > >> (which would make the outer of the two if()-s disappear), please fold
> > >> the if()-s.
> > >
> > > At least for mem_sharing_range_op we also do a best-effort and don't
> > > return an error for pages where it wasn't possible to share. So I
> > > don't think it's absolutely necessary to do that, especially if the
> > > caller can't do anything about those errors anyway.
> >
> > mem-sharing is a little different in nature, isn't it? If you
> > can't share a page, both involved guests will continue to run
> > with their own instances. If you want to suppress #VE delivery
> > and it fails, behavior won't be transparently correct, as
> > there'll potentially be #VE when there should be none. Whether
> > that's benign to the guest very much depends on its handler.
>
> Makes me wonder whether it would make more sense to flip this thing on
> its head and have supress_ve be set by default (since its ignored by
> default) and then have pages for which the EPT violation should be
> convertible to #VE be specifically enabled by turning suppress_ve off.
> That would eliminate the possibility of having the in-guest handler
> getting #VE for pages it is not ready to handle. The hypervisor (and
> the external VMI toolstack) OTOH should always be in a position to
> handle EPT violations it itself causes by changing the page
> permissions.

Actually, now that I looked at it, that's _exactly_ what we do
already. The suppress_ve bit is always set for all EPT pages. So this
operation here is going to be used to enable #VE for pages, not the
other way around. So there wouldn't be a case of "potentially be #VE
when there should be none".

Tamas
Jan Beulich Nov. 13, 2019, 4:52 p.m. UTC | #10
On 13.11.2019 15:57, Tamas K Lengyel wrote:
> On Wed, Nov 13, 2019 at 7:51 AM Tamas K Lengyel <tamas@tklengyel.com> wrote:
>>
>> On Tue, Nov 12, 2019 at 7:31 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>
>>> On 12.11.2019 15:05, Tamas K Lengyel wrote:
>>>> On Tue, Nov 12, 2019 at 4:54 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>> On 06.11.2019 16:35, Alexandru Stefan ISAILA wrote:
>>>>>> +        else
>>>>>> +        {
>>>>>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
>>>>>> +
>>>>>> +            if ( rc == -ERESTART )
>>>>>> +                if ( __copy_field_to_guest(guest_handle_cast(arg,
>>>>>> +                                           xen_hvm_altp2m_op_t),
>>>>>> +                                           &a, u.suppress_ve.opaque) )
>>>>>> +                    rc = -EFAULT;
>>>>>
>>>>> If the operation is best effort, _some_ indication of failure should
>>>>> still be handed back to the caller. Whether that's through the opaque
>>>>> field or by some other means is secondary. If not via that field
>>>>> (which would make the outer of the two if()-s disappear), please fold
>>>>> the if()-s.
>>>>
>>>> At least for mem_sharing_range_op we also do a best-effort and don't
>>>> return an error for pages where it wasn't possible to share. So I
>>>> don't think it's absolutely necessary to do that, especially if the
>>>> caller can't do anything about those errors anyway.
>>>
>>> mem-sharing is a little different in nature, isn't it? If you
>>> can't share a page, both involved guests will continue to run
>>> with their own instances. If you want to suppress #VE delivery
>>> and it fails, behavior won't be transparently correct, as
>>> there'll potentially be #VE when there should be none. Whether
>>> that's benign to the guest very much depends on its handler.
>>
>> Makes me wonder whether it would make more sense to flip this thing on
>> its head and have supress_ve be set by default (since its ignored by
>> default) and then have pages for which the EPT violation should be
>> convertible to #VE be specifically enabled by turning suppress_ve off.
>> That would eliminate the possibility of having the in-guest handler
>> getting #VE for pages it is not ready to handle. The hypervisor (and
>> the external VMI toolstack) OTOH should always be in a position to
>> handle EPT violations it itself causes by changing the page
>> permissions.
> 
> Actually, now that I looked at it, that's _exactly_ what we do
> already. The suppress_ve bit is always set for all EPT pages. So this
> operation here is going to be used to enable #VE for pages, not the
> other way around. So there wouldn't be a case of "potentially be #VE
> when there should be none".

But this doesn't change the bottom line of my earlier comment: It's
as bad to an OS to see too many #VE as it is to miss any that are
expected.

Jan
Tamas K Lengyel Nov. 13, 2019, 6:38 p.m. UTC | #11
On Wed, Nov 13, 2019 at 9:52 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 13.11.2019 15:57, Tamas K Lengyel wrote:
> > On Wed, Nov 13, 2019 at 7:51 AM Tamas K Lengyel <tamas@tklengyel.com> wrote:
> >>
> >> On Tue, Nov 12, 2019 at 7:31 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>
> >>> On 12.11.2019 15:05, Tamas K Lengyel wrote:
> >>>> On Tue, Nov 12, 2019 at 4:54 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>> On 06.11.2019 16:35, Alexandru Stefan ISAILA wrote:
> >>>>>> +        else
> >>>>>> +        {
> >>>>>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
> >>>>>> +
> >>>>>> +            if ( rc == -ERESTART )
> >>>>>> +                if ( __copy_field_to_guest(guest_handle_cast(arg,
> >>>>>> +                                           xen_hvm_altp2m_op_t),
> >>>>>> +                                           &a, u.suppress_ve.opaque) )
> >>>>>> +                    rc = -EFAULT;
> >>>>>
> >>>>> If the operation is best effort, _some_ indication of failure should
> >>>>> still be handed back to the caller. Whether that's through the opaque
> >>>>> field or by some other means is secondary. If not via that field
> >>>>> (which would make the outer of the two if()-s disappear), please fold
> >>>>> the if()-s.
> >>>>
> >>>> At least for mem_sharing_range_op we also do a best-effort and don't
> >>>> return an error for pages where it wasn't possible to share. So I
> >>>> don't think it's absolutely necessary to do that, especially if the
> >>>> caller can't do anything about those errors anyway.
> >>>
> >>> mem-sharing is a little different in nature, isn't it? If you
> >>> can't share a page, both involved guests will continue to run
> >>> with their own instances. If you want to suppress #VE delivery
> >>> and it fails, behavior won't be transparently correct, as
> >>> there'll potentially be #VE when there should be none. Whether
> >>> that's benign to the guest very much depends on its handler.
> >>
> >> Makes me wonder whether it would make more sense to flip this thing on
> >> its head and have supress_ve be set by default (since its ignored by
> >> default) and then have pages for which the EPT violation should be
> >> convertible to #VE be specifically enabled by turning suppress_ve off.
> >> That would eliminate the possibility of having the in-guest handler
> >> getting #VE for pages it is not ready to handle. The hypervisor (and
> >> the external VMI toolstack) OTOH should always be in a position to
> >> handle EPT violations it itself causes by changing the page
> >> permissions.
> >
> > Actually, now that I looked at it, that's _exactly_ what we do
> > already. The suppress_ve bit is always set for all EPT pages. So this
> > operation here is going to be used to enable #VE for pages, not the
> > other way around. So there wouldn't be a case of "potentially be #VE
> > when there should be none".
>
> But this doesn't change the bottom line of my earlier comment: It's
> as bad to an OS to see too many #VE as it is to miss any that are
> expected.

Fair enough.

Tamas
Alexandru Stefan ISAILA Nov. 18, 2019, 1:39 p.m. UTC | #12
On 12.11.2019 13:54, Jan Beulich wrote:
> On 06.11.2019 16:35, Alexandru Stefan ISAILA wrote:
>> @@ -4681,7 +4682,7 @@ static int do_altp2m_op(
>>           break;
>>   
>>       case HVMOP_altp2m_set_suppress_ve:
>> -        if ( a.u.suppress_ve.pad1 || a.u.suppress_ve.pad2 )
>> +        if ( a.u.suppress_ve.pad1 )
> 
> Just because the field changes its name doesn't mean you can
> drop the check. You even add a new field not used (yet) by
> this sub-function, which then also would need checking here.

I will revert the change and check the new field.

> 
>> @@ -4693,8 +4694,23 @@ static int do_altp2m_op(
>>           }
>>           break;
>>   
>> +    case HVMOP_altp2m_set_suppress_ve_multi:
>> +        if ( a.u.suppress_ve.pad1 || !a.u.suppress_ve.nr )
> 
> A count of zero typically is taken as a no-op, not an error.

I will return -EPERM for !nr.

> 
>> +            rc = -EINVAL;
>> +        else
>> +        {
>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
>> +
>> +            if ( rc == -ERESTART )
>> +                if ( __copy_field_to_guest(guest_handle_cast(arg,
>> +                                           xen_hvm_altp2m_op_t),
>> +                                           &a, u.suppress_ve.opaque) )
>> +                    rc = -EFAULT;
> 
> If the operation is best effort, _some_ indication of failure should
> still be handed back to the caller. Whether that's through the opaque
> field or by some other means is secondary. If not via that field
> (which would make the outer of the two if()-s disappear), please fold
> the if()-s.

This can be solved by having a int error_list that will get 
"copy_to_guest_offset()" at the end.

> 
>> --- a/xen/arch/x86/mm/p2m.c
>> +++ b/xen/arch/x86/mm/p2m.c
>> @@ -3054,6 +3054,64 @@ out:
>>       return rc;
>>   }
>>   
>> +/*
>> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
>> + */
>> +int p2m_set_suppress_ve_multi(struct domain *d,
>> +                              struct xen_hvm_altp2m_suppress_ve* sve)
> 
> Misplaced *.

I've missed that, I'll have it the right way.

> 
>> +{
>> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
>> +    struct p2m_domain *ap2m = NULL;
>> +    struct p2m_domain *p2m;
>> +    uint64_t start = sve->opaque ?: sve->gfn;
> 
> According to this start (and hence ->opaque) are GFNs.
> 
>> +    int rc = 0;
>> +
>> +    if ( sve->view > 0 )
>> +    {
>> +        if ( sve->view >= MAX_ALTP2M ||
>> +             d->arch.altp2m_eptp[sve->view] == mfn_x(INVALID_MFN) )
>> +            return -EINVAL;
>> +
>> +        p2m = ap2m = d->arch.altp2m_p2m[sve->view];
>> +    }
>> +    else
>> +        p2m = host_p2m;
>> +
>> +    p2m_lock(host_p2m);
>> +
>> +    if ( ap2m )
>> +        p2m_lock(ap2m);
>> +
>> +
>> +    while ( start < sve->nr )
> 
> According to this, start is an index. Which of the two do you
> mean?

Start is a GFN.

> 
>> --- a/xen/include/public/hvm/hvm_op.h
>> +++ b/xen/include/public/hvm/hvm_op.h
>> @@ -42,8 +42,9 @@ struct xen_hvm_altp2m_suppress_ve {
>>       uint16_t view;
>>       uint8_t suppress_ve; /* Boolean type. */
>>       uint8_t pad1;
>> -    uint32_t pad2;
>> +    uint32_t nr;
>>       uint64_t gfn;
>> +    uint64_t opaque;
>>   };
> 
> How is this addition of a field going to work compatibly with old
> and new callers on old and new hypervisors? Recall in particular
> that these operations are (almost?) all potentially usable by the
> guest itself.
> 

For this HVMOP_ALTP2M_INTERFACE_VERSION shout be increased. I will leave 
it to Tamas to decide if we will need a different structure for 
xen_hvm_altp2m_suppress_ve_multi to keep the compatibility.

Regards,
Alex
Alexandru Stefan ISAILA Nov. 18, 2019, 1:39 p.m. UTC | #13
On 12.11.2019 13:54, Jan Beulich wrote:
> On 06.11.2019 16:35, Alexandru Stefan ISAILA wrote:
>> @@ -4681,7 +4682,7 @@ static int do_altp2m_op(
>>           break;
>>   
>>       case HVMOP_altp2m_set_suppress_ve:
>> -        if ( a.u.suppress_ve.pad1 || a.u.suppress_ve.pad2 )
>> +        if ( a.u.suppress_ve.pad1 )
> 
> Just because the field changes its name doesn't mean you can
> drop the check. You even add a new field not used (yet) by
> this sub-function, which then also would need checking here.

I will revert the change and check the new field.

> 
>> @@ -4693,8 +4694,23 @@ static int do_altp2m_op(
>>           }
>>           break;
>>   
>> +    case HVMOP_altp2m_set_suppress_ve_multi:
>> +        if ( a.u.suppress_ve.pad1 || !a.u.suppress_ve.nr )
> 
> A count of zero typically is taken as a no-op, not an error.

I will return -EPERM for !nr.

> 
>> +            rc = -EINVAL;
>> +        else
>> +        {
>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
>> +
>> +            if ( rc == -ERESTART )
>> +                if ( __copy_field_to_guest(guest_handle_cast(arg,
>> +                                           xen_hvm_altp2m_op_t),
>> +                                           &a, u.suppress_ve.opaque) )
>> +                    rc = -EFAULT;
> 
> If the operation is best effort, _some_ indication of failure should
> still be handed back to the caller. Whether that's through the opaque
> field or by some other means is secondary. If not via that field
> (which would make the outer of the two if()-s disappear), please fold
> the if()-s.

This can be solved by having a int error_list that will get 
"copy_to_guest_offset()" at the end.

> 
>> --- a/xen/arch/x86/mm/p2m.c
>> +++ b/xen/arch/x86/mm/p2m.c
>> @@ -3054,6 +3054,64 @@ out:
>>       return rc;
>>   }
>>   
>> +/*
>> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
>> + */
>> +int p2m_set_suppress_ve_multi(struct domain *d,
>> +                              struct xen_hvm_altp2m_suppress_ve* sve)
> 
> Misplaced *.

I've missed that, I'll have it the right way.

> 
>> +{
>> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
>> +    struct p2m_domain *ap2m = NULL;
>> +    struct p2m_domain *p2m;
>> +    uint64_t start = sve->opaque ?: sve->gfn;
> 
> According to this start (and hence ->opaque) are GFNs.
> 
>> +    int rc = 0;
>> +
>> +    if ( sve->view > 0 )
>> +    {
>> +        if ( sve->view >= MAX_ALTP2M ||
>> +             d->arch.altp2m_eptp[sve->view] == mfn_x(INVALID_MFN) )
>> +            return -EINVAL;
>> +
>> +        p2m = ap2m = d->arch.altp2m_p2m[sve->view];
>> +    }
>> +    else
>> +        p2m = host_p2m;
>> +
>> +    p2m_lock(host_p2m);
>> +
>> +    if ( ap2m )
>> +        p2m_lock(ap2m);
>> +
>> +
>> +    while ( start < sve->nr )
> 
> According to this, start is an index. Which of the two do you
> mean?

Start is a GFN.

> 
>> --- a/xen/include/public/hvm/hvm_op.h
>> +++ b/xen/include/public/hvm/hvm_op.h
>> @@ -42,8 +42,9 @@ struct xen_hvm_altp2m_suppress_ve {
>>       uint16_t view;
>>       uint8_t suppress_ve; /* Boolean type. */
>>       uint8_t pad1;
>> -    uint32_t pad2;
>> +    uint32_t nr;
>>       uint64_t gfn;
>> +    uint64_t opaque;
>>   };
> 
> How is this addition of a field going to work compatibly with old
> and new callers on old and new hypervisors? Recall in particular
> that these operations are (almost?) all potentially usable by the
> guest itself.
> 

For this HVMOP_ALTP2M_INTERFACE_VERSION shout be increased. I will leave 
it to Tamas to decide if we will need a different structure for 
xen_hvm_altp2m_suppress_ve_multi to keep the compatibility.

Regards,
Alex
Jan Beulich Nov. 18, 2019, 2:09 p.m. UTC | #14
On 18.11.2019 14:39, Alexandru Stefan ISAILA wrote:
> On 12.11.2019 13:54, Jan Beulich wrote:
>> On 06.11.2019 16:35, Alexandru Stefan ISAILA wrote:
>>> @@ -4693,8 +4694,23 @@ static int do_altp2m_op(
>>>           }
>>>           break;
>>>   
>>> +    case HVMOP_altp2m_set_suppress_ve_multi:
>>> +        if ( a.u.suppress_ve.pad1 || !a.u.suppress_ve.nr )
>>
>> A count of zero typically is taken as a no-op, not an error.
> 
> I will return -EPERM for !nr.

How is -EPERM better than ...

>>> +            rc = -EINVAL;

... this, and hence how is it addressing my remark?

>>> +        else
>>> +        {
>>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
>>> +
>>> +            if ( rc == -ERESTART )
>>> +                if ( __copy_field_to_guest(guest_handle_cast(arg,
>>> +                                           xen_hvm_altp2m_op_t),
>>> +                                           &a, u.suppress_ve.opaque) )
>>> +                    rc = -EFAULT;
>>
>> If the operation is best effort, _some_ indication of failure should
>> still be handed back to the caller. Whether that's through the opaque
>> field or by some other means is secondary. If not via that field
>> (which would make the outer of the two if()-s disappear), please fold
>> the if()-s.
> 
> This can be solved by having a int error_list that will get 
> "copy_to_guest_offset()" at the end.

I was actually not meaning to suggest to go _that_ far, but I
wouldn't mind such a full solution. Since there's a "get"
counterpart, I was rather thinking that an indication of "there
was _some_ error" might suffice, suggesting to the caller to
inspect which settings actually took effect. Such an indication
could e.g. be an index value identifying the first failed
operation.

>>> --- a/xen/include/public/hvm/hvm_op.h
>>> +++ b/xen/include/public/hvm/hvm_op.h
>>> @@ -42,8 +42,9 @@ struct xen_hvm_altp2m_suppress_ve {
>>>       uint16_t view;
>>>       uint8_t suppress_ve; /* Boolean type. */
>>>       uint8_t pad1;
>>> -    uint32_t pad2;
>>> +    uint32_t nr;
>>>       uint64_t gfn;
>>> +    uint64_t opaque;
>>>   };
>>
>> How is this addition of a field going to work compatibly with old
>> and new callers on old and new hypervisors? Recall in particular
>> that these operations are (almost?) all potentially usable by the
>> guest itself.
> 
> For this HVMOP_ALTP2M_INTERFACE_VERSION shout be increased. I will leave 
> it to Tamas to decide if we will need a different structure for 
> xen_hvm_altp2m_suppress_ve_multi to keep the compatibility.

Wasn't is that due to the possible guest exposure it was decided
that the interface version approach was not suitable here, and hence
it shouldn't be bumped any further?

Jan
Alexandru Stefan ISAILA Nov. 19, 2019, 9:05 a.m. UTC | #15
On 18.11.2019 16:09, Jan Beulich wrote:
> On 18.11.2019 14:39, Alexandru Stefan ISAILA wrote:
>> On 12.11.2019 13:54, Jan Beulich wrote:
>>> On 06.11.2019 16:35, Alexandru Stefan ISAILA wrote:
>>>> @@ -4693,8 +4694,23 @@ static int do_altp2m_op(
>>>>            }
>>>>            break;
>>>>    
>>>> +    case HVMOP_altp2m_set_suppress_ve_multi:
>>>> +        if ( a.u.suppress_ve.pad1 || !a.u.suppress_ve.nr )
>>>
>>> A count of zero typically is taken as a no-op, not an error.
>>
>> I will return -EPERM for !nr.
> 
> How is -EPERM better than ...
> 
>>>> +            rc = -EINVAL;
> 
> ... this, and hence how is it addressing my remark?
> 
>>>> +        else
>>>> +        {
>>>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
>>>> +
>>>> +            if ( rc == -ERESTART )
>>>> +                if ( __copy_field_to_guest(guest_handle_cast(arg,
>>>> +                                           xen_hvm_altp2m_op_t),
>>>> +                                           &a, u.suppress_ve.opaque) )
>>>> +                    rc = -EFAULT;
>>>
>>> If the operation is best effort, _some_ indication of failure should
>>> still be handed back to the caller. Whether that's through the opaque
>>> field or by some other means is secondary. If not via that field
>>> (which would make the outer of the two if()-s disappear), please fold
>>> the if()-s.
>>
>> This can be solved by having a int error_list that will get
>> "copy_to_guest_offset()" at the end.
> 
> I was actually not meaning to suggest to go _that_ far, but I
> wouldn't mind such a full solution. Since there's a "get"
> counterpart, I was rather thinking that an indication of "there
> was _some_ error" might suffice, suggesting to the caller to
> inspect which settings actually took effect. Such an indication
> could e.g. be an index value identifying the first failed
> operation.

This sound good, I can use the return for this or have a separate field 
in the structure.

> 
>>>> --- a/xen/include/public/hvm/hvm_op.h
>>>> +++ b/xen/include/public/hvm/hvm_op.h
>>>> @@ -42,8 +42,9 @@ struct xen_hvm_altp2m_suppress_ve {
>>>>        uint16_t view;
>>>>        uint8_t suppress_ve; /* Boolean type. */
>>>>        uint8_t pad1;
>>>> -    uint32_t pad2;
>>>> +    uint32_t nr;
>>>>        uint64_t gfn;
>>>> +    uint64_t opaque;
>>>>    };
>>>
>>> How is this addition of a field going to work compatibly with old
>>> and new callers on old and new hypervisors? Recall in particular
>>> that these operations are (almost?) all potentially usable by the
>>> guest itself.
>>
>> For this HVMOP_ALTP2M_INTERFACE_VERSION shout be increased. I will leave
>> it to Tamas to decide if we will need a different structure for
>> xen_hvm_altp2m_suppress_ve_multi to keep the compatibility.
> 
> Wasn't is that due to the possible guest exposure it was decided
> that the interface version approach was not suitable here, and hence
> it shouldn't be bumped any further?
> 

That is correct but there was also requested to add the new opaque field 
so I don't know how to have that an still keep the same version.

Alex
Jan Beulich Nov. 19, 2019, 9:23 a.m. UTC | #16
On 19.11.2019 10:05, Alexandru Stefan ISAILA wrote:
> On 18.11.2019 16:09, Jan Beulich wrote:
>> On 18.11.2019 14:39, Alexandru Stefan ISAILA wrote:
>>> For this HVMOP_ALTP2M_INTERFACE_VERSION shout be increased. I will leave
>>> it to Tamas to decide if we will need a different structure for
>>> xen_hvm_altp2m_suppress_ve_multi to keep the compatibility.
>>
>> Wasn't is that due to the possible guest exposure it was decided
>> that the interface version approach was not suitable here, and hence
>> it shouldn't be bumped any further?
>>
> 
> That is correct but there was also requested to add the new opaque field 
> so I don't know how to have that an still keep the same version.

New sub-op?

Jan
Alexandru Stefan ISAILA Nov. 20, 2019, 8:29 a.m. UTC | #17
On 19.11.2019 11:23, Jan Beulich wrote:
> On 19.11.2019 10:05, Alexandru Stefan ISAILA wrote:
>> On 18.11.2019 16:09, Jan Beulich wrote:
>>> On 18.11.2019 14:39, Alexandru Stefan ISAILA wrote:
>>>> For this HVMOP_ALTP2M_INTERFACE_VERSION shout be increased. I will leave
>>>> it to Tamas to decide if we will need a different structure for
>>>> xen_hvm_altp2m_suppress_ve_multi to keep the compatibility.
>>>
>>> Wasn't is that due to the possible guest exposure it was decided
>>> that the interface version approach was not suitable here, and hence
>>> it shouldn't be bumped any further?
>>>
>>
>> That is correct but there was also requested to add the new opaque field
>> so I don't know how to have that an still keep the same version.
> 
> New sub-op?
> 

Wouldn't it be simpler to have a new structure to use for this, 
something like "struct xen_hvm_altp2m_suppress_ve_multi" and then the 
version will be unchanged

Alex
Jan Beulich Nov. 20, 2019, 8:41 a.m. UTC | #18
On 20.11.2019 09:29, Alexandru Stefan ISAILA wrote:
> On 19.11.2019 11:23, Jan Beulich wrote:
>> On 19.11.2019 10:05, Alexandru Stefan ISAILA wrote:
>>> On 18.11.2019 16:09, Jan Beulich wrote:
>>>> On 18.11.2019 14:39, Alexandru Stefan ISAILA wrote:
>>>>> For this HVMOP_ALTP2M_INTERFACE_VERSION shout be increased. I will leave
>>>>> it to Tamas to decide if we will need a different structure for
>>>>> xen_hvm_altp2m_suppress_ve_multi to keep the compatibility.
>>>>
>>>> Wasn't is that due to the possible guest exposure it was decided
>>>> that the interface version approach was not suitable here, and hence
>>>> it shouldn't be bumped any further?
>>>>
>>>
>>> That is correct but there was also requested to add the new opaque field
>>> so I don't know how to have that an still keep the same version.
>>
>> New sub-op?
> 
> Wouldn't it be simpler to have a new structure to use for this, 
> something like "struct xen_hvm_altp2m_suppress_ve_multi" and then the 
> version will be unchanged

Well, if the original sub-op is to remain entirely untouched,
then yes, sure. I have to admit that in the course of the
discussion it became decreasingly clear whether the original
sub-op also wanted some for of adjustment.

Jan
Alexandru Stefan ISAILA Nov. 20, 2019, 8:48 a.m. UTC | #19
On 20.11.2019 10:41, Jan Beulich wrote:
> On 20.11.2019 09:29, Alexandru Stefan ISAILA wrote:
>> On 19.11.2019 11:23, Jan Beulich wrote:
>>> On 19.11.2019 10:05, Alexandru Stefan ISAILA wrote:
>>>> On 18.11.2019 16:09, Jan Beulich wrote:
>>>>> On 18.11.2019 14:39, Alexandru Stefan ISAILA wrote:
>>>>>> For this HVMOP_ALTP2M_INTERFACE_VERSION shout be increased. I will leave
>>>>>> it to Tamas to decide if we will need a different structure for
>>>>>> xen_hvm_altp2m_suppress_ve_multi to keep the compatibility.
>>>>>
>>>>> Wasn't is that due to the possible guest exposure it was decided
>>>>> that the interface version approach was not suitable here, and hence
>>>>> it shouldn't be bumped any further?
>>>>>
>>>>
>>>> That is correct but there was also requested to add the new opaque field
>>>> so I don't know how to have that an still keep the same version.
>>>
>>> New sub-op?
>>
>> Wouldn't it be simpler to have a new structure to use for this,
>> something like "struct xen_hvm_altp2m_suppress_ve_multi" and then the
>> version will be unchanged
> 
> Well, if the original sub-op is to remain entirely untouched,
> then yes, sure. I have to admit that in the course of the
> discussion it became decreasingly clear whether the original
> sub-op also wanted some for of adjustment.
> 

I agree with the clearness here. So the original sub-op will remain 
untouched.

Thanks,
Alex
diff mbox series

Patch

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index f4431687b3..21b644f459 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1923,6 +1923,9 @@  int xc_altp2m_switch_to_view(xc_interface *handle, uint32_t domid,
                              uint16_t view_id);
 int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
                               uint16_t view_id, xen_pfn_t gfn, bool sve);
+int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
+                                   uint16_t view_id, xen_pfn_t start_gfn,
+                                   uint32_t nr, bool sve);
 int xc_altp2m_get_suppress_ve(xc_interface *handle, uint32_t domid,
                               uint16_t view_id, xen_pfn_t gfn, bool *sve);
 int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
diff --git a/tools/libxc/xc_altp2m.c b/tools/libxc/xc_altp2m.c
index 09dad0355e..6605d9abbe 100644
--- a/tools/libxc/xc_altp2m.c
+++ b/tools/libxc/xc_altp2m.c
@@ -234,6 +234,31 @@  int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
     return rc;
 }
 
+int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
+                                   uint16_t view_id, xen_pfn_t start_gfn,
+                                   uint32_t nr, bool sve)
+{
+    int rc;
+    DECLARE_HYPERCALL_BUFFER(xen_hvm_altp2m_op_t, arg);
+
+    arg = xc_hypercall_buffer_alloc(handle, arg, sizeof(*arg));
+    if ( arg == NULL )
+        return -1;
+
+    arg->version = HVMOP_ALTP2M_INTERFACE_VERSION;
+    arg->cmd = HVMOP_altp2m_set_suppress_ve_multi;
+    arg->domain = domid;
+    arg->u.suppress_ve.view = view_id;
+    arg->u.suppress_ve.gfn = start_gfn;
+    arg->u.suppress_ve.suppress_ve = sve;
+    arg->u.suppress_ve.nr = nr;
+
+    rc = xencall2(handle->xcall, __HYPERVISOR_hvm_op, HVMOP_altp2m,
+                  HYPERCALL_BUFFER_AS_ARG(arg));
+    xc_hypercall_buffer_free(handle, arg);
+    return rc;
+}
+
 int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
                              uint16_t view_id, xen_pfn_t gfn,
                              xenmem_access_t access)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 06a7b40107..66ed8b8e3e 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4535,6 +4535,7 @@  static int do_altp2m_op(
     case HVMOP_altp2m_destroy_p2m:
     case HVMOP_altp2m_switch_p2m:
     case HVMOP_altp2m_set_suppress_ve:
+    case HVMOP_altp2m_set_suppress_ve_multi:
     case HVMOP_altp2m_get_suppress_ve:
     case HVMOP_altp2m_set_mem_access:
     case HVMOP_altp2m_set_mem_access_multi:
@@ -4681,7 +4682,7 @@  static int do_altp2m_op(
         break;
 
     case HVMOP_altp2m_set_suppress_ve:
-        if ( a.u.suppress_ve.pad1 || a.u.suppress_ve.pad2 )
+        if ( a.u.suppress_ve.pad1 )
             rc = -EINVAL;
         else
         {
@@ -4693,8 +4694,23 @@  static int do_altp2m_op(
         }
         break;
 
+    case HVMOP_altp2m_set_suppress_ve_multi:
+        if ( a.u.suppress_ve.pad1 || !a.u.suppress_ve.nr )
+            rc = -EINVAL;
+        else
+        {
+            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve);
+
+            if ( rc == -ERESTART )
+                if ( __copy_field_to_guest(guest_handle_cast(arg,
+                                           xen_hvm_altp2m_op_t),
+                                           &a, u.suppress_ve.opaque) )
+                    rc = -EFAULT;
+        }
+        break;
+
     case HVMOP_altp2m_get_suppress_ve:
-        if ( a.u.suppress_ve.pad1 || a.u.suppress_ve.pad2 )
+        if ( a.u.suppress_ve.pad1 )
             rc = -EINVAL;
         else
         {
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index e5e4349dea..9e1335065d 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -3054,6 +3054,64 @@  out:
     return rc;
 }
 
+/*
+ * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
+ */
+int p2m_set_suppress_ve_multi(struct domain *d,
+                              struct xen_hvm_altp2m_suppress_ve* sve)
+{
+    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
+    struct p2m_domain *ap2m = NULL;
+    struct p2m_domain *p2m;
+    uint64_t start = sve->opaque ?: sve->gfn;
+    int rc = 0;
+
+    if ( sve->view > 0 )
+    {
+        if ( sve->view >= MAX_ALTP2M ||
+             d->arch.altp2m_eptp[sve->view] == mfn_x(INVALID_MFN) )
+            return -EINVAL;
+
+        p2m = ap2m = d->arch.altp2m_p2m[sve->view];
+    }
+    else
+        p2m = host_p2m;
+
+    p2m_lock(host_p2m);
+
+    if ( ap2m )
+        p2m_lock(ap2m);
+
+
+    while ( start < sve->nr )
+    {
+        p2m_access_t a;
+        p2m_type_t t;
+        mfn_t mfn;
+
+        if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, AP2MGET_query) )
+            a = p2m->default_access;
+
+        p2m->set_entry(p2m, _gfn(start), mfn, PAGE_ORDER_4K, t, a, sve->suppress_ve);
+
+        /* Check for continuation if it's not the last iteration. */
+        if ( sve->nr > ++start && hypercall_preempt_check() )
+        {
+            rc = -ERESTART;
+            break;
+        }
+    }
+
+    sve->opaque = start;
+
+    if ( ap2m )
+        p2m_unlock(ap2m);
+
+    p2m_unlock(host_p2m);
+
+    return rc;
+}
+
 int p2m_get_suppress_ve(struct domain *d, gfn_t gfn, bool *suppress_ve,
                         unsigned int altp2m_idx)
 {
diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
index 353f8034d9..9834ce0aea 100644
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -42,8 +42,9 @@  struct xen_hvm_altp2m_suppress_ve {
     uint16_t view;
     uint8_t suppress_ve; /* Boolean type. */
     uint8_t pad1;
-    uint32_t pad2;
+    uint32_t nr;
     uint64_t gfn;
+    uint64_t opaque;
 };
 
 #if __XEN_INTERFACE_VERSION__ < 0x00040900
@@ -339,6 +340,8 @@  struct xen_hvm_altp2m_op {
 #define HVMOP_altp2m_vcpu_disable_notify  13
 /* Get the active vcpu p2m index */
 #define HVMOP_altp2m_get_p2m_idx          14
+/* Set the "Supress #VE" bit for a range of pages */
+#define HVMOP_altp2m_set_suppress_ve_multi 15
     domid_t domain;
     uint16_t pad1;
     uint32_t pad2;
diff --git a/xen/include/xen/mem_access.h b/xen/include/xen/mem_access.h
index e4d24502e0..ffecd2650e 100644
--- a/xen/include/xen/mem_access.h
+++ b/xen/include/xen/mem_access.h
@@ -75,6 +75,9 @@  long p2m_set_mem_access_multi(struct domain *d,
 int p2m_set_suppress_ve(struct domain *d, gfn_t gfn, bool suppress_ve,
                         unsigned int altp2m_idx);
 
+int p2m_set_suppress_ve_multi(struct domain *d,
+                              struct xen_hvm_altp2m_suppress_ve* suppress_ve);
+
 int p2m_get_suppress_ve(struct domain *d, gfn_t gfn, bool *suppress_ve,
                         unsigned int altp2m_idx);