diff mbox series

[v6,01/11] memory: batch processing in acquire_resource()

Message ID 02415890e4e8211513b495228c790e1d16de767f.1594150543.git.michal.leszczynski@cert.pl (mailing list archive)
State New, archived
Headers show
Series Implement support for external IPT monitoring | expand

Commit Message

Michał Leszczyński July 7, 2020, 7:39 p.m. UTC
From: Michal Leszczynski <michal.leszczynski@cert.pl>

Allow to acquire large resources by allowing acquire_resource()
to process items in batches, using hypercall continuation.

Be aware that this modifies the behavior of acquire_resource
call with frame_list=NULL. While previously it would return
the size of internal array (32), with this patch it returns
the maximal quantity of frames that could be requested at once,
i.e. UINT_MAX >> MEMOP_EXTENT_SHIFT.

Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
---
 xen/common/memory.c | 49 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 5 deletions(-)

Comments

Roger Pau Monne July 15, 2020, 9:36 a.m. UTC | #1
On Tue, Jul 07, 2020 at 09:39:40PM +0200, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> 
> Allow to acquire large resources by allowing acquire_resource()
> to process items in batches, using hypercall continuation.
> 
> Be aware that this modifies the behavior of acquire_resource
> call with frame_list=NULL. While previously it would return
> the size of internal array (32), with this patch it returns
> the maximal quantity of frames that could be requested at once,
> i.e. UINT_MAX >> MEMOP_EXTENT_SHIFT.
> 
> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---

FWIW, I think I've also said on a previous version, I would prefer if
the changelog between versions is added to each patch, having it on
the cover letter is not very helpful as I usually care about specific
changes made to each patch.

I've just got one comment that needs addressing below.

>  xen/common/memory.c | 49 ++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 44 insertions(+), 5 deletions(-)
> 
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 714077c1e5..eb42f883df 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -1046,10 +1046,12 @@ static int acquire_grant_table(struct domain *d, unsigned int id,
>  }
>  
>  static int acquire_resource(
> -    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
> +    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg,
> +    unsigned long *start_extent)
>  {
>      struct domain *d, *currd = current->domain;
>      xen_mem_acquire_resource_t xmar;
> +    uint32_t total_frames;
>      /*
>       * The mfn_list and gfn_list (below) arrays are ok on stack for the
>       * moment since they are small, but if they need to grow in future
> @@ -1069,7 +1071,7 @@ static int acquire_resource(
>          if ( xmar.nr_frames )
>              return -EINVAL;
>  
> -        xmar.nr_frames = ARRAY_SIZE(mfn_list);
> +        xmar.nr_frames = UINT_MAX >> MEMOP_EXTENT_SHIFT;
>  
>          if ( __copy_field_to_guest(arg, &xmar, nr_frames) )
>              return -EFAULT;
> @@ -1077,8 +1079,28 @@ static int acquire_resource(
>          return 0;
>      }
>  
> +    total_frames = xmar.nr_frames;
> +
> +    /* Is the size too large for us to encode a continuation? */
> +    if ( unlikely(xmar.nr_frames > (UINT_MAX >> MEMOP_EXTENT_SHIFT)) )
> +        return -EINVAL;
> +
> +    if ( *start_extent )
> +    {
> +        /*
> +         * Check whether start_extent is in bounds, as this
> +         * value if visible to the calling domain.
> +         */
> +        if ( *start_extent > xmar.nr_frames )
> +            return -EINVAL;
> +
> +        xmar.frame += *start_extent;
> +        xmar.nr_frames -= *start_extent;
> +        guest_handle_add_offset(xmar.frame_list, *start_extent);
> +    }
> +
>      if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) )
> -        return -E2BIG;
> +        xmar.nr_frames = ARRAY_SIZE(mfn_list);
>  
>      rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
>      if ( rc )
> @@ -1135,6 +1157,14 @@ static int acquire_resource(
>          }
>      }
>  
> +    if ( !rc )
> +    {
> +        *start_extent += xmar.nr_frames;
> +
> +        if ( *start_extent != total_frames )
> +            rc = -ERESTART;
> +    }
> +
>   out:
>      rcu_unlock_domain(d);
>  
> @@ -1599,8 +1629,17 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>  #endif
>  
>      case XENMEM_acquire_resource:
> -        rc = acquire_resource(
> -            guest_handle_cast(arg, xen_mem_acquire_resource_t));
> +        do {
> +            rc = acquire_resource(
> +                guest_handle_cast(arg, xen_mem_acquire_resource_t),
> +                &start_extent);

I think it would be interesting from a performance PoV to move the
xmar copy_from_guest here, so that each call to acquire_resource
in the loop doesn't need to perform a copy from guest. That's
more relevant for translated callers, where a copy_from_guest involves
a guest page table and a p2m walk.

> +
> +            if ( hypercall_preempt_check() )

You are missing a rc == -ERESTART check here, you don't want to encode
a continuation if rc is different than -ERESTART AFAICT.

Thanks, Roger.
Jan Beulich July 15, 2020, 12:13 p.m. UTC | #2
On 15.07.2020 11:36, Roger Pau Monné wrote:
> On Tue, Jul 07, 2020 at 09:39:40PM +0200, Michał Leszczyński wrote:
>> @@ -1599,8 +1629,17 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>>  #endif
>>  
>>      case XENMEM_acquire_resource:
>> -        rc = acquire_resource(
>> -            guest_handle_cast(arg, xen_mem_acquire_resource_t));
>> +        do {
>> +            rc = acquire_resource(
>> +                guest_handle_cast(arg, xen_mem_acquire_resource_t),
>> +                &start_extent);
> 
> I think it would be interesting from a performance PoV to move the
> xmar copy_from_guest here, so that each call to acquire_resource
> in the loop doesn't need to perform a copy from guest. That's
> more relevant for translated callers, where a copy_from_guest involves
> a guest page table and a p2m walk.

This isn't just a nice-to-have for performance reasons, but a
correctness/consistency thing: A rogue (or buggy) guest may alter
the structure between two such reads. It _may_ be the case that
we're dealing fine with this right now, but it would feel like a
trap to fall into later on.

>> +
>> +            if ( hypercall_preempt_check() )
> 
> You are missing a rc == -ERESTART check here, you don't want to encode
> a continuation if rc is different than -ERESTART AFAICT.

At which point the subsequent containing do/while() likely wants
adjusting to, e.g. to "for( ; ; )".

Jan
Jan Beulich July 15, 2020, 12:28 p.m. UTC | #3
On 07.07.2020 21:39, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> 
> Allow to acquire large resources by allowing acquire_resource()
> to process items in batches, using hypercall continuation.
> 
> Be aware that this modifies the behavior of acquire_resource
> call with frame_list=NULL. While previously it would return
> the size of internal array (32), with this patch it returns
> the maximal quantity of frames that could be requested at once,
> i.e. UINT_MAX >> MEMOP_EXTENT_SHIFT.

This isn't really a behavioral change, and hence I'd prefer this
to be re-worded: It was and is the upper bound on request sizes
that gets reported here. It's just that this upper bound now
changes.

> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -1046,10 +1046,12 @@ static int acquire_grant_table(struct domain *d, unsigned int id,
>  }
>  
>  static int acquire_resource(
> -    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
> +    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg,
> +    unsigned long *start_extent)
>  {
>      struct domain *d, *currd = current->domain;
>      xen_mem_acquire_resource_t xmar;
> +    uint32_t total_frames;

Please don't use fixed width types when plain C types will do
(unsigned int here).

> @@ -1069,7 +1071,7 @@ static int acquire_resource(
>          if ( xmar.nr_frames )
>              return -EINVAL;
>  
> -        xmar.nr_frames = ARRAY_SIZE(mfn_list);
> +        xmar.nr_frames = UINT_MAX >> MEMOP_EXTENT_SHIFT;
>  
>          if ( __copy_field_to_guest(arg, &xmar, nr_frames) )
>              return -EFAULT;
> @@ -1077,8 +1079,28 @@ static int acquire_resource(
>          return 0;
>      }
>  
> +    total_frames = xmar.nr_frames;
> +
> +    /* Is the size too large for us to encode a continuation? */
> +    if ( unlikely(xmar.nr_frames > (UINT_MAX >> MEMOP_EXTENT_SHIFT)) )
> +        return -EINVAL;
> +
> +    if ( *start_extent )
> +    {
> +        /*
> +         * Check whether start_extent is in bounds, as this
> +         * value if visible to the calling domain.
> +         */
> +        if ( *start_extent > xmar.nr_frames )
> +            return -EINVAL;
> +
> +        xmar.frame += *start_extent;
> +        xmar.nr_frames -= *start_extent;
> +        guest_handle_add_offset(xmar.frame_list, *start_extent);
> +    }

May I ask that you drop the if() around this block of code?

Also, looking at this, I wonder whether it's a good idea to use the
"start extent" model here anyway: You could as well update the
struct (saying that it may be clobbered in the public header) and
copy the whole thing back to the original guest struct. This would
then remove the pretty arbitrary "UINT_MAX >> MEMOP_EXTENT_SHIFT"
limit you currently need to enforce. The main question is whether
we'd consider such an adjustment to an existing interface
acceptable; there's an at least theoretical risk that it may break
existing callers. Then again no existing caller can sensibly have
specified a count above 32, and when the copying back would be
limited to just the continuation case, no such caller would be
affected in any way afaict.

Jan
Roger Pau Monne July 16, 2020, 8:14 a.m. UTC | #4
On Wed, Jul 15, 2020 at 02:13:42PM +0200, Jan Beulich wrote:
> On 15.07.2020 11:36, Roger Pau Monné wrote:
> > On Tue, Jul 07, 2020 at 09:39:40PM +0200, Michał Leszczyński wrote:
> >> @@ -1599,8 +1629,17 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
> >>  #endif
> >>  
> >>      case XENMEM_acquire_resource:
> >> -        rc = acquire_resource(
> >> -            guest_handle_cast(arg, xen_mem_acquire_resource_t));
> >> +        do {
> >> +            rc = acquire_resource(
> >> +                guest_handle_cast(arg, xen_mem_acquire_resource_t),
> >> +                &start_extent);
> > 
> > I think it would be interesting from a performance PoV to move the
> > xmar copy_from_guest here, so that each call to acquire_resource
> > in the loop doesn't need to perform a copy from guest. That's
> > more relevant for translated callers, where a copy_from_guest involves
> > a guest page table and a p2m walk.
> 
> This isn't just a nice-to-have for performance reasons, but a
> correctness/consistency thing: A rogue (or buggy) guest may alter
> the structure between two such reads. It _may_ be the case that
> we're dealing fine with this right now, but it would feel like a
> trap to fall into later on.

I *think* this is safe, given you copy from guest and perform the
checks for each iteration. I agree the copy should be pulled out of
the loop together with the checks. There's no point in performing it
for every iteration.

> >> +
> >> +            if ( hypercall_preempt_check() )
> > 
> > You are missing a rc == -ERESTART check here, you don't want to encode
> > a continuation if rc is different than -ERESTART AFAICT.
> 
> At which point the subsequent containing do/while() likely wants
> adjusting to, e.g. to "for( ; ; )".

That's another option, but you would need to add an extra
if ( rc != -ERESTART ) break; to the loop body if the while condition
is removed.

Roger.
Julien Grall July 17, 2020, 2:16 p.m. UTC | #5
Hi,

On 15/07/2020 13:28, Jan Beulich wrote:
> May I ask that you drop the if() around this block of code?
> 
> Also, looking at this, I wonder whether it's a good idea to use the
> "start extent" model here anyway: You could as well update the
> struct (saying that it may be clobbered in the public header) and
> copy the whole thing back to the original guest struct. This would
> then remove the pretty arbitrary "UINT_MAX >> MEMOP_EXTENT_SHIFT"
> limit you currently need to enforce. The main question is whether
> we'd consider such an adjustment to an existing interface
> acceptable; there's an at least theoretical risk that it may break
> existing callers. Then again no existing caller can sensibly have
> specified a count above 32, and when the copying back would be
> limited to just the continuation case, no such caller would be
> affected in any way afaict.

I can see two risks with this approach:
    1) There is no requirement for the 'arg' buffer to be in read-write 
memory.
    2) A guest may expect to re-use the value within the structure after 
the hypercalls.

So I think the "start extent" model is better. This is already used in 
other memory sub-hypercalls, so the risk to break existing users is more 
limited.

Cheers,
diff mbox series

Patch

diff --git a/xen/common/memory.c b/xen/common/memory.c
index 714077c1e5..eb42f883df 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -1046,10 +1046,12 @@  static int acquire_grant_table(struct domain *d, unsigned int id,
 }
 
 static int acquire_resource(
-    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
+    XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg,
+    unsigned long *start_extent)
 {
     struct domain *d, *currd = current->domain;
     xen_mem_acquire_resource_t xmar;
+    uint32_t total_frames;
     /*
      * The mfn_list and gfn_list (below) arrays are ok on stack for the
      * moment since they are small, but if they need to grow in future
@@ -1069,7 +1071,7 @@  static int acquire_resource(
         if ( xmar.nr_frames )
             return -EINVAL;
 
-        xmar.nr_frames = ARRAY_SIZE(mfn_list);
+        xmar.nr_frames = UINT_MAX >> MEMOP_EXTENT_SHIFT;
 
         if ( __copy_field_to_guest(arg, &xmar, nr_frames) )
             return -EFAULT;
@@ -1077,8 +1079,28 @@  static int acquire_resource(
         return 0;
     }
 
+    total_frames = xmar.nr_frames;
+
+    /* Is the size too large for us to encode a continuation? */
+    if ( unlikely(xmar.nr_frames > (UINT_MAX >> MEMOP_EXTENT_SHIFT)) )
+        return -EINVAL;
+
+    if ( *start_extent )
+    {
+        /*
+         * Check whether start_extent is in bounds, as this
+         * value if visible to the calling domain.
+         */
+        if ( *start_extent > xmar.nr_frames )
+            return -EINVAL;
+
+        xmar.frame += *start_extent;
+        xmar.nr_frames -= *start_extent;
+        guest_handle_add_offset(xmar.frame_list, *start_extent);
+    }
+
     if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) )
-        return -E2BIG;
+        xmar.nr_frames = ARRAY_SIZE(mfn_list);
 
     rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
     if ( rc )
@@ -1135,6 +1157,14 @@  static int acquire_resource(
         }
     }
 
+    if ( !rc )
+    {
+        *start_extent += xmar.nr_frames;
+
+        if ( *start_extent != total_frames )
+            rc = -ERESTART;
+    }
+
  out:
     rcu_unlock_domain(d);
 
@@ -1599,8 +1629,17 @@  long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 #endif
 
     case XENMEM_acquire_resource:
-        rc = acquire_resource(
-            guest_handle_cast(arg, xen_mem_acquire_resource_t));
+        do {
+            rc = acquire_resource(
+                guest_handle_cast(arg, xen_mem_acquire_resource_t),
+                &start_extent);
+
+            if ( hypercall_preempt_check() )
+                return hypercall_create_continuation(
+                    __HYPERVISOR_memory_op, "lh",
+                    op | (start_extent << MEMOP_EXTENT_SHIFT), arg);
+        } while ( rc == -ERESTART );
+
         break;
 
     default: