
[for-4.14] mm: fix public declaration of struct xen_mem_acquire_resource

Message ID 20200623135246.66170-1-roger.pau@citrix.com (mailing list archive)
State Superseded
Series [for-4.14] mm: fix public declaration of struct xen_mem_acquire_resource

Commit Message

Roger Pau Monné June 23, 2020, 1:52 p.m. UTC
XENMEM_acquire_resource and its related structure are currently inside
a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
hypervisor or the toolstack only. This is wrong as the hypercall is
already being used by the Linux kernel at least, and as such needs to
be public.

Also switch the usage of uint64_aligned_t to plain uint64_t, as
uint64_aligned_t is only to be used by the toolstack. Note that the
layout of the structure will be the same, as the field is already
naturally aligned to an 8-byte boundary.

No functional change expected.

Fixes: 3f8f12281dd20 ('x86/mm: add HYPERVISOR_memory_op to acquire guest resources')
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Would be good to get this fixed before the release in order to avoid
shipping bogus headers. Should also be backported.
---
 xen/include/public/memory.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
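
The natural-alignment claim above can be checked with a minimal sketch (a reduced, hypothetical stand-in for the real struct, not the actual header):

    #include <stdint.h>
    #include <stddef.h>

    /* Reduced stand-in for xen_mem_acquire_resource. */
    struct acq {
        uint16_t domid;
        uint16_t type;
        uint32_t id;
        uint32_t nr_frames;
        uint32_t pad;
        uint64_t frame;   /* 16 bytes of fields precede it */
    };

    /* The field sits on an 8-byte boundary on 32- and 64-bit ABIs
     * alike, so dropping the aligned attribute cannot move it. */
    _Static_assert(offsetof(struct acq, frame) == 16, "frame moved");

As the discussion below shows, however, the attribute also affects the struct's overall alignment, and with it its sizeof() on 32-bit x86.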

Comments

Paul Durrant June 23, 2020, 1:59 p.m. UTC | #1
> -----Original Message-----
> From: Roger Pau Monne <roger.pau@citrix.com>
> Sent: 23 June 2020 14:53
> To: xen-devel@lists.xenproject.org
> Cc: paul@xen.org; Roger Pau Monne <roger.pau@citrix.com>; Andrew Cooper <andrew.cooper3@citrix.com>;
> George Dunlap <george.dunlap@citrix.com>; Ian Jackson <ian.jackson@eu.citrix.com>; Jan Beulich
> <jbeulich@suse.com>; Julien Grall <julien@xen.org>; Stefano Stabellini <sstabellini@kernel.org>; Wei
> Liu <wl@xen.org>
> Subject: [PATCH for-4.14] mm: fix public declaration of struct xen_mem_acquire_resource
> 
> XENMEM_acquire_resource and its related structure are currently inside
> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
> hypervisor or the toolstack only. This is wrong as the hypercall is
> already being used by the Linux kernel at least, and as such needs to
> be public.
> 
> Also switch the usage of uint64_aligned_t to plain uint64_t, as
> uint64_aligned_t is only to be used by the toolstack. Note that the
> layout of the structure will be the same, as the field is already
> naturally aligned to an 8-byte boundary.
> 
> No functional change expected.
> 
> Fixes: 3f8f12281dd20 ('x86/mm: add HYPERVISOR_memory_op to acquire guest resources')
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
> Would be good to get this fixed before the release in order to avoid
> shipping bogus headers. Should also be backported.

Reviewed-by: Paul Durrant <paul@xen.org>
Release-acked-by: Paul Durrant <paul@xen.org>

> ---
>  xen/include/public/memory.h | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index dbd35305df..1767d7d5f5 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -607,6 +607,8 @@ struct xen_reserved_device_memory_map {
>  typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
> 
> +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
> +
>  /*
>   * Get the pages for a particular guest resource, so that they can be
>   * mapped directly by a tools domain.
> @@ -645,7 +647,7 @@ struct xen_mem_acquire_resource {
>       * IN - the index of the initial frame to be mapped. This parameter
>       *      is ignored if nr_frames is 0.
>       */
> -    uint64_aligned_t frame;
> +    uint64_t frame;
> 
>  #define XENMEM_resource_ioreq_server_frame_bufioreq 0
>  #define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
> @@ -666,8 +668,6 @@ struct xen_mem_acquire_resource {
>  typedef struct xen_mem_acquire_resource xen_mem_acquire_resource_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_mem_acquire_resource_t);
> 
> -#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
> -
>  /*
>   * XENMEM_get_vnumainfo used by guest to get
>   * vNUMA topology from hypervisor.
> --
> 2.26.2
Julien Grall June 23, 2020, 2:27 p.m. UTC | #2
On 23/06/2020 14:52, Roger Pau Monne wrote:
> XENMEM_acquire_resource and its related structure are currently inside
> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
> hypervisor or the toolstack only. This is wrong as the hypercall is
> already being used by the Linux kernel at least, and as such needs to
> be public.
> 
> Also switch the usage of uint64_aligned_t to plain uint64_t, as
> uint64_aligned_t is only to be used by the toolstack. Note that the
> layout of the structure will be the same, as the field is already
> naturally aligned to an 8-byte boundary.
>
> No functional change expected.
> 
> Fixes: 3f8f12281dd20 ('x86/mm: add HYPERVISOR_memory_op to acquire guest resources')
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

Reviewed-by: Julien Grall <jgrall@amazon.com>

> ---
> Would be good to get this fixed before the release in order to avoid
> shipping bogus headers. Should also be backported.
> ---
>   xen/include/public/memory.h | 6 +++---
>   1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index dbd35305df..1767d7d5f5 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -607,6 +607,8 @@ struct xen_reserved_device_memory_map {
>   typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
>   DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
>   
> +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
> +
>   /*
>    * Get the pages for a particular guest resource, so that they can be
>    * mapped directly by a tools domain.
> @@ -645,7 +647,7 @@ struct xen_mem_acquire_resource {
>        * IN - the index of the initial frame to be mapped. This parameter
>        *      is ignored if nr_frames is 0.
>        */
> -    uint64_aligned_t frame;
> +    uint64_t frame;
>   
>   #define XENMEM_resource_ioreq_server_frame_bufioreq 0
>   #define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
> @@ -666,8 +668,6 @@ struct xen_mem_acquire_resource {
>   typedef struct xen_mem_acquire_resource xen_mem_acquire_resource_t;
>   DEFINE_XEN_GUEST_HANDLE(xen_mem_acquire_resource_t);
>   
> -#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
> -
>   /*
>    * XENMEM_get_vnumainfo used by guest to get
>    * vNUMA topology from hypervisor.
>
Jan Beulich June 23, 2020, 3:02 p.m. UTC | #3
On 23.06.2020 15:52, Roger Pau Monne wrote:
> XENMEM_acquire_resource and its related structure are currently inside
> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
> hypervisor or the toolstack only. This is wrong as the hypercall is
> already being used by the Linux kernel at least, and as such needs to
> be public.
> 
> Also switch the usage of uint64_aligned_t to plain uint64_t, as
> uint64_aligned_t is only to be used by the toolstack. Note that the
> layout of the structure will be the same, as the field is already
> naturally aligned to an 8-byte boundary.

I'm afraid it's more complicated, and hence ...

> No functional change expected.

... this doesn't hold: As I notice only now the struct also wrongly
(if it was meant to be a tools-only interface) failed to use
XEN_GUEST_HANDLE_64(), which would in principle have been a problem
for 32-bit callers (passing garbage in the upper half of a handle).
It's not an actual problem because, unlike what would typically be the
case for tools-only interfaces, there is compat translation for this
struct.

With this, however, the problem of your change becomes noticeable:
The structure's alignment for 32-bit x86, and with it the structure's
sizeof(), both change. You should be able to see this by comparing
xen/common/compat/memory.c's disassembly before and after your
change - the number of bytes to copy by the respective
copy_from_guest() ought to have changed.

The question now is whether we're willing to accept such a (marginal)
incompatibility. The chance of a 32-bit guest actually running into
it shouldn't be very high, but with the right placement of an
instance of the struct at the end of a page it would be possible to
observe afaict.

Or whether we go the alternative route and pad the struct suitably
for 32-bit.
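
As a sketch of the difference being described (the typedef approximates uint64_aligned_t on 32-bit x86, and a plain pointer stands in for the guest handle):

    #include <stdint.h>

    typedef uint64_t u64a __attribute__((aligned(8))); /* ~uint64_aligned_t */

    struct acq_old {                 /* with uint64_aligned_t */
        uint16_t domid, type;
        uint32_t id, nr_frames, pad;
        u64a frame;                  /* offset 16 either way */
        uint32_t *frame_list;        /* 4-byte pointer on ILP32 */
    };                               /* i386: alignof 8 -> sizeof 32 */

    struct acq_new {                 /* with plain uint64_t */
        uint16_t domid, type;
        uint32_t id, nr_frames, pad;
        uint64_t frame;              /* i386 aligns uint64_t to only 4 */
        uint32_t *frame_list;
    };                               /* i386: alignof 4 -> sizeof 28 */

    /* Only the trailing padding differs, so on 32-bit x86 the compat
     * copy_from_guest() would shrink from 32 to 28 bytes. */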

> Fixes: 3f8f12281dd20 ('x86/mm: add HYPERVISOR_memory_op to acquire guest resources')
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
> Would be good to get this fixed before the release in order to avoid
> shipping bogus headers. Should also be backported.

This was already part of 4.13, as you imply by requesting a backport.
Hence the bogus header had already been shipped.

> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -607,6 +607,8 @@ struct xen_reserved_device_memory_map {
>  typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
>  
> +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
> +
>  /*
>   * Get the pages for a particular guest resource, so that they can be
>   * mapped directly by a tools domain.

This comment is now stale.

Jan
Jan Beulich June 23, 2020, 3:04 p.m. UTC | #4
On 23.06.2020 15:52, Roger Pau Monne wrote:
> XENMEM_acquire_resource and its related structure are currently inside
> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
> hypervisor or the toolstack only. This is wrong as the hypercall is
> already being used by the Linux kernel at least, and as such needs to
> be public.

Actually - how does this work for the Linux kernel, seeing

    rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
    if ( rc )
        return rc;

    rc = xsm_domain_resource_map(XSM_DM_PRIV, d);
    if ( rc )
        goto out;

in the function?

Jan
Roger Pau Monné June 23, 2020, 3:56 p.m. UTC | #5
On Tue, Jun 23, 2020 at 05:02:04PM +0200, Jan Beulich wrote:
> On 23.06.2020 15:52, Roger Pau Monne wrote:
> > XENMEM_acquire_resource and its related structure are currently inside
> > a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
> > hypervisor or the toolstack only. This is wrong as the hypercall is
> > already being used by the Linux kernel at least, and as such needs to
> > be public.
> > 
> > Also switch the usage of uint64_aligned_t to plain uint64_t, as
> > uint64_aligned_t is only to be used by the toolstack. Note that the
> > layout of the structure will be the same, as the field is already
> > naturally aligned to an 8-byte boundary.
> 
> I'm afraid it's more complicated, and hence ...
> 
> > No functional change expected.
> 
> ... this doesn't hold: As I notice only now the struct also wrongly
> (if it was meant to be a tools-only interface) failed to use
> XEN_GUEST_HANDLE_64(), which would in principle have been a problem
> for 32-bit callers (passing garbage in the upper half of a handle).
> It's not an actual problem because, unlike what would typically be the
> case for tools-only interfaces, there is compat translation for this
> struct.

Yes, there's already compat translation for the frame_list array.

> With this, however, the problem of your change becomes noticeable:
> The structure's alignment for 32-bit x86, and with it the structure's
> sizeof(), both change. You should be able to see this by comparing
> xen/common/compat/memory.c's disassembly before and after your
> change - the number of bytes to copy by the respective
> copy_from_guest() ought to have changed.

Right, this is the layout with frame aligned to 8 bytes:

struct xen_mem_acquire_resource {
	uint16_t                   domid;                /*     0     2 */
	uint16_t                   type;                 /*     2     2 */
	uint32_t                   id;                   /*     4     4 */
	uint32_t                   nr_frames;            /*     8     4 */
	uint32_t                   pad;                  /*    12     4 */
	uint64_t                   frame;                /*    16     8 */
	long unsigned int *        frame_list;           /*    24     4 */

	/* size: 32, cachelines: 1, members: 7 */
	/* padding: 4 */
	/* last cacheline: 32 bytes */
};

And this is without:

struct xen_mem_acquire_resource {
	uint16_t                   domid;                /*     0     2 */
	uint16_t                   type;                 /*     2     2 */
	uint32_t                   id;                   /*     4     4 */
	uint32_t                   nr_frames;            /*     8     4 */
	uint32_t                   pad;                  /*    12     4 */
	uint64_t                   frame;                /*    16     8 */
	long unsigned int *        frame_list;           /*    24     4 */

	/* size: 28, cachelines: 1, members: 7 */
	/* last cacheline: 28 bytes */
};

Note Xen has already been copying those extra 4 padding bytes from
32bit Linux kernels AFAICT, as the struct declaration in Linux has
dropped the aligned attribute.

> The question now is whether we're willing to accept such a (marginal)
> incompatibility. The chance of a 32-bit guest actually running into
> it shouldn't be very high, but with the right placement of an
> instance of the struct at the end of a page it would be possible to
> observe afaict.
> 
> Or whether we go the alternative route and pad the struct suitably
> for 32-bit.

I guess we are trapped with what Linux has in its headers now, so we
should handle the struct size difference in Xen?

I'm very tempted to just say switch the XEN_GUEST_HANDLE to
XEN_GUEST_HANDLE_64, but I guess it's risky. I'm not sure how many
people still run a toolstack from a 32-bit domain, however.

> > Fixes: 3f8f12281dd20 ('x86/mm: add HYPERVISOR_memory_op to acquire guest resources')
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> > ---
> > Would be good to get this fixed before the release in order to avoid
> > shipping bogus headers. Should also be backported.
> 
> This was already part of 4.13, as you imply by requesting a backport.
> Hence the bogus header had already been shipped.

Indeed. I would however try to prevent it from shipping in 4.14.

> > --- a/xen/include/public/memory.h
> > +++ b/xen/include/public/memory.h
> > @@ -607,6 +607,8 @@ struct xen_reserved_device_memory_map {
> >  typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
> >  DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
> >  
> > +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
> > +
> >  /*
> >   * Get the pages for a particular guest resource, so that they can be
> >   * mapped directly by a tools domain.
> 
> This comment is now stale.

Hm, I'm not so sure. This is indeed used by Xen related tools (or
emulators) and then those domains running such tools would also be
tools domains?

Anyway, I don't mind removing it, but I also don't think it's so
off.

Roger.
Jan Beulich June 23, 2020, 4:18 p.m. UTC | #6
On 23.06.2020 17:56, Roger Pau Monné wrote:
> On Tue, Jun 23, 2020 at 05:02:04PM +0200, Jan Beulich wrote:
>> On 23.06.2020 15:52, Roger Pau Monne wrote:
>>> XENMEM_acquire_resource and its related structure are currently inside
>>> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
>>> hypervisor or the toolstack only. This is wrong as the hypercall is
>>> already being used by the Linux kernel at least, and as such needs to
>>> be public.
>>>
>>> Also switch the usage of uint64_aligned_t to plain uint64_t, as
>>> uint64_aligned_t is only to be used by the toolstack. Note that the
>>> layout of the structure will be the same, as the field is already
>>> naturally aligned to an 8-byte boundary.
>>
>> I'm afraid it's more complicated, and hence ...
>>
>>> No functional change expected.
>>
>> ... this doesn't hold: As I notice only now the struct also wrongly
>> (if it was meant to be a tools-only interface) failed to use
>> XEN_GUEST_HANDLE_64(), which would in principle have been a problem
>> for 32-bit callers (passing garbage in the upper half of a handle).
>> It's not an actual problem because, unlike what would typically be the
>> case for tools-only interfaces, there is compat translation for this
>> struct.
> 
> Yes, there's already compat translation for the frame_list array.
> 
>> With this, however, the problem of your change becomes noticeable:
>> The structure's alignment for 32-bit x86, and with it the structure's
>> sizeof(), both change. You should be able to see this by comparing
>> xen/common/compat/memory.c's disassembly before and after your
>> change - the number of bytes to copy by the respective
>> copy_from_guest() ought to have changed.
> 
> Right, this is the layout with frame aligned to 8 bytes:
> 
> struct xen_mem_acquire_resource {
> 	uint16_t                   domid;                /*     0     2 */
> 	uint16_t                   type;                 /*     2     2 */
> 	uint32_t                   id;                   /*     4     4 */
> 	uint32_t                   nr_frames;            /*     8     4 */
> 	uint32_t                   pad;                  /*    12     4 */
> 	uint64_t                   frame;                /*    16     8 */
> 	long unsigned int *        frame_list;           /*    24     4 */
> 
> 	/* size: 32, cachelines: 1, members: 7 */
> 	/* padding: 4 */
> 	/* last cacheline: 32 bytes */
> };
> 
> And this is without:
> 
> struct xen_mem_acquire_resource {
> 	uint16_t                   domid;                /*     0     2 */
> 	uint16_t                   type;                 /*     2     2 */
> 	uint32_t                   id;                   /*     4     4 */
> 	uint32_t                   nr_frames;            /*     8     4 */
> 	uint32_t                   pad;                  /*    12     4 */
> 	uint64_t                   frame;                /*    16     8 */
> 	long unsigned int *        frame_list;           /*    24     4 */
> 
> 	/* size: 28, cachelines: 1, members: 7 */
> 	/* last cacheline: 28 bytes */
> };
> 
> Note Xen has already been copying those extra 4 padding bytes from
> 32bit Linux kernels AFAICT, as the struct declaration in Linux has
> dropped the aligned attribute.
> 
>> The question now is whether we're willing to accept such a (marginal)
>> incompatibility. The chance of a 32-bit guest actually running into
>> it shouldn't be very high, but with the right placement of an
>> instance of the struct at the end of a page it would be possible to
>> observe afaict.
>>
>> Or whether we go the alternative route and pad the struct suitably
>> for 32-bit.
> 
> I guess we are trapped with what Linux has in its headers now, so we
> should handle the struct size difference in Xen?

How do you mean to notice the difference in Xen? You don't know
what the caller's environment's sizeof() would yield.

> I'm very tempted to just say switch the XEN_GUEST_HANDLE to
> XEN_GUEST_HANDLE_64, but I guess it's risky.

Plus, like uint64_aligned_t, iirc it's a construct exposed through
tool-stack-only interfaces, but not generally.
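
For context, the 64-bit handle construct is built roughly as below (a sketch modelled on the tools-only definitions in the 32-bit x86 arch header; macro plumbing omitted and the stand-in typedef made up):

    #include <stdint.h>

    typedef unsigned long xen_pfn_t;              /* stand-in typedef */

    /* A plain guest handle is just a pointer: 4 bytes on a 32-bit
     * toolstack, leaving the upper half of a 64-bit slot undefined. */
    typedef struct { xen_pfn_t *p; } guest_handle;

    /* The _64 variant overlays the pointer on an 8-byte field, so the
     * caller can (and must) initialise the full 64 bits instead of
     * leaving the upper half as stack garbage. */
    typedef union {
        xen_pfn_t *p;
        uint64_t q __attribute__((aligned(8)));   /* ~uint64_aligned_t */
    } guest_handle_64;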

>>> --- a/xen/include/public/memory.h
>>> +++ b/xen/include/public/memory.h
>>> @@ -607,6 +607,8 @@ struct xen_reserved_device_memory_map {
>>>  typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
>>>  DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
>>>  
>>> +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>>> +
>>>  /*
>>>   * Get the pages for a particular guest resource, so that they can be
>>>   * mapped directly by a tools domain.
>>
>> This comment is now stale.
> 
> Hm, I'm not so sure. This is indeed used by Xen related tools (or
> emulators) and then those domains running such tools would also be
> tools domains?
> 
> Anyway, I don't mind removing it, but I also don't think it's so
> off.

Well - if this isn't a tool stack interface (see also my 2nd reply
to your original patch), then the comment shouldn't suggest it is.

Jan
Roger Pau Monné June 23, 2020, 5:26 p.m. UTC | #7
On Tue, Jun 23, 2020 at 06:18:52PM +0200, Jan Beulich wrote:
> On 23.06.2020 17:56, Roger Pau Monné wrote:
> > On Tue, Jun 23, 2020 at 05:02:04PM +0200, Jan Beulich wrote:
> >> On 23.06.2020 15:52, Roger Pau Monne wrote:
> >>> XENMEM_acquire_resource and its related structure are currently inside
> >>> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
> >>> hypervisor or the toolstack only. This is wrong as the hypercall is
> >>> already being used by the Linux kernel at least, and as such needs to
> >>> be public.
> >>>
> >>> Also switch the usage of uint64_aligned_t to plain uint64_t, as
> >>> uint64_aligned_t is only to be used by the toolstack. Note that the
> >>> layout of the structure will be the same, as the field is already
> >>> naturally aligned to an 8-byte boundary.
> >>
> >> I'm afraid it's more complicated, and hence ...
> >>
> >>> No functional change expected.
> >>
> >> ... this doesn't hold: As I notice only now the struct also wrongly
> >> (if it was meant to be a tools-only interface) failed to use
> >> XEN_GUEST_HANDLE_64(), which would in principle have been a problem
> >> for 32-bit callers (passing garbage in the upper half of a handle).
> >> It's not an actual problem because, unlike what would typically be the
> >> case for tools-only interfaces, there is compat translation for this
> >> struct.
> > 
> > Yes, there's already compat translation for the frame_list array.
> > 
> >> With this, however, the problem of your change becomes noticeable:
> >> The structure's alignment for 32-bit x86, and with it the structure's
> >> sizeof(), both change. You should be able to see this by comparing
> >> xen/common/compat/memory.c's disassembly before and after your
> >> change - the number of bytes to copy by the respective
> >> copy_from_guest() ought to have changed.
> > 
> > Right, this is the layout with frame aligned to 8 bytes:
> > 
> > struct xen_mem_acquire_resource {
> > 	uint16_t                   domid;                /*     0     2 */
> > 	uint16_t                   type;                 /*     2     2 */
> > 	uint32_t                   id;                   /*     4     4 */
> > 	uint32_t                   nr_frames;            /*     8     4 */
> > 	uint32_t                   pad;                  /*    12     4 */
> > 	uint64_t                   frame;                /*    16     8 */
> > 	long unsigned int *        frame_list;           /*    24     4 */
> > 
> > 	/* size: 32, cachelines: 1, members: 7 */
> > 	/* padding: 4 */
> > 	/* last cacheline: 32 bytes */
> > };
> > 
> > And this is without:
> > 
> > struct xen_mem_acquire_resource {
> > 	uint16_t                   domid;                /*     0     2 */
> > 	uint16_t                   type;                 /*     2     2 */
> > 	uint32_t                   id;                   /*     4     4 */
> > 	uint32_t                   nr_frames;            /*     8     4 */
> > 	uint32_t                   pad;                  /*    12     4 */
> > 	uint64_t                   frame;                /*    16     8 */
> > 	long unsigned int *        frame_list;           /*    24     4 */
> > 
> > 	/* size: 28, cachelines: 1, members: 7 */
> > 	/* last cacheline: 28 bytes */
> > };
> > 
> > Note Xen has already been copying those extra 4 padding bytes from
> > 32bit Linux kernels AFAICT, as the struct declaration in Linux has
> > dropped the aligned attribute.
> > 
> >> The question now is whether we're willing to accept such a (marginal)
> >> incompatibility. The chance of a 32-bit guest actually running into
> >> it shouldn't be very high, but with the right placement of an
> >> instance of the struct at the end of a page it would be possible to
> >> observe afaict.
> >>
> >> Or whether we go the alternative route and pad the struct suitably
> >> for 32-bit.
> > 
> > I guess we are trapped with what Linux has on it's headers now, so we
> > should handle the struct size difference in Xen?
> 
> How do you mean to notice the difference in Xen? You don't know
> what the caller's environment's sizeof() would yield.

I'm confused. Couldn't we switch from uint64_aligned_t to plain
uint64_t (like it's currently on the Linux headers), and then use the
compat layer in Xen to handle the size difference when called from
32bit environments?
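
For illustration, handling it in the compat layer would mean giving Xen a 28-byte view of what a 32-bit guest actually passes, something like the conceptual sketch below (Xen's real compat structures are generated; the name and attributes here are assumptions):

    #include <stdint.h>

    /* The layout a 32-bit guest built without the aligned attribute
     * hands over: 28 bytes, with a 32-bit "pointer" as the handle. */
    struct compat_mem_acquire_resource {
        uint16_t domid, type;
        uint32_t id, nr_frames, pad;
        uint64_t frame;
        uint32_t frame_list;               /* compat handle is 32 bits */
    } __attribute__((packed, aligned(4))); /* keep the ILP32 sizeof: 28 */

    /* The compat entry point would copy this 28-byte view from the
     * guest and widen it into the native structure before handing it
     * to the common handler. */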

This would of course assume that no toolstack has implemented direct
calls using this interface, which seems likely because it either
returns mfns to be mapped in the PV case or require gfns to be
provided for HVM.

> > I'm very tempted to just say switch the XEN_GUEST_HANDLE to
> > XEN_GUEST_HANDLE_64, but I guess it's risky.
> 
> Plus, like uint64_aligned_t, iirc it's a construct exposed through
> tool-stack-only interfaces, but not generally.

Ack.

> >>> --- a/xen/include/public/memory.h
> >>> +++ b/xen/include/public/memory.h
> >>> @@ -607,6 +607,8 @@ struct xen_reserved_device_memory_map {
> >>>  typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
> >>>  DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
> >>>  
> >>> +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
> >>> +
> >>>  /*
> >>>   * Get the pages for a particular guest resource, so that they can be
> >>>   * mapped directly by a tools domain.
> >>
> >> This comment is now stale.
> > 
> > Hm, I'm not so sure. This is indeed used by Xen related tools (or
> > emulators) and then those domains running such tools would also be
> > tools domains?
> > 
> > Anyway, I don't mind removing it, but I also don't think it's so
> > off.
> 
> Well - if this isn't a tool stack interface (see also my 2nd reply
> to your original patch), then the comment shouldn't suggest it is.

OK, let me take a look.

Roger.
Roger Pau Monné June 23, 2020, 5:32 p.m. UTC | #8
On Tue, Jun 23, 2020 at 05:04:53PM +0200, Jan Beulich wrote:
> On 23.06.2020 15:52, Roger Pau Monne wrote:
> > XENMEM_acquire_resource and its related structure are currently inside
> > a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
> > hypervisor or the toolstack only. This is wrong as the hypercall is
> > already being used by the Linux kernel at least, and as such needs to
> > be public.
> 
> Actually - how does this work for the Linux kernel, seeing
> 
>     rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
>     if ( rc )
>         return rc;
> 
>     rc = xsm_domain_resource_map(XSM_DM_PRIV, d);
>     if ( rc )
>         goto out;
> 
> in the function?

It's my understanding (I haven't tried to use that hypercall yet on
FreeBSD, so I cannot say I've tested it) that xmar.domid is the
remote domain, which the function locks and then uses
xsm_domain_resource_map to check whether the current domain has
permissions to do privileged operations against it.

Roger.
Jan Beulich June 24, 2020, 10:05 a.m. UTC | #9
On 23.06.2020 19:32, Roger Pau Monné wrote:
> On Tue, Jun 23, 2020 at 05:04:53PM +0200, Jan Beulich wrote:
>> On 23.06.2020 15:52, Roger Pau Monne wrote:
>>> XENMEM_acquire_resource and its related structure are currently inside
>>> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
>>> hypervisor or the toolstack only. This is wrong as the hypercall is
>>> already being used by the Linux kernel at least, and as such needs to
>>> be public.
>>
>> Actually - how does this work for the Linux kernel, seeing
>>
>>     rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
>>     if ( rc )
>>         return rc;
>>
>>     rc = xsm_domain_resource_map(XSM_DM_PRIV, d);
>>     if ( rc )
>>         goto out;
>>
>> in the function?
> 
> It's my understanding (I haven't tried to use that hypercall yet on
> FreeBSD, so I cannot say I've tested it) that xmar.domid is the
> remote domain, which the function locks and then uses
> xsm_domain_resource_map to check whether the current domain has
> permissions to do privileged operations against it.

Yes, but that's a tool stack operation, not something the kernel
would do all by itself. The kernel would only ever pass DOMID_SELF
(or the actual local domain ID), I would think.

Jan
Jan Beulich June 24, 2020, 10:12 a.m. UTC | #10
On 23.06.2020 19:26, Roger Pau Monné wrote:
> On Tue, Jun 23, 2020 at 06:18:52PM +0200, Jan Beulich wrote:
>> On 23.06.2020 17:56, Roger Pau Monné wrote:
>>> On Tue, Jun 23, 2020 at 05:02:04PM +0200, Jan Beulich wrote:
>>>> On 23.06.2020 15:52, Roger Pau Monne wrote:
>>>>> XENMEM_acquire_resource and its related structure are currently inside
>>>>> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
>>>>> hypervisor or the toolstack only. This is wrong as the hypercall is
>>>>> already being used by the Linux kernel at least, and as such needs to
>>>>> be public.
>>>>>
>>>>> Also switch the usage of uint64_aligned_t to plain uint64_t, as
>>>>> uint64_aligned_t is only to be used by the toolstack. Note that the
>>>>> layout of the structure will be the same, as the field is already
>>>>> naturally aligned to an 8-byte boundary.
>>>>
>>>> I'm afraid it's more complicated, and hence ...
>>>>
>>>>> No functional change expected.
>>>>
>>>> ... this doesn't hold: As I notice only now the struct also wrongly
>>>> (if it was meant to be a tools-only interface) failed to use
>>>> XEN_GUEST_HANDLE_64(), which would in principle have been a problem
>>>> for 32-bit callers (passing garbage in the upper half of a handle).
>>>> It's not an actual problem because, unlike what would typically be the
>>>> case for tools-only interfaces, there is compat translation for this
>>>> struct.
>>>
>>> Yes, there's already compat translation for the frame_list array.
>>>
>>>> With this, however, the problem of your change becomes noticeable:
>>>> The structure's alignment for 32-bit x86, and with it the structure's
>>>> sizeof(), both change. You should be able to see this by comparing
>>>> xen/common/compat/memory.c's disassembly before and after your
>>>> change - the number of bytes to copy by the respective
>>>> copy_from_guest() ought to have changed.
>>>
>>> Right, this is the layout with frame aligned to 8 bytes:
>>>
>>> struct xen_mem_acquire_resource {
>>> 	uint16_t                   domid;                /*     0     2 */
>>> 	uint16_t                   type;                 /*     2     2 */
>>> 	uint32_t                   id;                   /*     4     4 */
>>> 	uint32_t                   nr_frames;            /*     8     4 */
>>> 	uint32_t                   pad;                  /*    12     4 */
>>> 	uint64_t                   frame;                /*    16     8 */
>>> 	long unsigned int *        frame_list;           /*    24     4 */
>>>
>>> 	/* size: 32, cachelines: 1, members: 7 */
>>> 	/* padding: 4 */
>>> 	/* last cacheline: 32 bytes */
>>> };
>>>
>>> And this is without:
>>>
>>> struct xen_mem_acquire_resource {
>>> 	uint16_t                   domid;                /*     0     2 */
>>> 	uint16_t                   type;                 /*     2     2 */
>>> 	uint32_t                   id;                   /*     4     4 */
>>> 	uint32_t                   nr_frames;            /*     8     4 */
>>> 	uint32_t                   pad;                  /*    12     4 */
>>> 	uint64_t                   frame;                /*    16     8 */
>>> 	long unsigned int *        frame_list;           /*    24     4 */
>>>
>>> 	/* size: 28, cachelines: 1, members: 7 */
>>> 	/* last cacheline: 28 bytes */
>>> };
>>>
>>> Note Xen has already been copying those extra 4 padding bytes from
>>> 32bit Linux kernels AFAICT, as the struct declaration in Linux has
>>> dropped the aligned attribute.
>>>
>>>> The question now is whether we're willing to accept such a (marginal)
>>>> incompatibility. The chance of a 32-bit guest actually running into
>>>> it shouldn't be very high, but with the right placement of an
>>>> instance of the struct at the end of a page it would be possible to
>>>> observe afaict.
>>>>
>>>> Or whether we go the alternative route and pad the struct suitably
>>>> for 32-bit.
>>>
>>> I guess we are trapped with what Linux has in its headers now, so we
>>> should handle the struct size difference in Xen?
>>
>> How do you mean to notice the difference in Xen? You don't know
>> what the caller's environment's sizeof() would yield.
> 
> I'm confused. Couldn't we switch from uint64_aligned_t to plain
> uint64_t (like it's currently on the Linux headers), and then use the
> compat layer in Xen to handle the size difference when called from
> 32bit environments?

And which size would we use there? The old or the new one (breaking
future or existing callers respectively)? Meanwhile I think that if
this indeed needs to not be tools-only (which I still question),
then our only possible route is to add explicit padding for the
32-bit case alongside the change you're already making.

> This would of course assume that no toolstack has implemented direct
> calls using this interface, which seems likely because it either
> returns mfns to be mapped in the PV case or require gfns to be
> provided for HVM.

What are "direct calls" in the case here? We aren't talking about
a dmop interface, but a memop one here, are we? It's merely an
interface that an ioreq server implementation ought to use to get
hold of the ring pages (i.e. there's only a weak connection to
dmop).

Jan
Julien Grall June 24, 2020, 10:52 a.m. UTC | #11
Hi Jan,

On 24/06/2020 11:05, Jan Beulich wrote:
> On 23.06.2020 19:32, Roger Pau Monné wrote:
>> On Tue, Jun 23, 2020 at 05:04:53PM +0200, Jan Beulich wrote:
>>> On 23.06.2020 15:52, Roger Pau Monne wrote:
>>>> XENMEM_acquire_resource and its related structure are currently inside
>>>> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
>>>> hypervisor or the toolstack only. This is wrong as the hypercall is
>>>> already being used by the Linux kernel at least, and as such needs to
>>>> be public.
>>>
>>> Actually - how does this work for the Linux kernel, seeing
>>>
>>>      rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
>>>      if ( rc )
>>>          return rc;
>>>
>>>      rc = xsm_domain_resource_map(XSM_DM_PRIV, d);
>>>      if ( rc )
>>>          goto out;
>>>
>>> in the function?
>>
>> It's my understanding (I haven't tried to use that hypercall yet on
>> FreeBSD, so I cannot say I've tested it) that xmar.domid is the
>> remote domain, which the function locks and then uses
>> xsm_domain_resource_map to check whether the current domain has
>> permissions to do privileged operations against it.
> 
> Yes, but that's a tool stack operation, not something the kernel
> would do all by itself. The kernel would only ever pass DOMID_SELF
> (or the actual local domain ID), I would think.

You can't issue that hypercall directly from userspace because you need 
to map the page in the physical address space of the toolstack domain.

So the kernel has to act as the proxy for the hypercall. This is 
implemented as mmap() in Linux.

Cheers,
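
Concretely, the Linux proxying Julien describes looks roughly like the sketch below from the toolstack process's side (the ioctl and struct come from Linux's uapi xen/privcmd.h; the resource-type value and the minimal error handling are illustrative assumptions):

    #include <stdint.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <xen/privcmd.h>      /* struct privcmd_mmap_resource */

    /* Ask the kernel to acquire one resource page of domain 1 and map
     * it at a userspace address; the kernel, not this process, issues
     * XENMEM_acquire_resource. */
    static void *map_resource_page(void)
    {
        int fd = open("/dev/xen/privcmd", O_RDWR | O_CLOEXEC);
        void *addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                          MAP_SHARED, fd, 0);
        struct privcmd_mmap_resource res = {
            .dom  = 1,
            .type = 0,    /* assumed: XENMEM_resource_ioreq_server */
            .id   = 0,    /* ioreq server id */
            .idx  = 0,    /* first frame */
            .num  = 1,    /* one page */
            .addr = (uint64_t)(uintptr_t)addr,
        };

        if (fd < 0 || addr == MAP_FAILED ||
            ioctl(fd, IOCTL_PRIVCMD_MMAP_RESOURCE, &res))
            return NULL;
        return addr;
    }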
Jan Beulich June 24, 2020, 12:08 p.m. UTC | #12
On 24.06.2020 12:52, Julien Grall wrote:
> Hi Jan,
> 
> On 24/06/2020 11:05, Jan Beulich wrote:
>> On 23.06.2020 19:32, Roger Pau Monné wrote:
>>> On Tue, Jun 23, 2020 at 05:04:53PM +0200, Jan Beulich wrote:
>>>> On 23.06.2020 15:52, Roger Pau Monne wrote:
>>>>> XENMEM_acquire_resource and its related structure are currently inside
>>>>> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
>>>>> hypervisor or the toolstack only. This is wrong as the hypercall is
>>>>> already being used by the Linux kernel at least, and as such needs to
>>>>> be public.
>>>>
>>>> Actually - how does this work for the Linux kernel, seeing
>>>>
>>>>      rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
>>>>      if ( rc )
>>>>          return rc;
>>>>
>>>>      rc = xsm_domain_resource_map(XSM_DM_PRIV, d);
>>>>      if ( rc )
>>>>          goto out;
>>>>
>>>> in the function?
>>>
>>> It's my understanding (I haven't tried to use that hypercall yet on
>>> FreeBSD, so I cannot say I've tested it) that xmar.domid is the
>>> remote domain, which the function locks and then uses
>>> xsm_domain_resource_map to check whether the current domain has
>>> permissions to do privileged operations against it.
>>
>> Yes, but that's a tool stack operation, not something the kernel
>> would do all by itself. The kernel would only ever pass DOMID_SELF
>> (or the actual local domain ID), I would think.
> 
> You can't issue that hypercall directly from userspace because you need 
> to map the page in the physical address space of the toolstack domain.
> 
> So the kernel has to act as the proxy for the hypercall. This is 
> implemented as mmap() in Linux.

Oh, and there's no generic wrapping available here, unlike for
dmop. Makes me wonder whether, for this purpose, there should
be (have been) a new dmop with identical functionality, to
allow such funneling.

Jan
Julien Grall June 24, 2020, 12:47 p.m. UTC | #13
Hi,

On 24/06/2020 13:08, Jan Beulich wrote:
> On 24.06.2020 12:52, Julien Grall wrote:
>> Hi Jan,
>>
>> On 24/06/2020 11:05, Jan Beulich wrote:
>>> On 23.06.2020 19:32, Roger Pau Monné wrote:
>>>> On Tue, Jun 23, 2020 at 05:04:53PM +0200, Jan Beulich wrote:
>>>>> On 23.06.2020 15:52, Roger Pau Monne wrote:
>>>>>> XENMEM_acquire_resource and its related structure are currently inside
>>>>>> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
>>>>>> hypervisor or the toolstack only. This is wrong as the hypercall is
>>>>>> already being used by the Linux kernel at least, and as such needs to
>>>>>> be public.
>>>>>
>>>>> Actually - how does this work for the Linux kernel, seeing
>>>>>
>>>>>       rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
>>>>>       if ( rc )
>>>>>           return rc;
>>>>>
>>>>>       rc = xsm_domain_resource_map(XSM_DM_PRIV, d);
>>>>>       if ( rc )
>>>>>           goto out;
>>>>>
>>>>> in the function?
>>>>
>>>> It's my understanding (I haven't tried to use that hypercall yet on
>>>> FreeBSD, so I cannot say I've tested it) that xmar.domid is the
>>>> remote domain, which the function locks and then uses
>>>> xsm_domain_resource_map to check whether the current domain has
>>>> permissions to do privileged operations against it.
>>>
>>> Yes, but that's a tool stack operation, not something the kernel
>>> would do all by itself. The kernel would only ever pass DOMID_SELF
>>> (or the actual local domain ID), I would think.
>>
>> You can't issue that hypercall directly from userspace because you need
>> to map the page in the physical address space of the toolstack domain.
>>
>> So the kernel has to act as the proxy for the hypercall. This is
>> implemented as mmap() in Linux.
> 
> Oh, and there's no generic wrapping available here, unlike for
> dmop. 

It is not clear to me what sort of generic wrapping you are referring to.
Are you referring to a stable interface for an application?

> Makes me wonder whether, for this purpose, there should
> be (have been) a new dmop with identical functionality, to
> allow such funneling.

I am not sure how using DMOP will allow us to implement it fully in 
userspace. Do you mind expanding it?

Cheers,
Jan Beulich June 24, 2020, 12:52 p.m. UTC | #14
On 24.06.2020 14:47, Julien Grall wrote:
> Hi,
> 
> On 24/06/2020 13:08, Jan Beulich wrote:
>> On 24.06.2020 12:52, Julien Grall wrote:
>>> Hi Jan,
>>>
>>> On 24/06/2020 11:05, Jan Beulich wrote:
>>>> On 23.06.2020 19:32, Roger Pau Monné wrote:
>>>>> On Tue, Jun 23, 2020 at 05:04:53PM +0200, Jan Beulich wrote:
>>>>>> On 23.06.2020 15:52, Roger Pau Monne wrote:
>>>>>>> XENMEM_acquire_resource and its related structure are currently inside
>>>>>>> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
>>>>>>> hypervisor or the toolstack only. This is wrong as the hypercall is
>>>>>>> already being used by the Linux kernel at least, and as such needs to
>>>>>>> be public.
>>>>>>
>>>>>> Actually - how does this work for the Linux kernel, seeing
>>>>>>
>>>>>>       rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
>>>>>>       if ( rc )
>>>>>>           return rc;
>>>>>>
>>>>>>       rc = xsm_domain_resource_map(XSM_DM_PRIV, d);
>>>>>>       if ( rc )
>>>>>>           goto out;
>>>>>>
>>>>>> in the function?
>>>>>
>>>>> It's my understanding (I haven't tried to use that hypercall yet on
>>>>> FreeBSD, so I cannot say I've tested it) that xmar.domid is the
>>>>> remote domain, which the function locks and then uses
>>>>> xsm_domain_resource_map to check whether the current domain has
>>>>> permissions to do privileged operations against it.
>>>>
>>>> Yes, but that's a tool stack operation, not something the kernel
>>>> would do all by itself. The kernel would only ever pass DOMID_SELF
>>>> (or the actual local domain ID), I would think.
>>>
>>> You can't issue that hypercall directly from userspace because you need
>>> to map the page in the physical address space of the toolstack domain.
>>>
>>> So the kernel has to act as the proxy for the hypercall. This is
>>> implemented as mmap() in Linux.
>>
>> Oh, and there's no generic wrapping available here, unlike for
>> dmop. 
> 
> It is not clear to me what sort of generic wrapping you are referring to.
> Are you referring to a stable interface for an application?
> 
>> Makes me wonder whether, for this purpose, there should
>> be (have been) a new dmop with identical functionality, to
>> allow such funneling.
> 
> I am not sure how using DMOP will allow us to implement it fully in 
> userspace. Do you mind expanding it?

dmop was designed so that a kernel proxying requests wouldn't need
updating for every new request added to the interface. If the
request here was made through a new dmop, the kernel would never
have had a need to know of an interface structure that's of no
interest to it, but only to the tool stack.

Jan
Paul Durrant June 24, 2020, 12:53 p.m. UTC | #15
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: 24 June 2020 13:52
> To: Julien Grall <julien@xen.org>
> Cc: Roger Pau Monné <roger.pau@citrix.com>; xen-devel@lists.xenproject.org; paul@xen.org; Andrew
> Cooper <andrew.cooper3@citrix.com>; George Dunlap <george.dunlap@citrix.com>; Ian Jackson
> <ian.jackson@eu.citrix.com>; Stefano Stabellini <sstabellini@kernel.org>; Wei Liu <wl@xen.org>
> Subject: Re: [PATCH for-4.14] mm: fix public declaration of struct xen_mem_acquire_resource
> 
> On 24.06.2020 14:47, Julien Grall wrote:
> > Hi,
> >
> > On 24/06/2020 13:08, Jan Beulich wrote:
> >> On 24.06.2020 12:52, Julien Grall wrote:
> >>> Hi Jan,
> >>>
> >>> On 24/06/2020 11:05, Jan Beulich wrote:
> >>>> On 23.06.2020 19:32, Roger Pau Monné wrote:
> >>>>> On Tue, Jun 23, 2020 at 05:04:53PM +0200, Jan Beulich wrote:
> >>>>>> On 23.06.2020 15:52, Roger Pau Monne wrote:
> >>>>>>> XENMEM_acquire_resource and its related structure are currently inside
> >>>>>>> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
> >>>>>>> hypervisor or the toolstack only. This is wrong as the hypercall is
> >>>>>>> already being used by the Linux kernel at least, and as such needs to
> >>>>>>> be public.
> >>>>>>
> >>>>>> Actually - how does this work for the Linux kernel, seeing
> >>>>>>
> >>>>>>       rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
> >>>>>>       if ( rc )
> >>>>>>           return rc;
> >>>>>>
> >>>>>>       rc = xsm_domain_resource_map(XSM_DM_PRIV, d);
> >>>>>>       if ( rc )
> >>>>>>           goto out;
> >>>>>>
> >>>>>> in the function?
> >>>>>
> >>>>> It's my understanding (I haven't tried to use that hypercall yet on
> >>>>> FreeBSD, so I cannot say I've tested it) that xmar.domid is the
> >>>>> remote domain, which the function locks and then uses
> >>>>> xsm_domain_resource_map to check whether the current domain has
> >>>>> permissions to do privileged operations against it.
> >>>>
> >>>> Yes, but that's a tool stack operation, not something the kernel
> >>>> would do all by itself. The kernel would only ever pass DOMID_SELF
> >>>> (or the actual local domain ID), I would think.
> >>>
> >>> You can't issue that hypercall directly from userspace because you need
> >>> to map the page in the physical address space of the toolstack domain.
> >>>
> >>> So the kernel has to act as the proxy for the hypercall. This is
> >>> implemented as mmap() in Linux.
> >>
> >> Oh, and there's no generic wrapping available here, unlike for
> >> dmop.
> >
> > It is not clear to me what sort of generic wrapping you are referring to.
> > Are you referring to a stable interface for an application?
> >
> >> Makes me wonder whether, for this purpose, there should
> >> be (have been) a new dmop with identical functionality, to
> >> allow such funneling.
> >
> > I am not sure how using DMOP will allow us to implement it fully in
> > userspace. Do you mind expanding it?
> 
> dmop was designed so that a kernel proxying requests wouldn't need
> updating for every new request added to the interface. If the
> request here was made through a new dmop, the kernel would never
> have had a need to know of an interface structure that's of no
> interest to it, but only to the tool stack.

How would the pages get mapped into process address space if the kernel doesn't know what's being done?

  Paul

> 
> Jan
Jan Beulich June 24, 2020, 1:07 p.m. UTC | #16
On 24.06.2020 14:53, Paul Durrant wrote:
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: 24 June 2020 13:52
>> To: Julien Grall <julien@xen.org>
>> Cc: Roger Pau Monné <roger.pau@citrix.com>; xen-devel@lists.xenproject.org; paul@xen.org; Andrew
>> Cooper <andrew.cooper3@citrix.com>; George Dunlap <george.dunlap@citrix.com>; Ian Jackson
>> <ian.jackson@eu.citrix.com>; Stefano Stabellini <sstabellini@kernel.org>; Wei Liu <wl@xen.org>
>> Subject: Re: [PATCH for-4.14] mm: fix public declaration of struct xen_mem_acquire_resource
>>
>> On 24.06.2020 14:47, Julien Grall wrote:
>>> Hi,
>>>
>>> On 24/06/2020 13:08, Jan Beulich wrote:
>>>> On 24.06.2020 12:52, Julien Grall wrote:
>>>>> Hi Jan,
>>>>>
>>>>> On 24/06/2020 11:05, Jan Beulich wrote:
>>>>>> On 23.06.2020 19:32, Roger Pau Monné wrote:
>>>>>>> On Tue, Jun 23, 2020 at 05:04:53PM +0200, Jan Beulich wrote:
>>>>>>>> On 23.06.2020 15:52, Roger Pau Monne wrote:
>>>>>>>>> XENMEM_acquire_resource and its related structure are currently inside
>>>>>>>>> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
>>>>>>>>> hypervisor or the toolstack only. This is wrong as the hypercall is
>>>>>>>>> already being used by the Linux kernel at least, and as such needs to
>>>>>>>>> be public.
>>>>>>>>
>>>>>>>> Actually - how does this work for the Linux kernel, seeing
>>>>>>>>
>>>>>>>>       rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
>>>>>>>>       if ( rc )
>>>>>>>>           return rc;
>>>>>>>>
>>>>>>>>       rc = xsm_domain_resource_map(XSM_DM_PRIV, d);
>>>>>>>>       if ( rc )
>>>>>>>>           goto out;
>>>>>>>>
>>>>>>>> in the function?
>>>>>>>
>>>>>>> It's my understanding (I haven't tried to use that hypercall yet on
>>>>>>> FreeBSD, so I cannot say I've tested it) that xmar.domid is the
>>>>>>> remote domain, which the function locks and then uses
>>>>>>> xsm_domain_resource_map to check whether the current domain has
>>>>>>> permissions to do privileged operations against it.
>>>>>>
>>>>>> Yes, but that's a tool stack operation, not something the kernel
>>>>>> would do all by itself. The kernel would only ever pass DOMID_SELF
>>>>>> (or the actual local domain ID), I would think.
>>>>>
>>>>> You can't issue that hypercall directly from userspace because you need
>>>>> to map the page in the physical address space of the toolstack domain.
>>>>>
>>>>> So the kernel has to act as the proxy for the hypercall. This is
>>>>> implemented as mmap() in Linux.
>>>>
>>>> Oh, and there's no generic wrapping available here, unlike for
>>>> dmop.
>>>
>>> It is not clear to me what sort of generic wrapping you are referring to.
>>> Are you referring to a stable interface for an application?
>>>
>>>> Makes me wonder whether, for this purpose, there should
>>>> be (have been) a new dmop with identical functionality, to
>>>> allow such funneling.
>>>
>>> I am not sure how using DMOP will allow us to implement it fully in
>>> userspace. Do you mind expanding it?
>>
>> dmop was designed so that a kernel proxying requests wouldn't need
>> updating for every new request added to the interface. If the
>> request here was made through a new dmop, the kernel would never
>> have had a need to know of an interface structure that's of no
>> interest to it, but only to the tool stack.
> 
> How would the pages get mapped into process address space if the
> kernel doesn't know what's being done?

Urgh, yes, silly me.

Jan
Julien Grall June 24, 2020, 1:41 p.m. UTC | #17
Hi Jan,

On 24/06/2020 11:12, Jan Beulich wrote:
> On 23.06.2020 19:26, Roger Pau Monné wrote:
>> On Tue, Jun 23, 2020 at 06:18:52PM +0200, Jan Beulich wrote:
>>> On 23.06.2020 17:56, Roger Pau Monné wrote:
>>>> On Tue, Jun 23, 2020 at 05:02:04PM +0200, Jan Beulich wrote:
>>>>> On 23.06.2020 15:52, Roger Pau Monne wrote:
>>>>>> XENMEM_acquire_resource and its related structure are currently inside
>>>>>> a __XEN__ or __XEN_TOOLS__ guarded section to limit their scope to the
>>>>>> hypervisor or the toolstack only. This is wrong as the hypercall is
>>>>>> already being used by the Linux kernel at least, and as such needs to
>>>>>> be public.
>>>>>>
>>>>>> Also switch the usage of uint64_aligned_t to plain uint64_t, as
>>>>>> uint64_aligned_t is only to be used by the toolstack. Note that the
>>>>>> layout of the structure will be the same, as the field is already
>>>>>> naturally aligned to an 8-byte boundary.
>>>>>
>>>>> I'm afraid it's more complicated, and hence ...
>>>>>
>>>>>> No functional change expected.
>>>>>
>>>>> ... this doesn't hold: As I notice only now the struct also wrongly
>>>>> (if it was meant to be a tools-only interface) failed to use
>>>>> XEN_GUEST_HANDLE_64(), which would in principle have been a problem
>>>>> for 32-bit callers (passing garbage in the upper half of a handle).
>>>>> It's not an actual problem because, unlike what would typically be the
>>>>> case for tools-only interfaces, there is compat translation for this
>>>>> struct.
>>>>
>>>> Yes, there's already compat translation for the frame_list array.
>>>>
>>>>> With this, however, the problem of your change becomes noticeable:
>>>>> The structure's alignment for 32-bit x86, and with it the structure's
>>>>> sizeof(), both change. You should be able to see this by comparing
>>>>> xen/common/compat/memory.c's disassembly before and after your
>>>>> change - the number of bytes to copy by the respective
>>>>> copy_from_guest() ought to have changed.
>>>>
>>>> Right, this is the layout with frame aligned to 8 bytes:
>>>>
>>>> struct xen_mem_acquire_resource {
>>>> 	uint16_t                   domid;                /*     0     2 */
>>>> 	uint16_t                   type;                 /*     2     2 */
>>>> 	uint32_t                   id;                   /*     4     4 */
>>>> 	uint32_t                   nr_frames;            /*     8     4 */
>>>> 	uint32_t                   pad;                  /*    12     4 */
>>>> 	uint64_t                   frame;                /*    16     8 */
>>>> 	long unsigned int *        frame_list;           /*    24     4 */
>>>>
>>>> 	/* size: 32, cachelines: 1, members: 7 */
>>>> 	/* padding: 4 */
>>>> 	/* last cacheline: 32 bytes */
>>>> };
>>>>
>>>> And this is without:
>>>>
>>>> struct xen_mem_acquire_resource {
>>>> 	uint16_t                   domid;                /*     0     2 */
>>>> 	uint16_t                   type;                 /*     2     2 */
>>>> 	uint32_t                   id;                   /*     4     4 */
>>>> 	uint32_t                   nr_frames;            /*     8     4 */
>>>> 	uint32_t                   pad;                  /*    12     4 */
>>>> 	uint64_t                   frame;                /*    16     8 */
>>>> 	long unsigned int *        frame_list;           /*    24     4 */
>>>>
>>>> 	/* size: 28, cachelines: 1, members: 7 */
>>>> 	/* last cacheline: 28 bytes */
>>>> };
>>>>
>>>> Note Xen has already been copying those extra 4 padding bytes from
>>>> 32bit Linux kernels AFAICT, as the struct declaration in Linux has
>>>> dropped the aligned attribute.
>>>>
>>>>> The question now is whether we're willing to accept such a (marginal)
>>>>> incompatibility. The chance of a 32-bit guest actually running into
>>>>> it shouldn't be very high, but with the right placement of an
>>>>> instance of the struct at the end of a page it would be possible to
>>>>> observe afaict.
>>>>>
>>>>> Or whether we go the alternative route and pad the struct suitably
>>>>> for 32-bit.
>>>>
>>>> I guess we are trapped with what Linux has in its headers now, so we
>>>> should handle the struct size difference in Xen?
>>>
>>> How do you mean to notice the difference in Xen? You don't know
>>> what the caller's environment's sizeof() would yield.
>>
>> I'm confused. Couldn't we switch from uint64_aligned_t to plain
>> uint64_t (like it's currently on the Linux headers), and then use the
>> compat layer in Xen to handle the size difference when called from
>> 32bit environments?
> 
> And which size would we use there? The old or the new one (breaking
> future or existing callers respectively)? Meanwhile I think that if
> this indeed needs to not be tools-only (which I still question),

I think we now agreed on a subthread that the kernel needs to know the 
layout of the hypercall.

> then our only possible route is to add explicit padding for the
> 32-bit case alongside the change you're already making.

AFAICT Linux 32-bit doesn't have this padding. So wouldn't it make 
the two incompatible?

Cheers,
Jan Beulich June 24, 2020, 2:01 p.m. UTC | #18
On 24.06.2020 15:41, Julien Grall wrote:
> On 24/06/2020 11:12, Jan Beulich wrote:
>> On 23.06.2020 19:26, Roger Pau Monné wrote:
>>> I'm confused. Couldn't we switch from uint64_aligned_t to plain
>>> uint64_t (like it's currently on the Linux headers), and then use the
>>> compat layer in Xen to handle the size difference when called from
>>> 32bit environments?
>>
>> And which size would we use there? The old or the new one (breaking
>> future or existing callers respectively)? Meanwhile I think that if
>> this indeed needs to not be tools-only (which I still question),
> 
> I think we now agreed on a subthread that the kernel needs to know the 
> layout of the hypercall.
> 
>> then our only possible route is to add explicit padding for the
>> 32-bit case alongside the change you're already making.
> 
> AFAICT Linux 32-bit doesn't have this padding. So wouldn't it make 
> the two incompatible?

In principle yes. But they're putting the structure instance on the
stack, so there's no risk from Xen reading 4 bytes too many. I'd
prefer keeping the interface as is (i.e. with the previously
implicit padding made explicit) to avoid risking to break other
possible callers. But that's just my view on it, anyway ...

Jan
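
The explicit-padding alternative Jan describes would look something like this (a sketch only; the pad2 field and the #if condition are illustrative, not necessarily what was eventually committed):

    struct xen_mem_acquire_resource {
        domid_t domid;
        uint16_t type;
        uint32_t id;
        uint32_t nr_frames;
        uint32_t pad;
        uint64_t frame;                /* plain uint64_t, as proposed */
        XEN_GUEST_HANDLE(xen_pfn_t) frame_list;
    #if defined(__i386__)
        uint32_t pad2;  /* preserve the previous 32-byte ILP32 sizeof */
    #endif
    };

This keeps 32-bit callers built against either version of the header passing the same number of bytes, at the cost of an otherwise meaningless trailing field.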
Roger Pau Monné June 25, 2020, 9:05 a.m. UTC | #19
On Wed, Jun 24, 2020 at 04:01:44PM +0200, Jan Beulich wrote:
> On 24.06.2020 15:41, Julien Grall wrote:
> > On 24/06/2020 11:12, Jan Beulich wrote:
> >> On 23.06.2020 19:26, Roger Pau Monné wrote:
> >>> I'm confused. Couldn't we switch from uint64_aligned_t to plain
> >>> uint64_t (like it's currently on the Linux headers), and then use the
> >>> compat layer in Xen to handle the size difference when called from
> >>> 32bit environments?
> >>
> >> And which size would we use there? The old or the new one (breaking
> >> future or existing callers respectively)? Meanwhile I think that if
> >> this indeed needs to not be tools-only (which I still question),
> > 
> > I think we now agreed on a subthread that the kernel needs to know the 
> > layout of the hypercall.
> > 
> >> then our only possible route is to add explicit padding for the
> >> 32-bit case alongside the change you're already making.
> > 
> > AFAICT Linux 32-bit doesn't have this padding. So wouldn't it make 
> > > the two incompatible?
> 
> In principle yes. But they're putting the structure instance on the
> stack, so there's not risk from Xen reading 4 bytes too many. I'd
> prefer keeping the interface as is (i.e. with the previously
> implicit padding made explicit) to avoid risking to break other
> possible callers. But that's just my view on it, anyway ...

Adding the padding is cleaner because we don't need any compat stuff
in order to access the structure from the caller, and we also keep the
original layout currently present on Xen headers.

I can prepare a fix for the Linux kernel, if this approach is fine.

Roger.
Julien Grall June 25, 2020, 9:24 a.m. UTC | #20
Hi Jan,

On 24/06/2020 15:01, Jan Beulich wrote:
> On 24.06.2020 15:41, Julien Grall wrote:
>> On 24/06/2020 11:12, Jan Beulich wrote:
>>> On 23.06.2020 19:26, Roger Pau Monné wrote:
>>>> I'm confused. Couldn't we switch from uint64_aligned_t to plain
>>>> uint64_t (like it's currently on the Linux headers), and then use the
>>>> compat layer in Xen to handle the size difference when called from
>>>> 32bit environments?
>>>
>>> And which size would we use there? The old or the new one (breaking
>>> future or existing callers respectively)? Meanwhile I think that if
>>> this indeed needs to not be tools-only (which I still question),
>>
>> I think we now agreed on a subthread that the kernel needs to know the
>> layout of the hypercall.
>>
>>> then our only possible route is to add explicit padding for the
>>> 32-bit case alongside the change you're already making.
>>
>> AFAICT Linux 32-bit doesn't have this padding. So wouldn't it make
>> the two incompatible?
> 
> In principle yes. But they're putting the structure instance on the
> stack, so there's no risk from Xen reading 4 bytes too many. I'd
> prefer keeping the interface as is (i.e. with the previously
> implicit padding made explicit) to avoid the risk of breaking other
> possible callers. But that's just my view on it, anyway ...

It makes sense to me. As Roger said, we would also want to update the 
Linux kernel header for a future release.

Cheers,
Roger Pau Monné June 25, 2020, 4:10 p.m. UTC | #21
On Thu, Jun 25, 2020 at 11:05:52AM +0200, Roger Pau Monné wrote:
> On Wed, Jun 24, 2020 at 04:01:44PM +0200, Jan Beulich wrote:
> > On 24.06.2020 15:41, Julien Grall wrote:
> > > On 24/06/2020 11:12, Jan Beulich wrote:
> > >> On 23.06.2020 19:26, Roger Pau Monné wrote:
> > >>> I'm confused. Couldn't we switch from uint64_aligned_t to plain
> > >>> uint64_t (like it's currently on the Linux headers), and then use the
> > >>> compat layer in Xen to handle the size difference when called from
> > >>> 32bit environments?
> > >>
> > >> And which size would we use there? The old or the new one (breaking
> > >> future or existing callers respectively)? Meanwhile I think that if
> > >> this indeed needs to not be tools-only (which I still question),
> > > 
> > > I think we now agreed on a subthread that the kernel needs to know the 
> > > layout of the hypercall.
> > > 
> > >> then our only possible route is to add explicit padding for the
> > >> 32-bit case alongside the change you're already making.
> > > 
> > > AFAICT Linux 32-bit doesn't have this padding. So wouldn't it make
> > > the two incompatible?
> > 
> > In principle yes. But they're putting the structure instance on the
> > stack, so there's no risk from Xen reading 4 bytes too many. I'd
> > prefer keeping the interface as is (i.e. with the previously
> > implicit padding made explicit) to avoid the risk of breaking other
> > possible callers. But that's just my view on it, anyway ...
> 
> Adding the padding is cleaner because we don't need any compat stuff
> in order to access the structure from the caller, and we also keep the
> original layout currently present on Xen headers.
> 
> I can prepare a fix for the Linux kernel, if this approach is fine.

So I went over this, and I'm not sure I see the point of adding the
padding field at the end of the structure for 32-bit x86.

The current situation is the following:

 - Linux will use a struct on 32-bit x86 that doesn't have the 4-byte
   padding at the end.
 - Xen will copy 4 bytes of garbage in that case, since the struct on
   Linux is allocated on the stack.

So I suggest we take the approach found in this patch, that is, remove
the 8-byte alignment from the frame field, which will in turn remove
4 bytes of padding from the tail of the structure on 32-bit x86.

That would leave the following scenario:

 - The struct layout in the Linux headers would be correct.
 - Xen already handles the struct size difference between 32-bit and
   64-bit x86, as the compat layer is currently doing the copy in
   compat_memory_op taking into account the size of the compat
   structure.
 - Removing the padding will work for all use cases: Linux is already
   using the correct layout on 32-bit x86, so no change will be
   required there. Any consumers using the tail-padded structure will
   continue to work without issues, as Xen simply won't copy the
   trailing 4 bytes.

So I think the solution proposed in this patch is the correct one:
switch uint64_aligned_t to uint64_t, with no tail padding added on
32-bit x86. I will adjust the commit message and resubmit if that's fine.
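
As a compile-time illustration of the only ABI-visible difference on
32-bit x86 (a sketch with plain C types in place of the Xen typedefs;
the sizes in the comments assume an i386 build, e.g. gcc -m32):

#include <stdint.h>

struct frame_aligned {                    /* old: uint64_aligned_t frame */
    uint16_t domid, type;
    uint32_t id, nr_frames, pad;
    uint64_t frame __attribute__((aligned(8)));
    uint32_t frame_list;                  /* 4-byte handle on 32-bit */
};                                        /* sizeof == 32 with -m32 */

struct frame_plain {                      /* new: plain uint64_t frame */
    uint16_t domid, type;
    uint32_t id, nr_frames, pad;
    uint64_t frame;                       /* i386 ABI: 4-byte alignment */
    uint32_t frame_list;
};                                        /* sizeof == 28 with -m32 */

Field offsets are identical in both variants, so a copy of the first
28 bytes yields exactly the same field values either way; only the
trailing padding disappears.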

Roger.
Jan Beulich June 26, 2020, 1:40 p.m. UTC | #22
On 25.06.2020 18:10, Roger Pau Monné wrote:
> On Thu, Jun 25, 2020 at 11:05:52AM +0200, Roger Pau Monné wrote:
>> On Wed, Jun 24, 2020 at 04:01:44PM +0200, Jan Beulich wrote:
>>> On 24.06.2020 15:41, Julien Grall wrote:
>>>> On 24/06/2020 11:12, Jan Beulich wrote:
>>>>> On 23.06.2020 19:26, Roger Pau Monné wrote:
>>>>>> I'm confused. Couldn't we switch from uint64_aligned_t to plain
>>>>>> uint64_t (like it's currently on the Linux headers), and then use the
>>>>>> compat layer in Xen to handle the size difference when called from
>>>>>> 32bit environments?
>>>>>
>>>>> And which size would we use there? The old or the new one (breaking
>>>>> future or existing callers respectively)? Meanwhile I think that if
>>>>> this indeed needs to not be tools-only (which I still question),
>>>>
>>>> I think we now agreed on a subthread that the kernel needs to know the 
>>>> layout of the hypercall.
>>>>
>>>>> then our only possible route is to add explicit padding for the
>>>>> 32-bit case alongside the change you're already making.
>>>>
>>>> AFAICT Linux 32-bit doesn't have this padding. So wouldn't it make
>>>> the two incompatible?
>>>
>>> In principle yes. But they're putting the structure instance on the
>>> stack, so there's no risk from Xen reading 4 bytes too many. I'd
>>> prefer keeping the interface as is (i.e. with the previously
>>> implicit padding made explicit) to avoid the risk of breaking other
>>> possible callers. But that's just my view on it, anyway ...
>>
>> Adding the padding is cleaner because we don't need any compat stuff
>> in order to access the structure from the caller, and we also keep the
>> original layout currently present on Xen headers.
>>
>> I can prepare a fix for the Linux kernel, if this approach is fine.
> 
> So I went over this, and I'm not sure I see the point of adding the
> padding field at the end of the structure for 32-bit x86.
> 
> The current situation is the following:
> 
>  - Linux will use a struct on 32bit x86 that doesn't have the 4byte
>    padding at the end.
>  - Xen will copy 4bytes of garbage in that case, since the struct on
>    Linux is allocated on the stack.
> 
> So I suggest we take the approach found on this patch, that is remove
> the 8byte alignment from the frame field, which will in turn remove
> 4bytes of padding from the tail of the structure on 32bit x86.
> 
> That would leave the following scenario:
> 
>  - The struct layout in Linux headers would be correct.
>  - Xen already handles the struct size difference on x86 32bit vs
>    64bit, as the compat layer is currently doing the copy in
>    compat_memory_op taking into account the size of the compat
>    structure.

Hmm, I didn't even notice this until now - it looks to do so
indeed, but apparently because of a bug: The original
uint64_aligned_t gets translated to mere uint64_t in the
compat header, whereas it should have been retained. This
means that my concern of ...

>  - Removing the padding will work for all use cases: Linux will
>    already be using the correct layout on x86 32bits, so no change
>    will be required there. Any consumers using the tail padded
>    structure will continue to work without issues, as Xen simply won't
>    copy the trailing 4 bytes.

... code using the new definition then potentially not working
correctly on 4.13, at least on versions not having this
backported, was not actually true.

I'll try to sort out this other bug then ...

> So I think the solution proposed in this patch is the correct one:
> switch uint64_aligned_t to uint64_t, no tail padding added on x86
> 32bits. I will adjust the commit message and resubmit if that's fine.

I think it is indeed.

Jan
Jan Beulich June 26, 2020, 2:19 p.m. UTC | #23
On 26.06.2020 15:40, Jan Beulich wrote:
> On 25.06.2020 18:10, Roger Pau Monné wrote:
>> On Thu, Jun 25, 2020 at 11:05:52AM +0200, Roger Pau Monné wrote:
>>> On Wed, Jun 24, 2020 at 04:01:44PM +0200, Jan Beulich wrote:
>>>> On 24.06.2020 15:41, Julien Grall wrote:
>>>>> On 24/06/2020 11:12, Jan Beulich wrote:
>>>>>> On 23.06.2020 19:26, Roger Pau Monné wrote:
>>>>>>> I'm confused. Couldn't we switch from uint64_aligned_t to plain
>>>>>>> uint64_t (like it's currently on the Linux headers), and then use the
>>>>>>> compat layer in Xen to handle the size difference when called from
>>>>>>> 32bit environments?
>>>>>>
>>>>>> And which size would we use there? The old or the new one (breaking
>>>>>> future or existing callers respectively)? Meanwhile I think that if
>>>>>> this indeed needs to not be tools-only (which I still question),
>>>>>
>>>>> I think we now agreed on a subthread that the kernel needs to know the 
>>>>> layout of the hypercall.
>>>>>
>>>>>> then our only possible route is to add explicit padding for the
>>>>>> 32-bit case alongside the change you're already making.
>>>>>
> >>>>> AFAICT Linux 32-bit doesn't have this padding. So wouldn't it make
> >>>>> the two incompatible?
>>>>
>>>> In principle yes. But they're putting the structure instance on the
>>>> stack, so there's no risk from Xen reading 4 bytes too many. I'd
>>>> prefer keeping the interface as is (i.e. with the previously
>>>> implicit padding made explicit) to avoid the risk of breaking other
>>>> possible callers. But that's just my view on it, anyway ...
>>>
>>> Adding the padding is cleaner because we don't need any compat stuff
>>> in order to access the structure from the caller, and we also keep the
>>> original layout currently present on Xen headers.
>>>
>>> I can prepare a fix for the Linux kernel, if this approach is fine.
>>
>> So I went over this, and I'm not sure I see the point of adding the
>> padding field at the end of the structure for 32-bit x86.
>>
>> The current situation is the following:
>>
>>  - Linux will use a struct on 32bit x86 that doesn't have the 4byte
>>    padding at the end.
>>  - Xen will copy 4bytes of garbage in that case, since the struct on
>>    Linux is allocated on the stack.
>>
>> So I suggest we take the approach found on this patch, that is remove
>> the 8byte alignment from the frame field, which will in turn remove
>> 4bytes of padding from the tail of the structure on 32bit x86.
>>
>> That would leave the following scenario:
>>
>>  - The struct layout in Linux headers would be correct.
>>  - Xen already handles the struct size difference on x86 32bit vs
>>    64bit, as the compat layer is currently doing the copy in
>>    compat_memory_op taking into account the size of the compat
>>    structure.
> 
> Hmm, I didn't even notice this until now - it looks to do so
> indeed, but apparently because of a bug: The original
> uint64_aligned_t gets translated to mere uint64_t in the
> compat header, whereas it should have been retained. This
> means that my concern of ...
> 
>>  - Removing the padding will work for all use cases: Linux will
>>    already be using the correct layout on x86 32bits, so no change
>>    will be required there. Any consumers using the tail padded
>>    structure will continue to work without issues, as Xen simply won't
>>    copy the trailing 4 bytes.
> 
> ... code using the new definition then potentially not working
> correctly on  4.13, at least on versions not having this
> backported, was not actually true.
> 
> I'll try to sort out this other bug then ...

I was wrong, there is no bug - translating uint64_aligned_t to
uint64_t is fine, as these are seen only by 64-bit code, where
both are identical anyway. Hence there still is the concern that
code working fine on the supposed 4.14 might then not work on
unfixed 4.13, due to 4.13 copying 4 extra bytes.
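
To spell out the caller pattern this worry rests on (hypothetical
names, not the actual Linux code):

#include <stdint.h>
#include <string.h>

struct xen_mem_acquire_resource_32 {      /* 28 bytes when built -m32 */
    uint16_t domid, type;
    uint32_t id, nr_frames, pad;
    uint64_t frame;
    uint32_t frame_list;
};

/* Stand-in for the real HYPERVISOR_memory_op() wrapper. */
extern long hypothetical_acquire_resource(
    struct xen_mem_acquire_resource_32 *arg);

long acquire(void)
{
    /* The instance lives on the caller's stack: an unfixed Xen that
     * copies 4 bytes past it only reads adjacent stack contents,
     * which it then never interprets. */
    struct xen_mem_acquire_resource_32 xmar;

    memset(&xmar, 0, sizeof(xmar));
    return hypothetical_acquire_resource(&xmar);
}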

Jan
Roger Pau Monné June 26, 2020, 3:03 p.m. UTC | #24
On Fri, Jun 26, 2020 at 04:19:36PM +0200, Jan Beulich wrote:
> On 26.06.2020 15:40, Jan Beulich wrote:
> > On 25.06.2020 18:10, Roger Pau Monné wrote:
> >> On Thu, Jun 25, 2020 at 11:05:52AM +0200, Roger Pau Monné wrote:
> >>> On Wed, Jun 24, 2020 at 04:01:44PM +0200, Jan Beulich wrote:
> >>>> On 24.06.2020 15:41, Julien Grall wrote:
> >>>>> On 24/06/2020 11:12, Jan Beulich wrote:
> >>>>>> On 23.06.2020 19:26, Roger Pau Monné wrote:
> >>>>>>> I'm confused. Couldn't we switch from uint64_aligned_t to plain
> >>>>>>> uint64_t (like it's currently on the Linux headers), and then use the
> >>>>>>> compat layer in Xen to handle the size difference when called from
> >>>>>>> 32bit environments?
> >>>>>>
> >>>>>> And which size would we use there? The old or the new one (breaking
> >>>>>> future or existing callers respectively)? Meanwhile I think that if
> >>>>>> this indeed needs to not be tools-only (which I still question),
> >>>>>
> >>>>> I think we now agreed on a subthread that the kernel needs to know the 
> >>>>> layout of the hypercall.
> >>>>>
> >>>>>> then our only possible route is to add explicit padding for the
> >>>>>> 32-bit case alongside the change you're already making.
> >>>>>
> >>>>> AFAICT Linux 32-bit doesn't have this padding. So wouldn't it make
> >>>>> the two incompatible?
> >>>>
> >>>> In principle yes. But they're putting the structure instance on the
> >>>> stack, so there's no risk from Xen reading 4 bytes too many. I'd
> >>>> prefer keeping the interface as is (i.e. with the previously
> >>>> implicit padding made explicit) to avoid the risk of breaking other
> >>>> possible callers. But that's just my view on it, anyway ...
> >>>
> >>> Adding the padding is cleaner because we don't need any compat stuff
> >>> in order to access the structure from the caller, and we also keep the
> >>> original layout currently present on Xen headers.
> >>>
> >>> I can prepare a fix for the Linux kernel, if this approach is fine.
> >>
> >> So I went over this, and I'm not sure I see the point of adding the
> >> padding field at the end of the structure for 32-bit x86.
> >>
> >> The current situation is the following:
> >>
> >>  - Linux will use a struct on 32bit x86 that doesn't have the 4byte
> >>    padding at the end.
> >>  - Xen will copy 4bytes of garbage in that case, since the struct on
> >>    Linux is allocated on the stack.
> >>
> >> So I suggest we take the approach found on this patch, that is remove
> >> the 8byte alignment from the frame field, which will in turn remove
> >> 4bytes of padding from the tail of the structure on 32bit x86.
> >>
> >> That would leave the following scenario:
> >>
> >>  - The struct layout in Linux headers would be correct.
> >>  - Xen already handles the struct size difference on x86 32bit vs
> >>    64bit, as the compat layer is currently doing the copy in
> >>    compat_memory_op taking into account the size of the compat
> >>    structure.
> > 
> > Hmm, I didn't even notice this until now - it looks to do so
> > indeed, but apparently because of a bug: The original
> > uint64_aligned_t gets translated to mere uint64_t in the
> > compat header, whereas it should have been retained. This
> > means that my concern of ...
> > 
> >>  - Removing the padding will work for all use cases: Linux will
> >>    already be using the correct layout on x86 32bits, so no change
> >>    will be required there. Any consumers using the tail padded
> >>    structure will continue to work without issues, as Xen simply won't
> >>    copy the trailing 4 bytes.
> > 
> > ... code using the new definition then potentially not working
> > correctly on  4.13, at least on versions not having this
> > backported, was not actually true.
> > 
> > I'll try to sort out this other bug then ...
> 
> I was wrong, there is no bug - translating uint64_aligned_t to
> uint64_t is fine, as these are seen only by 64-bit code, where
> both are identical anyway. Hence there still is the concern that
> code working fine on the supposed 4.14 might then not work on
> unfixed 4.13, due to 4.13 copying 4 extra bytes.

So here are the structures on 64-bit x86 according to pahole against
xen-syms:

struct xen_mem_acquire_resource {
        domid_t                    domid;                /*     0     2 */
        uint16_t                   type;                 /*     2     2 */
        uint32_t                   id;                   /*     4     4 */
        uint32_t                   nr_frames;            /*     8     4 */
        uint32_t                   pad;                  /*    12     4 */
        uint64_t                   frame;                /*    16     8 */
        __guest_handle_xen_pfn_t   frame_list;           /*    24     8 */

        /* size: 32, cachelines: 1, members: 7 */
        /* last cacheline: 32 bytes */
};

struct compat_mem_acquire_resource {
        domid_compat_t             domid;                /*     0     2 */
        uint16_t                   type;                 /*     2     2 */
        uint32_t                   id;                   /*     4     4 */
        uint32_t                   nr_frames;            /*     8     4 */
        uint32_t                   pad;                  /*    12     4 */
        uint64_t                   frame;                /*    16     8 */
        __compat_handle_compat_pfn_t frame_list;         /*    24     4 */

        /* size: 28, cachelines: 1, members: 7 */
        /* last cacheline: 28 bytes */
};

There's no trailing padding on the compat struct ATM, and hence the
current code will behave correctly when used against a compat
structure without the trailing padding (as it's already ignored).

There's a #pragma pack(4) at the top of compat/memory.h which forces
this AFAICT. So I think the suggested approach is fine and will avoid
any breakage.
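
For completeness, a small self-contained sketch that reproduces the
layout pahole reports (simplified stand-in types; the pragma mirrors
the one in compat/memory.h):

#include <stdint.h>

#pragma pack(4)
struct compat_sketch {
    uint16_t domid, type;
    uint32_t id, nr_frames, pad;
    uint64_t frame;       /* offset 16: alignment capped at 4 bytes */
    uint32_t frame_list;  /* offset 24: 4-byte compat handle */
};
#pragma pack()

_Static_assert(sizeof(struct compat_sketch) == 28,
               "no trailing padding in the compat view");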

Roger.
Jan Beulich June 26, 2020, 3:25 p.m. UTC | #25
On 26.06.2020 17:03, Roger Pau Monné wrote:
> On Fri, Jun 26, 2020 at 04:19:36PM +0200, Jan Beulich wrote:
>> On 26.06.2020 15:40, Jan Beulich wrote:
>>> On 25.06.2020 18:10, Roger Pau Monné wrote:
>>>> On Thu, Jun 25, 2020 at 11:05:52AM +0200, Roger Pau Monné wrote:
>>>>> On Wed, Jun 24, 2020 at 04:01:44PM +0200, Jan Beulich wrote:
>>>>>> On 24.06.2020 15:41, Julien Grall wrote:
>>>>>>> On 24/06/2020 11:12, Jan Beulich wrote:
>>>>>>>> On 23.06.2020 19:26, Roger Pau Monné wrote:
>>>>>>>>> I'm confused. Couldn't we switch from uint64_aligned_t to plain
>>>>>>>>> uint64_t (like it's currently on the Linux headers), and then use the
>>>>>>>>> compat layer in Xen to handle the size difference when called from
>>>>>>>>> 32bit environments?
>>>>>>>>
>>>>>>>> And which size would we use there? The old or the new one (breaking
>>>>>>>> future or existing callers respectively)? Meanwhile I think that if
>>>>>>>> this indeed needs to not be tools-only (which I still question),
>>>>>>>
>>>>>>> I think we now agreed on a subthread that the kernel needs to know the 
>>>>>>> layout of the hypercall.
>>>>>>>
>>>>>>>> then our only possible route is to add explicit padding for the
>>>>>>>> 32-bit case alongside the change you're already making.
>>>>>>>
>>>>>>> AFAICT Linux 32-bit doesn't have this padding. So wouldn't it make
>>>>>>> the two incompatible?
>>>>>>
>>>>>> In principle yes. But they're putting the structure instance on the
>>>>>> stack, so there's no risk from Xen reading 4 bytes too many. I'd
>>>>>> prefer keeping the interface as is (i.e. with the previously
>>>>>> implicit padding made explicit) to avoid the risk of breaking other
>>>>>> possible callers. But that's just my view on it, anyway ...
>>>>>
>>>>> Adding the padding is cleaner because we don't need any compat stuff
>>>>> in order to access the structure from the caller, and we also keep the
>>>>> original layout currently present on Xen headers.
>>>>>
>>>>> I can prepare a fix for the Linux kernel, if this approach is fine.
>>>>
>>>> So I went over this, and I'm not sure I see the point of adding the
>>>> padding field at the end of the structure for 32-bit x86.
>>>>
>>>> The current situation is the following:
>>>>
>>>>  - Linux will use a struct on 32bit x86 that doesn't have the 4byte
>>>>    padding at the end.
>>>>  - Xen will copy 4bytes of garbage in that case, since the struct on
>>>>    Linux is allocated on the stack.
>>>>
>>>> So I suggest we take the approach found on this patch, that is remove
>>>> the 8byte alignment from the frame field, which will in turn remove
>>>> 4bytes of padding from the tail of the structure on 32bit x86.
>>>>
>>>> That would leave the following scenario:
>>>>
>>>>  - The struct layout in Linux headers would be correct.
>>>>  - Xen already handles the struct size difference on x86 32bit vs
>>>>    64bit, as the compat layer is currently doing the copy in
>>>>    compat_memory_op taking into account the size of the compat
>>>>    structure.
>>>
>>> Hmm, I didn't even notice this until now - it looks to do so
>>> indeed, but apparently because of a bug: The original
>>> uint64_aligned_t gets translated to mere uint64_t in the
>>> compat header, whereas it should have been retained. This
>>> means that my concern of ...
>>>
>>>>  - Removing the padding will work for all use cases: Linux will
>>>>    already be using the correct layout on x86 32bits, so no change
>>>>    will be required there. Any consumers using the tail padded
>>>>    structure will continue to work without issues, as Xen simply won't
>>>>    copy the trailing 4 bytes.
>>>
>>> ... code using the new definition then potentially not working
>>> correctly on  4.13, at least on versions not having this
>>> backported, was not actually true.
>>>
>>> I'll try to sort out this other bug then ...
>>
>> I was wrong, there is no bug - translating uint64_aligned_t to
>> uint64_t is fine, as these are seen only by 64-bit code, where
>> both are identical anyway. Hence there still is the concern that
>> code working fine on the supposed 4.14 might then not work on
>> unfixed 4.13, due to 4.13 copying 4 extra bytes.
> 
> So here are the structures on 64bit x86 according to pahole against
> xen-syms:
> 
> struct xen_mem_acquire_resource {
>         domid_t                    domid;                /*     0     2 */
>         uint16_t                   type;                 /*     2     2 */
>         uint32_t                   id;                   /*     4     4 */
>         uint32_t                   nr_frames;            /*     8     4 */
>         uint32_t                   pad;                  /*    12     4 */
>         uint64_t                   frame;                /*    16     8 */
>         __guest_handle_xen_pfn_t   frame_list;           /*    24     8 */
> 
>         /* size: 32, cachelines: 1, members: 7 */
>         /* last cacheline: 32 bytes */
> };
> 
> struct compat_mem_acquire_resource {
>         domid_compat_t             domid;                /*     0     2 */
>         uint16_t                   type;                 /*     2     2 */
>         uint32_t                   id;                   /*     4     4 */
>         uint32_t                   nr_frames;            /*     8     4 */
>         uint32_t                   pad;                  /*    12     4 */
>         uint64_t                   frame;                /*    16     8 */
>         __compat_handle_compat_pfn_t frame_list;         /*    24     4 */
> 
>         /* size: 28, cachelines: 1, members: 7 */
>         /* last cacheline: 28 bytes */
> };
> 
> There's no trailing padding on the compat struct ATM, and hence the
> current code will behave correctly when used against a compat
> structure without the trailing padding (as it's already ignored).
> 
> There's a #pragma pack(4) at the top of compat/memory.h which forces
> this AFAICT. So I think the suggested approach is fine and will avoid
> any breakage.

Oh, so I was misled to believe there's no bug with the uint64_aligned_t
handling because, after making it survive the compat header
generation, the generated code didn't change. But that's only because
the aligned() attribute has no effect in a #pragma pack() region, not
because things work as intended. So indeed, another 180° turn later, I
again agree that your change - with an extended description - ought to
be fine. The bug we'll have to deal with has become more difficult now,
though: we can't use #pragma pack() then, but we also can't attach
packed attributes to the structs and unions, as that would force 1-byte
packing instead of the 4-byte one.
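
A quick standalone way to observe this (a sketch that prints the
offsets rather than asserting them, since this behaviour is exactly
what's at issue):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t uint64_aligned_sketch_t __attribute__((aligned(8)));

#pragma pack(4)
struct pack_region_sketch {
    uint32_t a;
    uint64_aligned_sketch_t b;  /* aligned(8) inside a pack(4) region */
};
#pragma pack()

int main(void)
{
    /* Per the above, pack(4) wins: b lands at offset 4 and sizeof is
     * 12, not the 8/16 an honoured aligned(8) member would produce. */
    printf("offsetof(b) = %zu, sizeof = %zu\n",
           offsetof(struct pack_region_sketch, b),
           sizeof(struct pack_region_sketch));
    return 0;
}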

Jan
diff mbox series

Patch

diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index dbd35305df..1767d7d5f5 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -607,6 +607,8 @@  struct xen_reserved_device_memory_map {
 typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
 DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
 
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
 /*
  * Get the pages for a particular guest resource, so that they can be
  * mapped directly by a tools domain.
@@ -645,7 +647,7 @@  struct xen_mem_acquire_resource {
      * IN - the index of the initial frame to be mapped. This parameter
      *      is ignored if nr_frames is 0.
      */
-    uint64_aligned_t frame;
+    uint64_t frame;
 
 #define XENMEM_resource_ioreq_server_frame_bufioreq 0
 #define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
@@ -666,8 +668,6 @@  struct xen_mem_acquire_resource {
 typedef struct xen_mem_acquire_resource xen_mem_acquire_resource_t;
 DEFINE_XEN_GUEST_HANDLE(xen_mem_acquire_resource_t);
 
-#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
-
 /*
  * XENMEM_get_vnumainfo used by guest to get
  * vNUMA topology from hypervisor.