
xen/common: Do not allocate magic pages 1:1 for direct mapped domains

Message ID 20240226011935.169462-1-xin.wang2@amd.com (mailing list archive)
State New, archived
Series xen/common: Do not allocate magic pages 1:1 for direct mapped domains

Commit Message

Henry Wang Feb. 26, 2024, 1:19 a.m. UTC
An error message can be seen from the init-dom0less application on
direct-mapped 1:1 domains:
```
Allocating magic pages
memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1
Error on alloc magic pages
```

This is because populate_physmap() automatically assumes gfn == mfn
for direct mapped domains. This cannot be true for the magic pages
that are allocated later for Dom0less DomUs from the init-dom0less
helper application executed in Dom0.

Force populate_physmap to take the "normal" memory allocation route for
the magic pages even for 1:1 Dom0less DomUs. This should work as long
as the 1:1 Dom0less DomU doesn't have anything else mapped at the same
guest address as the magic pages:
- gfn 0x39000 address 0x39000000
- gfn 0x39001 address 0x39001000
- gfn 0x39002 address 0x39002000
- gfn 0x39003 address 0x39003000
Create helper is_magic_gpfn() for Arm to assist this and stub helpers
for non-Arm architectures to avoid #ifdef. Move the definition of the
magic pages on Arm to a more common place.

Note that the init-dom0less application of the diffenent Xen version
may allocate all or part of four magic pages for each DomU.

Reported-by: Alec Kwapis <alec.kwapis@medtronic.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Henry Wang <xin.wang2@amd.com>
---
 tools/libs/guest/xg_dom_arm.c   |  6 ------
 xen/arch/arm/include/asm/mm.h   | 13 +++++++++++++
 xen/arch/ppc/include/asm/mm.h   |  5 +++++
 xen/arch/riscv/include/asm/mm.h |  6 ++++++
 xen/arch/x86/include/asm/mm.h   |  5 +++++
 xen/common/memory.c             |  2 +-
 xen/include/public/arch-arm.h   |  6 ++++++
 7 files changed, 36 insertions(+), 7 deletions(-)

Comments

Jan Beulich Feb. 26, 2024, 8:25 a.m. UTC | #1
On 26.02.2024 02:19, Henry Wang wrote:
> --- a/xen/arch/arm/include/asm/mm.h
> +++ b/xen/arch/arm/include/asm/mm.h
> @@ -428,6 +428,19 @@ static inline void page_set_xenheap_gfn(struct page_info *p, gfn_t gfn)
>      } while ( (y = cmpxchg(&p->u.inuse.type_info, x, nx)) != x );
>  }
>  
> +#define MAGIC_PAGE_N_GPFN(n)     ((GUEST_MAGIC_BASE >> PAGE_SHIFT) + n)
> +static inline bool is_magic_gpfn(xen_pfn_t gpfn)
> +{
> +    unsigned int i;
> +    for ( i = 0; i < NR_MAGIC_PAGES; i++ )

Nit: Blank line please between declaration(s) and statement(s).

> --- a/xen/arch/ppc/include/asm/mm.h
> +++ b/xen/arch/ppc/include/asm/mm.h
> @@ -256,4 +256,9 @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
>      return true;
>  }
>  
> +static inline bool is_magic_gpfn(xen_pfn_t gpfn)
> +{
> +    return false;
> +}
> +
>  #endif /* _ASM_PPC_MM_H */
> --- a/xen/arch/riscv/include/asm/mm.h
> +++ b/xen/arch/riscv/include/asm/mm.h
> @@ -3,6 +3,7 @@
>  #ifndef _ASM_RISCV_MM_H
>  #define _ASM_RISCV_MM_H
>  
> +#include <public/xen.h>
>  #include <asm/page-bits.h>
>  
>  #define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
> @@ -20,4 +21,9 @@ unsigned long calc_phys_offset(void);
>  
>  void turn_on_mmu(unsigned long ra);
>  
> +static inline bool is_magic_gpfn(xen_pfn_t gpfn)
> +{
> +    return false;
> +}
> +
>  #endif /* _ASM_RISCV_MM_H */
> --- a/xen/arch/x86/include/asm/mm.h
> +++ b/xen/arch/x86/include/asm/mm.h
> @@ -628,4 +628,9 @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
>      return (mfn + nr) <= (virt_to_mfn(eva - 1) + 1);
>  }
>  
> +static inline bool is_magic_gpfn(xen_pfn_t gpfn)
> +{
> +    return false;
> +}

I don't think every arch should need to gain such a dummy function. Plus
the function name doesn't clarify at all what kind of "magic" this is
about. Plus I think the (being phased out) term "gpfn" would better not
be used in new functions anymore. Instead type-safe gfn_t would likely
better be used as parameter type.
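
[Editor's note: for illustration only, the shape these comments point towards might look as below. The helper name, its placement, and the fallback mechanism are assumptions of this sketch, not an agreed interface: the Arm version takes a type-safe gfn_t, and a single central fallback keyed on the identifier replaces the per-arch stubs.]

```c
/* Hypothetical sketch -- names and placement are illustrative only. */

/* xen/arch/arm/include/asm/mm.h */
static inline bool is_guest_magic_gfn(gfn_t gfn)
{
    unsigned int i;

    for ( i = 0; i < NR_MAGIC_PAGES; i++ )
        if ( gfn_eq(gfn, gfn_add(gaddr_to_gfn(GUEST_MAGIC_BASE), i)) )
            return true;

    return false;
}
#define is_guest_magic_gfn is_guest_magic_gfn

/* Common header: single fallback, used only when no arch definition exists. */
#ifndef is_guest_magic_gfn
static inline bool is_guest_magic_gfn(gfn_t gfn)
{
    return false;
}
#endif
```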

> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -219,7 +219,7 @@ static void populate_physmap(struct memop_args *a)
>          }
>          else
>          {
> -            if ( is_domain_direct_mapped(d) )
> +            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
>              {
>                  mfn = _mfn(gpfn);
>  

I wonder whether is_domain_direct_mapped() shouldn't either be cloned
into e.g. is_gfn_direct_mapped(d, gfn), or be adjusted in-place to gain
such a (then optional) 2nd parameter. (Of course there again shouldn't be
a need for every domain to define a stub is_domain_direct_mapped() - if
not defined by an arch header, the stub can be supplied in a single
central place.)
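
[Editor's note: a purely illustrative sketch of the first variant Jan mentions (the cloned predicate); all names here are hypothetical.]

```c
/* Hypothetical sketch only. */

/* Arm header: the 1:1 assumption holds except for the guest "magic" GFNs. */
#define is_gfn_direct_mapped(d, gfn) \
    (is_domain_direct_mapped(d) && !is_guest_magic_gfn(gfn))

/* Single central fallback, keyed on the identifier, so no per-arch stubs. */
#ifndef is_gfn_direct_mapped
#define is_gfn_direct_mapped(d, gfn) is_domain_direct_mapped(d)
#endif
```

The populate_physmap() check would then read is_gfn_direct_mapped(d, _gfn(gpfn)), with no arch-specific stub needed in common code.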

> --- a/xen/include/public/arch-arm.h
> +++ b/xen/include/public/arch-arm.h
> @@ -476,6 +476,12 @@ typedef uint64_t xen_callback_t;
>  #define GUEST_MAGIC_BASE  xen_mk_ullong(0x39000000)
>  #define GUEST_MAGIC_SIZE  xen_mk_ullong(0x01000000)
>  
> +#define NR_MAGIC_PAGES 4
> +#define CONSOLE_PFN_OFFSET 0
> +#define XENSTORE_PFN_OFFSET 1
> +#define MEMACCESS_PFN_OFFSET 2
> +#define VUART_PFN_OFFSET 3
> +
>  #define GUEST_RAM_BANKS   2

Of these only NR_MAGIC_PAGES is really used in Xen, afaics.

Also while this is added to a tools-only section, I'm also concerned of
the ongoing additions here without suitable XEN_ prefixes. Any number
of kinds of magic pages may exist for other reasons in a platform; which
ones are meant would therefore better be sufficiently clear from the
identifier used.

Jan
Julien Grall Feb. 26, 2024, 9:13 a.m. UTC | #2
Hi Henry,

Welcome back!

On 26/02/2024 01:19, Henry Wang wrote:
> An error message can seen from the init-dom0less application on
> direct-mapped 1:1 domains:
> ```
> Allocating magic pages
> memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1
> Error on alloc magic pages
> ```
> 
> This is because populate_physmap() automatically assumes gfn == mfn
> for direct mapped domains. This cannot be true for the magic pages
> that are allocated later for Dom0less DomUs from the init-dom0less
> helper application executed in Dom0.
> 
> Force populate_physmap to take the "normal" memory allocation route for
> the magic pages even for 1:1 Dom0less DomUs. This should work as long
> as the 1:1 Dom0less DomU doesn't have anything else mapped at the same
> guest address as the magic pages:
> - gfn 0x39000 address 0x39000000
> - gfn 0x39001 address 0x39001000
> - gfn 0x39002 address 0x39002000
> - gfn 0x39003 address 0x39003000

This is very fragile. You are making the assumption that the magic pages 
are not clashing with any RAM region. The layout defined in arch-arm.h 
has been designed for guest where Xen is in full control of the layout. 
This is not the case for directmapped domain. I don't think it is 
correct to try to re-use part of the layout.

If you want to use 1:1 dom0less with xenstore & co, then you should find 
a different place in memory for the magic pages (TBD how to find that 
area). You will still have the problem of the 1:1 allocation, but I 
think this could be solved by adding a flag to force a non-1:1 allocation.

> Create helper is_magic_gpfn() for Arm to assist this and stub helpers
> for non-Arm architectures to avoid #ifdef. Move the definition of the
> magic pages on Arm to a more common place.
> 
> Note that the init-dom0less application of the diffenent Xen version

s/diffenent/different/

> may allocate all or part of four magic pages for each DomU.
> 
> Reported-by: Alec Kwapis <alec.kwapis@medtronic.com>
> Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
> Signed-off-by: Henry Wang <xin.wang2@amd.com>
> ---
>   tools/libs/guest/xg_dom_arm.c   |  6 ------
>   xen/arch/arm/include/asm/mm.h   | 13 +++++++++++++
>   xen/arch/ppc/include/asm/mm.h   |  5 +++++
>   xen/arch/riscv/include/asm/mm.h |  6 ++++++
>   xen/arch/x86/include/asm/mm.h   |  5 +++++
>   xen/common/memory.c             |  2 +-
>   xen/include/public/arch-arm.h   |  6 ++++++
>   7 files changed, 36 insertions(+), 7 deletions(-)
> 
> diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c
> index 2fd8ee7ad4..8c579d7576 100644
> --- a/tools/libs/guest/xg_dom_arm.c
> +++ b/tools/libs/guest/xg_dom_arm.c
> @@ -25,12 +25,6 @@
>   
>   #include "xg_private.h"
>   
> -#define NR_MAGIC_PAGES 4
> -#define CONSOLE_PFN_OFFSET 0
> -#define XENSTORE_PFN_OFFSET 1
> -#define MEMACCESS_PFN_OFFSET 2
> -#define VUART_PFN_OFFSET 3
> -
>   #define LPAE_SHIFT 9
>   
>   #define PFN_4K_SHIFT  (0)
> diff --git a/xen/arch/arm/include/asm/mm.h b/xen/arch/arm/include/asm/mm.h
> index cbcf3bf147..17149b4635 100644
> --- a/xen/arch/arm/include/asm/mm.h
> +++ b/xen/arch/arm/include/asm/mm.h
> @@ -428,6 +428,19 @@ static inline void page_set_xenheap_gfn(struct page_info *p, gfn_t gfn)
>       } while ( (y = cmpxchg(&p->u.inuse.type_info, x, nx)) != x );
>   }
>   
> +#define MAGIC_PAGE_N_GPFN(n)     ((GUEST_MAGIC_BASE >> PAGE_SHIFT) + n)
> +static inline bool is_magic_gpfn(xen_pfn_t gpfn)
> +{
> +    unsigned int i;
> +    for ( i = 0; i < NR_MAGIC_PAGES; i++ )
> +    {
> +        if ( gpfn == MAGIC_PAGE_N_GPFN(i) )
> +            return true;
> +    }
> +
> +    return false;
> +}
> +
>   #endif /*  __ARCH_ARM_MM__ */
>   /*
>    * Local variables:
> diff --git a/xen/arch/ppc/include/asm/mm.h b/xen/arch/ppc/include/asm/mm.h
> index a433936076..8ad81d9552 100644
> --- a/xen/arch/ppc/include/asm/mm.h
> +++ b/xen/arch/ppc/include/asm/mm.h
> @@ -256,4 +256,9 @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
>       return true;
>   }
>   
> +static inline bool is_magic_gpfn(xen_pfn_t gpfn)
> +{
> +    return false;
> +}
> +
>   #endif /* _ASM_PPC_MM_H */
> diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h
> index 07c7a0abba..a376a77e29 100644
> --- a/xen/arch/riscv/include/asm/mm.h
> +++ b/xen/arch/riscv/include/asm/mm.h
> @@ -3,6 +3,7 @@
>   #ifndef _ASM_RISCV_MM_H
>   #define _ASM_RISCV_MM_H
>   
> +#include <public/xen.h>
>   #include <asm/page-bits.h>
>   
>   #define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
> @@ -20,4 +21,9 @@ unsigned long calc_phys_offset(void);
>   
>   void turn_on_mmu(unsigned long ra);
>   
> +static inline bool is_magic_gpfn(xen_pfn_t gpfn)
> +{
> +    return false;
> +}
> +
>   #endif /* _ASM_RISCV_MM_H */
> diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
> index 7d26d9cd2f..f385f36d78 100644
> --- a/xen/arch/x86/include/asm/mm.h
> +++ b/xen/arch/x86/include/asm/mm.h
> @@ -628,4 +628,9 @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
>       return (mfn + nr) <= (virt_to_mfn(eva - 1) + 1);
>   }
>   
> +static inline bool is_magic_gpfn(xen_pfn_t gpfn)
> +{
> +    return false;
> +}
> +
>   #endif /* __ASM_X86_MM_H__ */
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index b3b05c2ec0..ab4bad79e2 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -219,7 +219,7 @@ static void populate_physmap(struct memop_args *a)
>           }
>           else
>           {
> -            if ( is_domain_direct_mapped(d) )
> +            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )

This path will also be reached by dom0. Effectively, this will prevent 
dom0 to allocate the memory 1:1 for the magic GPFN (which is guest 
specific).

Also, why are you only checking the first GFN? What if the caller pass 
an overlapped region?

>               {
>                   mfn = _mfn(gpfn);
>   
> diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
> index a25e87dbda..58aa6ff05b 100644
> --- a/xen/include/public/arch-arm.h
> +++ b/xen/include/public/arch-arm.h
> @@ -476,6 +476,12 @@ typedef uint64_t xen_callback_t;
>   #define GUEST_MAGIC_BASE  xen_mk_ullong(0x39000000)
>   #define GUEST_MAGIC_SIZE  xen_mk_ullong(0x01000000)
>   
> +#define NR_MAGIC_PAGES 4
> +#define CONSOLE_PFN_OFFSET 0
> +#define XENSTORE_PFN_OFFSET 1
> +#define MEMACCESS_PFN_OFFSET 2
> +#define VUART_PFN_OFFSET 3

Regardless of what I wrote above, it is not clear to me why you need to 
move these macros in public header. Just above, we are defining the 
magic region (see GUEST_MAGIC_BASE and GUEST_MAGIC_SIZE). This should be 
sufficient to detect whether a GFN belongs to the magic region.
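
[Editor's note: for illustration, such a check could be a simple range test built only from the existing macros. This is a hedged sketch, not Julien's exact wording, and the helper name is hypothetical.]

```c
/* Sketch only: detect the magic region purely from GUEST_MAGIC_BASE/SIZE. */
static inline bool is_guest_magic_gfn(gfn_t gfn)
{
    return gfn_x(gfn) >= PFN_DOWN(GUEST_MAGIC_BASE) &&
           gfn_x(gfn) < PFN_DOWN(GUEST_MAGIC_BASE + GUEST_MAGIC_SIZE);
}
```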

Cheers,
Michal Orzel Feb. 26, 2024, 10:29 a.m. UTC | #3
Hi Henry,

On 26/02/2024 02:19, Henry Wang wrote:
> An error message can seen from the init-dom0less application on
> direct-mapped 1:1 domains:
> ```
> Allocating magic pages
> memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1
> Error on alloc magic pages
> ```
> 
> This is because populate_physmap() automatically assumes gfn == mfn
> for direct mapped domains. This cannot be true for the magic pages
> that are allocated later for Dom0less DomUs from the init-dom0less
> helper application executed in Dom0.
> 
> Force populate_physmap to take the "normal" memory allocation route for
> the magic pages even for 1:1 Dom0less DomUs. This should work as long
> as the 1:1 Dom0less DomU doesn't have anything else mapped at the same
> guest address as the magic pages:
> - gfn 0x39000 address 0x39000000
> - gfn 0x39001 address 0x39001000
> - gfn 0x39002 address 0x39002000
> - gfn 0x39003 address 0x39003000
> Create helper is_magic_gpfn() for Arm to assist this and stub helpers
> for non-Arm architectures to avoid #ifdef. Move the definition of the
> magic pages on Arm to a more common place.
> 
> Note that the init-dom0less application of the diffenent Xen version
> may allocate all or part of four magic pages for each DomU.
> 
> Reported-by: Alec Kwapis <alec.kwapis@medtronic.com>
> Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
> Signed-off-by: Henry Wang <xin.wang2@amd.com>
NIT: Generally, the first SOB is the same as the author of the patch.

[...]
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index b3b05c2ec0..ab4bad79e2 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -219,7 +219,7 @@ static void populate_physmap(struct memop_args *a)
>          }
>          else
>          {
> -            if ( is_domain_direct_mapped(d) )
> +            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
I struggle to understand the goal of this patch and the proposed solution.
The problem with magic pages applies to static mem domUs in general.
A direct mapped domU is a static mem domU whose memory is 1:1 mapped.
Let's say we try to map a magic page for a direct mapped domU. That check will be false
and the execution will move to the next one i.e. is_domain_using_staticmem(d).
This check will be true and acquire_reserved_page() will fail instead (similar to the
static mem (no direct map) scenario). The only thing that has changed is the message.
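
[Editor's note: the fallthrough Michal describes is roughly the following; this is an abridged paraphrase of the populate_physmap() branches, not verbatim Xen code.]

```c
if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
{
    /* 1:1 path: mfn == gpfn, pages must already belong to the domain. */
}
else if ( is_domain_using_staticmem(d) )
{
    /* Static-mem path: acquire_reserved_page() -- still fails for the
     * magic GFNs, only with a different error message. */
}
else
{
    /* "Normal" path: alloc_domheap_pages(). */
}
```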

~Michal
Henry Wang Feb. 27, 2024, 1:17 p.m. UTC | #4
(-RISC-V and PPC people to avoid spamming their inbox as this is quite 
Arm specific)

Hi Julien,

On 2/26/2024 5:13 PM, Julien Grall wrote:
> Hi Henry,
>
> Welcome back!

Thanks!

> On 26/02/2024 01:19, Henry Wang wrote:
>> An error message can seen from the init-dom0less application on
>> direct-mapped 1:1 domains:
>> ```
>> Allocating magic pages
>> memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1
>> Error on alloc magic pages
>> ```
>>
>> This is because populate_physmap() automatically assumes gfn == mfn
>> for direct mapped domains. This cannot be true for the magic pages
>> that are allocated later for Dom0less DomUs from the init-dom0less
>> helper application executed in Dom0.
>>
>> Force populate_physmap to take the "normal" memory allocation route for
>> the magic pages even for 1:1 Dom0less DomUs. This should work as long
>> as the 1:1 Dom0less DomU doesn't have anything else mapped at the same
>> guest address as the magic pages:
>> - gfn 0x39000 address 0x39000000
>> - gfn 0x39001 address 0x39001000
>> - gfn 0x39002 address 0x39002000
>> - gfn 0x39003 address 0x39003000
>
> This is very fragile. You are making the assumption that the magic 
> pages are not clashing with any RAM region. The layout defined in 
> arch-arm.h has been designed for guest where Xen is in full control of 
> the layout. This is not the case for directmapped domain. I don't 
> think it is correct to try to re-use part of the layout.

Apologies for the (a bit) late reply, it took a bit longer for me to 
understand the story about directmap stuff, and yes, now I agree with 
you, for those directmapped domains we should not reuse the guest layout 
directly.

> If you want to use 1:1 dom0less with xenstore & co, then you should 
> find a different place in memory for the magic pages (TDB how to find 
> that area). 

Yes, and maybe we can use similar strategy in find_unallocated_memory() 
or find_domU_holes() to do that.

> You will still have the problem of the 1:1 allocation, but I think 
> this could be solved bty adding a flag to force a non-1:1 allocation.

After checking the code flow, below rough plan came to my mind, I think 
what we need to do is:

(1) Find a range of un-used memory using similar method in 
find_unallocated_memory()/find_domU_holes()

(2) Change the base address, i.e. GUEST_MAGIC_BASE in alloc_xs_page() in 
init-dom0less.c, to point to the address in (1) if static mem or 1:1 
directmap. (I think this is a bit tricky though, do you have any method 
in mind?)

(3) Use a flag or combination of existing flags (CDF_staticmem + 
CDF_directmap) in populate_physmap() to force the allocation of these 
magic pages using alloc_domheap_pages() - i.e. the "else" condition in 
the bottom

Would you mind sharing some thoughts on that? Thanks!

>> Create helper is_magic_gpfn() for Arm to assist this and stub helpers
>> for non-Arm architectures to avoid #ifdef. Move the definition of the
>> magic pages on Arm to a more common place.
>>
>> Note that the init-dom0less application of the diffenent Xen version
>
> s/diffenent/different/

Oops, will correct this in v2, thanks for spotting it.

>> +
>>   #endif /* __ASM_X86_MM_H__ */
>> diff --git a/xen/common/memory.c b/xen/common/memory.c
>> index b3b05c2ec0..ab4bad79e2 100644
>> --- a/xen/common/memory.c
>> +++ b/xen/common/memory.c
>> @@ -219,7 +219,7 @@ static void populate_physmap(struct memop_args *a)
>>           }
>>           else
>>           {
>> -            if ( is_domain_direct_mapped(d) )
>> +            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
>
> This path will also be reached by dom0. Effectively, this will prevent 
> dom0 to allocate the memory 1:1 for the magic GPFN (which is guest 
> specific).

I think above proposal will solve this issue.

> Also, why are you only checking the first GFN? What if the caller pass 
> an overlapped region?

I am a bit confused. My understanding is at this point we are handling 
one page at a time.

>>               {
>>                   mfn = _mfn(gpfn);
>>   diff --git a/xen/include/public/arch-arm.h 
>> b/xen/include/public/arch-arm.h
>> index a25e87dbda..58aa6ff05b 100644
>> --- a/xen/include/public/arch-arm.h
>> +++ b/xen/include/public/arch-arm.h
>> @@ -476,6 +476,12 @@ typedef uint64_t xen_callback_t;
>>   #define GUEST_MAGIC_BASE  xen_mk_ullong(0x39000000)
>>   #define GUEST_MAGIC_SIZE  xen_mk_ullong(0x01000000)
>>   +#define NR_MAGIC_PAGES 4
>> +#define CONSOLE_PFN_OFFSET 0
>> +#define XENSTORE_PFN_OFFSET 1
>> +#define MEMACCESS_PFN_OFFSET 2
>> +#define VUART_PFN_OFFSET 3
>
> Regardless of what I wrote above, it is not clear to me why you need 
> to move these macros in public header. Just above, we are defining the 
> magic region (see GUEST_MAGIC_BASE and GUEST_MAGIC_SIZE). This should 
> be sufficient to detect whether a GFN belongs to the magic region.

You are correct, I will undo the code movement in v2.

Kind regards,
Henry
Henry Wang Feb. 27, 2024, 1:24 p.m. UTC | #5
Hi Jan,

On 2/26/2024 4:25 PM, Jan Beulich wrote:
> On 26.02.2024 02:19, Henry Wang wrote:
>> --- a/xen/arch/arm/include/asm/mm.h
>> +++ b/xen/arch/arm/include/asm/mm.h
>> @@ -428,6 +428,19 @@ static inline void page_set_xenheap_gfn(struct page_info *p, gfn_t gfn)
>>       } while ( (y = cmpxchg(&p->u.inuse.type_info, x, nx)) != x );
>>   }
>>   
>> +#define MAGIC_PAGE_N_GPFN(n)     ((GUEST_MAGIC_BASE >> PAGE_SHIFT) + n)
>> +static inline bool is_magic_gpfn(xen_pfn_t gpfn)
>> +{
>> +    unsigned int i;
>> +    for ( i = 0; i < NR_MAGIC_PAGES; i++ )
> Nit: Blank line please between declaration(s) and statement(s).

Thanks, will correct in next version.

>> --- a/xen/arch/x86/include/asm/mm.h
>> +++ b/xen/arch/x86/include/asm/mm.h
>> @@ -628,4 +628,9 @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
>>       return (mfn + nr) <= (virt_to_mfn(eva - 1) + 1);
>>   }
>>   
>> +static inline bool is_magic_gpfn(xen_pfn_t gpfn)
>> +{
>> +    return false;
>> +}
> I don't think every arch should need to gain such a dummy function.

Thanks for raising the concern here and about the 
is_domain_direct_mapped(), I will try to do some clean-ups if necessary 
in next version.

> Plus
> the function name doesn't clarify at all what kind of "magic" this is
> about. Plus I think the (being phased out) term "gpfn" would better not
> be used in new functions anymore. Instead type-safe gfn_t would likely
> better be used as parameter type.

Sure, I will use gfn_t in the next version.

>> --- a/xen/common/memory.c
>> +++ b/xen/common/memory.c
>> @@ -219,7 +219,7 @@ static void populate_physmap(struct memop_args *a)
>>           }
>>           else
>>           {
>> -            if ( is_domain_direct_mapped(d) )
>> +            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
>>               {
>>                   mfn = _mfn(gpfn);
>>   
> I wonder whether is_domain_direct_mapped() shouldn't either be cloned
> into e.g. is_gfn_direct_mapped(d, gfn), or be adjusted in-place to gain
> such a (then optional) 2nd parameter. (Of course there again shouldn't be
> a need for every domain to define a stub is_domain_direct_mapped() - if
> not defined by an arch header, the stub can be supplied in a single
> central place.)

Same here, it looks like you prefer the centralized 
is_domain_direct_mapped() now, as we are having more archs. I can do the 
clean-up when sending v2. Just out of curiosity, do you think it is a 
good practice to place the is_domain_direct_mapped() implementation in 
xen/domain.h with proper arch #ifdefs? If not do you have any better 
ideas? Thanks!

>> --- a/xen/include/public/arch-arm.h
>> +++ b/xen/include/public/arch-arm.h
>> @@ -476,6 +476,12 @@ typedef uint64_t xen_callback_t;
>>   #define GUEST_MAGIC_BASE  xen_mk_ullong(0x39000000)
>>   #define GUEST_MAGIC_SIZE  xen_mk_ullong(0x01000000)
>>   
>> +#define NR_MAGIC_PAGES 4
>> +#define CONSOLE_PFN_OFFSET 0
>> +#define XENSTORE_PFN_OFFSET 1
>> +#define MEMACCESS_PFN_OFFSET 2
>> +#define VUART_PFN_OFFSET 3
>> +
>>   #define GUEST_RAM_BANKS   2
> Of these only NR_MAGIC_PAGES is really used in Xen, afaics.
> Also while this is added to a tools-only section, I'm also concerned of
> the ongoing additions here without suitable XEN_ prefixes. Any number
> of kinds of magic pages may exist for other reasons in a platform; which
> ones are meant would therefore better be sufficiently clear from the
> identifier used.

Yes you are correct, like I replied in another thread, I will undo the 
changes in next version.

Kind regards,
Henry
> Jan
Jan Beulich Feb. 27, 2024, 1:27 p.m. UTC | #6
On 27.02.2024 14:24, Henry Wang wrote:
> On 2/26/2024 4:25 PM, Jan Beulich wrote:
>> On 26.02.2024 02:19, Henry Wang wrote:
>>> --- a/xen/common/memory.c
>>> +++ b/xen/common/memory.c
>>> @@ -219,7 +219,7 @@ static void populate_physmap(struct memop_args *a)
>>>           }
>>>           else
>>>           {
>>> -            if ( is_domain_direct_mapped(d) )
>>> +            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
>>>               {
>>>                   mfn = _mfn(gpfn);
>>>   
>> I wonder whether is_domain_direct_mapped() shouldn't either be cloned
>> into e.g. is_gfn_direct_mapped(d, gfn), or be adjusted in-place to gain
>> such a (then optional) 2nd parameter. (Of course there again shouldn't be
>> a need for every domain to define a stub is_domain_direct_mapped() - if
>> not defined by an arch header, the stub can be supplied in a single
>> central place.)
> 
> Same here, it looks like you prefer the centralized 
> is_domain_direct_mapped() now, as we are having more archs. I can do the 
> clean-up when sending v2. Just out of curiosity, do you think it is a 
> good practice to place the is_domain_direct_mapped() implementation in 
> xen/domain.h with proper arch #ifdefs?

arch #ifdefs? I'd like to avoid such, if at all possible. Generic fallbacks
generally ought to key their conditionals to the very identifier not
(already) being defined.

Jan
Henry Wang Feb. 27, 2024, 1:27 p.m. UTC | #7
Hi Michal,

On 2/26/2024 6:29 PM, Michal Orzel wrote:
> Hi Henry,
>
> On 26/02/2024 02:19, Henry Wang wrote:
>> An error message can seen from the init-dom0less application on
>> direct-mapped 1:1 domains:
>> ```
>> Allocating magic pages
>> memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1
>> Error on alloc magic pages
>> ```
>>
>> This is because populate_physmap() automatically assumes gfn == mfn
>> for direct mapped domains. This cannot be true for the magic pages
>> that are allocated later for Dom0less DomUs from the init-dom0less
>> helper application executed in Dom0.
>>
>> Force populate_physmap to take the "normal" memory allocation route for
>> the magic pages even for 1:1 Dom0less DomUs. This should work as long
>> as the 1:1 Dom0less DomU doesn't have anything else mapped at the same
>> guest address as the magic pages:
>> - gfn 0x39000 address 0x39000000
>> - gfn 0x39001 address 0x39001000
>> - gfn 0x39002 address 0x39002000
>> - gfn 0x39003 address 0x39003000
>> Create helper is_magic_gpfn() for Arm to assist this and stub helpers
>> for non-Arm architectures to avoid #ifdef. Move the definition of the
>> magic pages on Arm to a more common place.
>>
>> Note that the init-dom0less application of the diffenent Xen version
>> may allocate all or part of four magic pages for each DomU.
>>
>> Reported-by: Alec Kwapis <alec.kwapis@medtronic.com>
>> Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
>> Signed-off-by: Henry Wang <xin.wang2@amd.com>
> NIT: Generally, the first SOB is the same as author of the patch.

I will fix in the next version. Thanks.

> [...]
>> diff --git a/xen/common/memory.c b/xen/common/memory.c
>> index b3b05c2ec0..ab4bad79e2 100644
>> --- a/xen/common/memory.c
>> +++ b/xen/common/memory.c
>> @@ -219,7 +219,7 @@ static void populate_physmap(struct memop_args *a)
>>           }
>>           else
>>           {
>> -            if ( is_domain_direct_mapped(d) )
>> +            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
> I struggle to understand the goal of this patch and the proposed solution.
> The problem with magic pages applies to static mem domUs in general.
> A direct mapped domU is a static mem domU whose memory is 1:1 mapped.
> Let's say we try to map a magic page for a direct mapped domU. That check will be false
> and the execution will move to the next one i.e. is_domain_using_staticmem(d).
> This check will be true and acquire_reserved_page() will fail instead (similar to the
> static mem (no direct map) scenario). The only thing that has changed is the message.

Yes you are definitely correct, I missed the check for static mem 
domains. Will fix in v2 once the proposal in the other thread is agreed. 
Thanks for spotting this issue!

Kind regards,
Henry

> ~Michal
Henry Wang Feb. 27, 2024, 1:35 p.m. UTC | #8
Hi Jan,

On 2/27/2024 9:27 PM, Jan Beulich wrote:
> On 27.02.2024 14:24, Henry Wang wrote:
>> On 2/26/2024 4:25 PM, Jan Beulich wrote:
>>> On 26.02.2024 02:19, Henry Wang wrote:
>>>> --- a/xen/common/memory.c
>>>> +++ b/xen/common/memory.c
>>>> @@ -219,7 +219,7 @@ static void populate_physmap(struct memop_args *a)
>>>>            }
>>>>            else
>>>>            {
>>>> -            if ( is_domain_direct_mapped(d) )
>>>> +            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
>>>>                {
>>>>                    mfn = _mfn(gpfn);
>>>>    
>>> I wonder whether is_domain_direct_mapped() shouldn't either be cloned
>>> into e.g. is_gfn_direct_mapped(d, gfn), or be adjusted in-place to gain
>>> such a (then optional) 2nd parameter. (Of course there again shouldn't be
>>> a need for every domain to define a stub is_domain_direct_mapped() - if
>>> not defined by an arch header, the stub can be supplied in a single
>>> central place.)
>> Same here, it looks like you prefer the centralized
>> is_domain_direct_mapped() now, as we are having more archs. I can do the
>> clean-up when sending v2. Just out of curiosity, do you think it is a
>> good practice to place the is_domain_direct_mapped() implementation in
>> xen/domain.h with proper arch #ifdefs?
> arch #ifdefs? I'd like to avoid such, if at all possible. Generic fallbacks
> generally ought to key their conditionals to the very identifier not
> (already) being defined.

I meant something like this (as I saw CDF_directmap is currently gated 
in this way in xen/domain.h):

#ifdef CONFIG_ARM
#define is_domain_direct_mapped(d) ((d)->cdf & CDF_directmap)
#else
#define is_domain_direct_mapped(d) ((void)(d), 0)
#endif

I am having trouble to think of another way to keep the function in a 
centralized place while
avoiding the #ifdefs. Would you mind elaborating a bit? Thanks!

Kind regards,
Henry


> Jan
Jan Beulich Feb. 27, 2024, 1:51 p.m. UTC | #9
On 27.02.2024 14:35, Henry Wang wrote:
> Hi Jan,
> 
> On 2/27/2024 9:27 PM, Jan Beulich wrote:
>> On 27.02.2024 14:24, Henry Wang wrote:
>>> On 2/26/2024 4:25 PM, Jan Beulich wrote:
>>>> On 26.02.2024 02:19, Henry Wang wrote:
>>>>> --- a/xen/common/memory.c
>>>>> +++ b/xen/common/memory.c
>>>>> @@ -219,7 +219,7 @@ static void populate_physmap(struct memop_args *a)
>>>>>            }
>>>>>            else
>>>>>            {
>>>>> -            if ( is_domain_direct_mapped(d) )
>>>>> +            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
>>>>>                {
>>>>>                    mfn = _mfn(gpfn);
>>>>>    
>>>> I wonder whether is_domain_direct_mapped() shouldn't either be cloned
>>>> into e.g. is_gfn_direct_mapped(d, gfn), or be adjusted in-place to gain
>>>> such a (then optional) 2nd parameter. (Of course there again shouldn't be
>>>> a need for every domain to define a stub is_domain_direct_mapped() - if
>>>> not defined by an arch header, the stub can be supplied in a single
>>>> central place.)
>>> Same here, it looks like you prefer the centralized
>>> is_domain_direct_mapped() now, as we are having more archs. I can do the
>>> clean-up when sending v2. Just out of curiosity, do you think it is a
>>> good practice to place the is_domain_direct_mapped() implementation in
>>> xen/domain.h with proper arch #ifdefs?
>> arch #ifdefs? I'd like to avoid such, if at all possible. Generic fallbacks
>> generally ought to key their conditionals to the very identifier not
>> (already) being defined.
> 
> I meant something like this (as I saw CDF_directmap is currently gated 
> in this way in xen/domain.h):
> 
> #ifdef CONFIG_ARM
> #define is_domain_direct_mapped(d) ((d)->cdf & CDF_directmap)
> #else
> #define is_domain_direct_mapped(d) ((void)(d), 0)
> #endif
> 
> I am having trouble to think of another way to keep the function in a 
> centralized place while
> avoiding the #ifdefs. Would you mind elaborating a bit? Thanks!

What is already there is fine to change. I took your earlier reply to
mean that you want to add an "#ifndef CONFIG_ARM" to put in place some
new fallback handler.

Of course the above could also be done without any direct CONFIG_ARM
dependency. For example, CDF_directmap could simply evaluate to zero
when direct mapped memory isn't supported.
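
[Editor's note: for illustration, Jan's suggestion might look roughly like the sketch below in xen/domain.h. The guard and bit value follow the existing definition as far as can be told from the thread; they are shown only to make the idea concrete.]

```c
/* Sketch: the flag itself degrades to 0 where direct mapping isn't
 * supported, so users need no #ifdef of their own. */
#ifdef CONFIG_ARM
/* Should domain memory be directly mapped? */
#define CDF_directmap            (1U << 1)
#else
#define CDF_directmap            0
#endif

#define is_domain_direct_mapped(d)    ((d)->cdf & CDF_directmap)
```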

Jan
Henry Wang Feb. 27, 2024, 2:21 p.m. UTC | #10
Hi Jan,

On 2/27/2024 9:51 PM, Jan Beulich wrote:
> On 27.02.2024 14:35, Henry Wang wrote:
>> Hi Jan,
>>
>> On 2/27/2024 9:27 PM, Jan Beulich wrote:
>>> On 27.02.2024 14:24, Henry Wang wrote:
>>>> On 2/26/2024 4:25 PM, Jan Beulich wrote:
>>>>> On 26.02.2024 02:19, Henry Wang wrote:
>>>>>> --- a/xen/common/memory.c
>>>>>> +++ b/xen/common/memory.c
>>>>>> @@ -219,7 +219,7 @@ static void populate_physmap(struct memop_args *a)
>>>>>>             }
>>>>>>             else
>>>>>>             {
>>>>>> -            if ( is_domain_direct_mapped(d) )
>>>>>> +            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
>>>>>>                 {
>>>>>>                     mfn = _mfn(gpfn);
>>>>>>     
>>>>> I wonder whether is_domain_direct_mapped() shouldn't either be cloned
>>>>> into e.g. is_gfn_direct_mapped(d, gfn), or be adjusted in-place to gain
>>>>> such a (then optional) 2nd parameter. (Of course there again shouldn't be
>>>>> a need for every domain to define a stub is_domain_direct_mapped() - if
>>>>> not defined by an arch header, the stub can be supplied in a single
>>>>> central place.)
>>>> Same here, it looks like you prefer the centralized
>>>> is_domain_direct_mapped() now, as we are having more archs. I can do the
>>>> clean-up when sending v2. Just out of curiosity, do you think it is a
>>>> good practice to place the is_domain_direct_mapped() implementation in
>>>> xen/domain.h with proper arch #ifdefs?
>>> arch #ifdefs? I'd like to avoid such, if at all possible. Generic fallbacks
>>> generally ought to key their conditionals to the very identifier not
>>> (already) being defined.
>> I meant something like this (as I saw CDF_directmap is currently gated
>> in this way in xen/domain.h):
>>
>> #ifdef CONFIG_ARM
>> #define is_domain_direct_mapped(d) ((d)->cdf & CDF_directmap)
>> #else
>> #define is_domain_direct_mapped(d) ((void)(d), 0)
>> #endif
>>
>> I am having trouble to think of another way to keep the function in a
>> centralized place while
>> avoiding the #ifdefs. Would you mind elaborating a bit? Thanks!
> What is already there is fine to change. I took your earlier reply to
> mean that you want to add an "#ifndef CONFIG_ARM" to put in place some
> new fallback handler.
>
> Of course the above could also be done without any direct CONFIG_ARM
> dependency. For example, CDF_directmap could simply evaluate to zero
> when direct mapped memory isn't supported.

Yes correct, that will indeed simplify the code a lot. I can do the 
change accordingly in follow-up versions.

Kind regards,
Henry

> Jan
Julien Grall Feb. 28, 2024, 10:35 a.m. UTC | #11
Hi Henry,

On 27/02/2024 13:17, Henry Wang wrote:
> (-RISC-V and PPC people to avoid spamming their inbox as this is quite 
> Arm specific)
> 
> Hi Julien,
> 
> On 2/26/2024 5:13 PM, Julien Grall wrote:
>> Hi Henry,
>>
>> Welcome back!
> 
> Thanks!
> 
>> On 26/02/2024 01:19, Henry Wang wrote:
>>> An error message can seen from the init-dom0less application on
>>> direct-mapped 1:1 domains:
>>> ```
>>> Allocating magic pages
>>> memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1
>>> Error on alloc magic pages
>>> ```
>>>
>>> This is because populate_physmap() automatically assumes gfn == mfn
>>> for direct mapped domains. This cannot be true for the magic pages
>>> that are allocated later for Dom0less DomUs from the init-dom0less
>>> helper application executed in Dom0.
>>>
>>> Force populate_physmap to take the "normal" memory allocation route for
>>> the magic pages even for 1:1 Dom0less DomUs. This should work as long
>>> as the 1:1 Dom0less DomU doesn't have anything else mapped at the same
>>> guest address as the magic pages:
>>> - gfn 0x39000 address 0x39000000
>>> - gfn 0x39001 address 0x39001000
>>> - gfn 0x39002 address 0x39002000
>>> - gfn 0x39003 address 0x39003000
>>
>> This is very fragile. You are making the assumption that the magic 
>> pages are not clashing with any RAM region. The layout defined in 
>> arch-arm.h has been designed for guest where Xen is in full control of 
>> the layout. This is not the case for directmapped domain. I don't 
>> think it is correct to try to re-use part of the layout.
> 
> Apologies for the (a bit) late reply, it took a bit longer for me to 
> understand the story about directmap stuff, and yes, now I agree with 
> you, for those directmapped domains we should not reuse the guest layout 
> directly.
> 
>> If you want to use 1:1 dom0less with xenstore & co, then you should 
>> find a different place in memory for the magic pages (TDB how to find 
>> that area). 
> 
> Yes, and maybe we can use similar strategy in find_unallocated_memory() 
> or find_domU_holes() to do that.
> 
>> You will still have the problem of the 1:1 allocation, but I think 
>> this could be solved bty adding a flag to force a non-1:1 allocation.
> 
> After checking the code flow, below rough plan came to my mind, I think 
> what we need to do is:
> 
> (1) Find a range of un-used memory using similar method in 
> find_unallocated_memory()/find_domU_holes()

AFAIK, the toolstack doesn't have any knowledge of the memory layout 
for dom0less domUs today. We would need to expose it first.

Then the region could either be statically allocated (i.e. the admin 
provides it in the DTB) or dynamically.

> 
> (2) Change the base address, i.e. GUEST_MAGIC_BASE in alloc_xs_page() in 
> init-dom0less.c to point to the address in (1) if static mem or 11 
> directmap. (I think this is a bit tricky though, do you have any method 
> that in your mind?)

AFAIK, the toolstack doesn't know whether a domain is direct mapped or 
using static mem.

I think this ties with what I just wrote above. For dom0less domUs, we 
probably want Xen to prepare a memory layout (similar to the e820) that 
will then be retrieved by the toolstack.

> 
> (3) Use a flag or combination of existing flags (CDF_staticmem + 
> CDF_directmap) in populate_physmap() to force the allocation of these 
> magic pages using alloc_domheap_pages() - i.e. the "else" condition in 
> the bottom

If I am not mistaken, CDF_* are per-domain. So we would want to use MEMF_*.
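
[Editor's note: a hedged sketch of what a MEMF_-based approach could look like; the flag name and the bit position are assumptions chosen purely for illustration.]

```c
/* Hypothetical new allocation flag; the bit shown is assumed to be free. */
#define _MEMF_force_heap_alloc 2
#define  MEMF_force_heap_alloc (1U << _MEMF_force_heap_alloc)

/* In populate_physmap(), the 1:1 path could then be skipped on request: */
if ( is_domain_direct_mapped(d) &&
     !(a->memflags & MEMF_force_heap_alloc) )
{
    mfn = _mfn(gpfn);
    /* ... existing direct-mapped handling ... */
}
```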

>>> +
>>>   #endif /* __ASM_X86_MM_H__ */
>>> diff --git a/xen/common/memory.c b/xen/common/memory.c
>>> index b3b05c2ec0..ab4bad79e2 100644
>>> --- a/xen/common/memory.c
>>> +++ b/xen/common/memory.c
>>> @@ -219,7 +219,7 @@ static void populate_physmap(struct memop_args *a)
>>>           }
>>>           else
>>>           {
>>> -            if ( is_domain_direct_mapped(d) )
>>> +            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
>>
>> This path will also be reached by dom0. Effectively, this will prevent 
>> dom0 to allocate the memory 1:1 for the magic GPFN (which is guest 
>> specific).
> 
> I think above proposal will solve this issue.
> 
>> Also, why are you only checking the first GFN? What if the caller pass 
>> an overlapped region?
> 
> I am a bit confused. My understanding is at this point we are handling 
> one page at a time.

We are handling one "extent" at a time. This could be one or multiple 
pages (see extent_order).

Cheers,
Henry Wang Feb. 28, 2024, 11:53 a.m. UTC | #12
Hi Julien,

On 2/28/2024 6:35 PM, Julien Grall wrote:
> Hi Henry,
>>>>
>>>> Force populate_physmap to take the "normal" memory allocation route 
>>>> for
>>>> the magic pages even for 1:1 Dom0less DomUs. This should work as long
>>>> as the 1:1 Dom0less DomU doesn't have anything else mapped at the same
>>>> guest address as the magic pages:
>>>> - gfn 0x39000 address 0x39000000
>>>> - gfn 0x39001 address 0x39001000
>>>> - gfn 0x39002 address 0x39002000
>>>> - gfn 0x39003 address 0x39003000
>>>
>>> This is very fragile. You are making the assumption that the magic 
>>> pages are not clashing with any RAM region. The layout defined in 
>>> arch-arm.h has been designed for guest where Xen is in full control 
>>> of the layout. This is not the case for directmapped domain. I don't 
>>> think it is correct to try to re-use part of the layout.
>>
>> Apologies for the (a bit) late reply, it took a bit longer for me to 
>> understand the story about directmap stuff, and yes, now I agree with 
>> you, for those directmapped domains we should not reuse the guest 
>> layout directly.
>>
>>> If you want to use 1:1 dom0less with xenstore & co, then you should 
>>> find a different place in memory for the magic pages (TDB how to 
>>> find that area). 
>>
>> Yes, and maybe we can use similar strategy in 
>> find_unallocated_memory() or find_domU_holes() to do that.
>>
>>> You will still have the problem of the 1:1 allocation, but I think 
>>> this could be solved bty adding a flag to force a non-1:1 allocation.
>>
>> After checking the code flow, below rough plan came to my mind, I 
>> think what we need to do is:
>>
>> (1) Find a range of un-used memory using similar method in 
>> find_unallocated_memory()/find_domU_holes()
>
> AFAIK, the toolstack doesn't have any knowledge of the memeory layout 
> for dom0less domUs today. We would need to expose it first.

If I understand correctly, I think the issue you mentioned here and ...

> Then the region could either be statically allocated (i.e. the admin 
> provides it in the DTB) or dynamically.
>
>> (2) Change the base address, i.e. GUEST_MAGIC_BASE in alloc_xs_page() 
>> in init-dom0less.c to point to the address in (1) if static mem or 11 
>> directmap. (I think this is a bit tricky though, do you have any 
>> method that in your mind?)
>
> AFAIK, the toolstack doesn't know whether a domain is direct mapped or 
> using static mem.

...here basically means we want to do the finding of the unused region 
in toolstack. Since currently what we care about is only a couple of 
pages instead of the whole memory map, could it be possible that we do 
the opposite: in alloc_xs_page(), we issue a domctl hypercall to Xen and 
do the finding work in Xen and return with the found gfn? Then the page 
can be mapped by populate_physmap() from alloc_xs_page() and used for 
XenStore.

If above approach makes sense to you, I have a further question: Since I 
understand that the extended region is basically for safely foreign 
mapping pages, and init_dom0less.c uses foreign memory map for this 
XenStore page, should we find the wanted page in the extended region? or 
even extended region should be excluded?

> I think this ties with what I just wrote above. For dom0less domUs, we 
> probably want Xen to prepare a memory layout (similar to the e820) 
> that will then be retrieved by the toolstack.
>
>>
>> (3) Use a flag or combination of existing flags (CDF_staticmem + 
>> CDF_directmap) in populate_physmap() to force the allocation of these 
>> magic pages using alloc_domheap_pages() - i.e. the "else" condition 
>> in the bottom
>
> If I am not mistaken, CDF_* are per-domain. So we would want to use 
> MEMF_*.

Ah yes you are correct, I indeed missed MEMF_*

>>> Also, why are you only checking the first GFN? What if the caller 
>>> pass an overlapped region?
>>
>> I am a bit confused. My understanding is at this point we are 
>> handling one page at a time.
>
> We are handling one "extent" at the time. This could be one or 
> multiple pages (see extent_order).

I agree, sorry I didn't express myself well. For this specific XenStore 
page, I think the extent_order is
fixed as 0 so there is only 1 page. But you made a good point and I will 
remember to check if there are
multiple pages or an overlapped region. Thanks!

Kind regards,
Henry
Julien Grall Feb. 28, 2024, 12:27 p.m. UTC | #13
Hi Henry,

On 28/02/2024 11:53, Henry Wang wrote:
> On 2/28/2024 6:35 PM, Julien Grall wrote:
>> Hi Henry,
>>>>>
>>>>> Force populate_physmap to take the "normal" memory allocation route 
>>>>> for
>>>>> the magic pages even for 1:1 Dom0less DomUs. This should work as long
>>>>> as the 1:1 Dom0less DomU doesn't have anything else mapped at the same
>>>>> guest address as the magic pages:
>>>>> - gfn 0x39000 address 0x39000000
>>>>> - gfn 0x39001 address 0x39001000
>>>>> - gfn 0x39002 address 0x39002000
>>>>> - gfn 0x39003 address 0x39003000
>>>>
>>>> This is very fragile. You are making the assumption that the magic 
>>>> pages are not clashing with any RAM region. The layout defined in 
>>>> arch-arm.h has been designed for guest where Xen is in full control 
>>>> of the layout. This is not the case for directmapped domain. I don't 
>>>> think it is correct to try to re-use part of the layout.
>>>
>>> Apologies for the (a bit) late reply, it took a bit longer for me to 
>>> understand the story about directmap stuff, and yes, now I agree with 
>>> you, for those directmapped domains we should not reuse the guest 
>>> layout directly.
>>>
>>>> If you want to use 1:1 dom0less with xenstore & co, then you should 
>>>> find a different place in memory for the magic pages (TDB how to 
>>>> find that area). 
>>>
>>> Yes, and maybe we can use similar strategy in 
>>> find_unallocated_memory() or find_domU_holes() to do that.
>>>
>>>> You will still have the problem of the 1:1 allocation, but I think 
>>>> this could be solved bty adding a flag to force a non-1:1 allocation.
>>>
>>> After checking the code flow, below rough plan came to my mind, I 
>>> think what we need to do is:
>>>
>>> (1) Find a range of un-used memory using similar method in 
>>> find_unallocated_memory()/find_domU_holes()
>>
>> AFAIK, the toolstack doesn't have any knowledge of the memeory layout 
>> for dom0less domUs today. We would need to expose it first.
> 
> If I understand correctly, I think the issue you mentioned here and ...
> 
>> Then the region could either be statically allocated (i.e. the admin 
>> provides it in the DTB) or dynamically.
>>
>>> (2) Change the base address, i.e. GUEST_MAGIC_BASE in alloc_xs_page() 
>>> in init-dom0less.c to point to the address in (1) if static mem or 11 
>>> directmap. (I think this is a bit tricky though, do you have any 
>>> method that in your mind?)
>>
>> AFAIK, the toolstack doesn't know whether a domain is direct mapped or 
>> using static mem.
> 
> ...here basically means we want to do the finding of the unused region 
> in toolstack. Since currently what we care about is only a couple of 
> pages instead of the whole memory map, could it be possible that we do 
> the opposite: in alloc_xs_page(), we issue a domctl hypercall to Xen and 
> do the finding work in Xen and return with the found gfn? Then the page 
> can be mapped by populate_physmap() from alloc_xs_page() and used for 
> XenStore.

I know that DOMCTL hypercalls are not stable. But I am not overly happy 
with creating an hypercall which is just "fetch the magic regions". I 
think it need to be a more general one that would expose all the regions.

Also, you can't really find the magic regions when the hypercall is done 
as we don't have the guest memory layout. This will need to be done in 
advance.

Overall, I think it would be better if we provide an hypercall listing 
the regions currently occupied (similar to e820). One will have the type 
"magic pages".

> 
> If above approach makes sense to you, I have a further question: Since I 
> understand that the extended region is basically for safely foreign 
> mapping pages

This is not about safety. The extended region is optional. It was 
introduced so it is easy for Linux to find an unallocated region to map 
external pages (e.g. vCPU shared info) so it doesn't waste RAM pages.

> , and init_dom0less.c uses foreign memory map for this
> XenStore page, 
> should we find the wanted page in the extended region? or 
> even extended region should be excluded?

How is the extended region found for dom0less domUs today? It would be 
fine to steal some part of the extended regions for the magic pages. But 
they would need to be reserved *before* the guest is first unpaused.

>> I think this ties with what I just wrote above. For dom0less domUs, we 
>> probably want Xen to prepare a memory layout (similar to the e820) 
>> that will then be retrieved by the toolstack.
>>
>>>
>>> (3) Use a flag or combination of existing flags (CDF_staticmem + 
>>> CDF_directmap) in populate_physmap() to force the allocation of these 
>>> magic pages using alloc_domheap_pages() - i.e. the "else" condition 
>>> in the bottom
>>
>> If I am not mistaken, CDF_* are per-domain. So we would want to use 
>> MEMF_*.
> 
> Ah yes you are correct, I indeed missed MEMF_*
> 
>>>> Also, why are you only checking the first GFN? What if the caller 
>>>> pass an overlapped region?
>>>
>>> I am a bit confused. My understanding is at this point we are 
>>> handling one page at a time.
>>
>> We are handling one "extent" at the time. This could be one or 
>> multiple pages (see extent_order).
> 
> I agree, sorry I didn't express myself well. For this specific XenStore 
> page, I think the extent_order is
> fixed as 0 so there is only 1 page.

Correct. But you should not rely on this :).

Cheers,
Henry Wang Feb. 28, 2024, 12:50 p.m. UTC | #14
Hi Julien,

On 2/28/2024 8:27 PM, Julien Grall wrote:
> Hi Henry,
>>>> After checking the code flow, below rough plan came to my mind, I 
>>>> think what we need to do is:
>>>>
>>>> (1) Find a range of un-used memory using similar method in 
>>>> find_unallocated_memory()/find_domU_holes()
>>>
>>> AFAIK, the toolstack doesn't have any knowledge of the memeory 
>>> layout for dom0less domUs today. We would need to expose it first.
>>
>> If I understand correctly, I think the issue you mentioned here and ...
>>
>>> Then the region could either be statically allocated (i.e. the admin 
>>> provides it in the DTB) or dynamically.
>>>
>>>> (2) Change the base address, i.e. GUEST_MAGIC_BASE in 
>>>> alloc_xs_page() in init-dom0less.c to point to the address in (1) 
>>>> if static mem or 11 directmap. (I think this is a bit tricky 
>>>> though, do you have any method that in your mind?)
>>>
>>> AFAIK, the toolstack doesn't know whether a domain is direct mapped 
>>> or using static mem.
>>
>> ...here basically means we want to do the finding of the unused 
>> region in toolstack. Since currently what we care about is only a 
>> couple of pages instead of the whole memory map, could it be possible 
>> that we do the opposite: in alloc_xs_page(), we issue a domctl 
>> hypercall to Xen and do the finding work in Xen and return with the 
>> found gfn? Then the page can be mapped by populate_physmap() from 
>> alloc_xs_page() and used for XenStore.
>
> I know that DOMCTL hypercalls are not stable. But I am not overly 
> happy with creating an hypercall which is just "fetch the magic 
> regions". I think it need to be a more general one that would expose 
> all the regions.
>
> Also, you can't really find the magic regions when the hypercall is 
> done as we don't have the guest memory layout. This will need to be 
> done in advance.
>
> Overall, I think it would be better if we provide an hypercall listing 
> the regions currently occupied (similar to e820). One will have the 
> type "magic pages".

Yeah, now it is clearer. I agree your approach is indeed a lot better. 
I will check how e820 works and see if I can do something similar. Also, 
it might not be related, but I think we had a similar discussion in 
[1] when I did the static heap work.

>> If above approach makes sense to you, I have a further question: 
>> Since I understand that the extended region is basically for safely 
>> foreign mapping pages
>
> This is not about safety. The extended region is optional. It was 
> introduced so it is easy for Linux to find an unallocated region to 
> map external pages (e.g. vCPU shared info) so it doesn't waste RAM pages.
>
> , and init_dom0less.c uses foreign memory map for this
>> XenStore page, should we find the wanted page in the extended region? 
>> or even extended region should be excluded?
>
> How is the extended region found for dom0less domUs today? 

The extended regions for dom0less domUs are found by function 
find_domU_holes(), introduced by commit 2a24477 ("xen/arm: implement domU 
extended regions"). I think this commit basically followed the original 
algorithm introduced by commit 57f8785 ("libxl/arm: Add handling of 
extended regions for DomU").

> It would be fine to steal some part of the extended regions for the 
> magic pages. But they would need to be reserved *before* the guest is 
> first unpaused.

I also thought about this today, as I think writing a function basically 
doing the same as what we do for extended regions is probably too much, so 
stealing some part of the extended region is easier. What I worry about 
is: once the extended regions are allocated and written to the 
"reg" property of the device tree hypervisor node, the data structures 
recording these extended regions are freed. So we kind of "lose" the 
information about these extended regions if we want to get them later 
(for example, in my use case). Also, if we steal some part of memory from 
the extended regions, should we carve out the "stolen" parts from the 
device tree as well?

>>>>> Also, why are you only checking the first GFN? What if the caller 
>>>>> pass an overlapped region?
>>>>
>>>> I am a bit confused. My understanding is at this point we are 
>>>> handling one page at a time.
>>>
>>> We are handling one "extent" at the time. This could be one or 
>>> multiple pages (see extent_order).
>>
>> I agree, sorry I didn't express myself well. For this specific 
>> XenStore page, I think the extent_order is
>> fixed as 0 so there is only 1 page.
>
> Correct. But you should not rely on this :).

Yeah definitely :)

[1] 
https://lore.kernel.org/xen-devel/e53601a1-a5ac-897a-334d-de45d96e9863@xen.org/

Kind regards,
Henry
Henry Wang March 1, 2024, 3:03 a.m. UTC | #15
Hi Julien,

On 2/28/2024 8:27 PM, Julien Grall wrote:
> Hi Henry,
>> ...here basically means we want to do the finding of the unused 
>> region in toolstack. Since currently what we care about is only a 
>> couple of pages instead of the whole memory map, could it be possible 
>> that we do the opposite: in alloc_xs_page(), we issue a domctl 
>> hypercall to Xen and do the finding work in Xen and return with the 
>> found gfn? Then the page can be mapped by populate_physmap() from 
>> alloc_xs_page() and used for XenStore.
>
> I know that DOMCTL hypercalls are not stable. But I am not overly 
> happy with creating an hypercall which is just "fetch the magic 
> regions". I think it need to be a more general one that would expose 
> all the regions.
>
> Also, you can't really find the magic regions when the hypercall is 
> done as we don't have the guest memory layout. This will need to be 
> done in advance.
>
> Overall, I think it would be better if we provide an hypercall listing 
> the regions currently occupied (similar to e820). One will have the 
> type "magic pages".

Would the below design make sense to you:

(1) Similarly as the e820, we can firstly introduce some data structures 
in struct arch_domain:

#define MEM_REGIONS_MAX 4

// For now we only care about the magic regions, but in the future this
// can be easily extended with other types such as RAM, MMIO etc.
enum mem_region_type {
     MEM_REGION_MAGIC,
};

struct mem_region {
     paddr_t start;
     paddr_t size;
     enum mem_region_type type;
};

struct domain_mem_map {
     unsigned int nr_mem_regions;
     struct mem_region region[MEM_REGIONS_MAX];
};

struct arch_domain {
...
     struct domain_mem_map mem_map;
};

(2) Then in domain creation time, for normal and static non-directmapped 
Dom0less DomU, set d->arch.mem_map.region[0].start = GUEST_MAGIC_BASE 
and the size to 4 pages. For static and directmapped Dom0less DomU, find 
a free region of 4 pages for magic pages. The finding of a free region 
can reuse the approach for extended region and be called before 
make_hypervisor_node(), and when make_hypervisor_node() is called. We 
carve the magic pages out from the actual extended region.

(3) Add a new hypercall XENMEM_memory_map_domid (or a domctl). Input 
will be domid and output will be the memory map of the domain recorded 
in struct arch_domain.

(4) In alloc_xs_page() and alloc_magic_pages, replace the hardcoded base 
address with the new address found by the hypercall.
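
[Editor's note: a hedged sketch of what step (4) could look like on the init-dom0less side. The wrapper get_domain_magic_base_pfn() is hypothetical (it would wrap the new domctl from step (3)); the libxc calls exist, but this is not the current code, and XENSTORE_PFN_OFFSET is assumed to still be available on the tools side.]

```c
static int alloc_xs_page(xc_interface *xch, libxl_dominfo *info,
                         uint64_t *xenstore_pfn)
{
    xen_pfn_t base_pfn, gfn;
    int rc;

    /* Hypothetical helper wrapping the new "get memory map" domctl. */
    rc = get_domain_magic_base_pfn(xch, info->domid, &base_pfn);
    if ( rc )
        return rc;

    /* Allow one more page so the xenstore ring can be populated. */
    rc = xc_domain_setmaxmem(xch, info->domid,
                             info->max_memkb + (XC_PAGE_SIZE / 1024));
    if ( rc < 0 )
        return rc;

    gfn = base_pfn + XENSTORE_PFN_OFFSET;
    rc = xc_domain_populate_physmap_exact(xch, info->domid, 1, 0, 0, &gfn);
    if ( rc < 0 )
        return rc;

    *xenstore_pfn = gfn;

    return 0;
}
```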

Kind regards,
Henry
Julien Grall March 4, 2024, 6:38 p.m. UTC | #16
On 01/03/2024 03:03, Henry Wang wrote:
> Hi Julien,

Hi Henry,

> On 2/28/2024 8:27 PM, Julien Grall wrote:
>> Hi Henry,
>>> ...here basically means we want to do the finding of the unused 
>>> region in toolstack. Since currently what we care about is only a 
>>> couple of pages instead of the whole memory map, could it be possible 
>>> that we do the opposite: in alloc_xs_page(), we issue a domctl 
>>> hypercall to Xen and do the finding work in Xen and return with the 
>>> found gfn? Then the page can be mapped by populate_physmap() from 
>>> alloc_xs_page() and used for XenStore.
>>
>> I know that DOMCTL hypercalls are not stable. But I am not overly 
>> happy with creating an hypercall which is just "fetch the magic 
>> regions". I think it need to be a more general one that would expose 
>> all the regions.
>>
>> Also, you can't really find the magic regions when the hypercall is 
>> done as we don't have the guest memory layout. This will need to be 
>> done in advance.
>>
>> Overall, I think it would be better if we provide an hypercall listing 
>> the regions currently occupied (similar to e820). One will have the 
>> type "magic pages".
> 
> Would below design make sense to you:

This looks good in principle. Some comments below.

> 
> (1) Similarly as the e820, we can firstly introduce some data structures 
> in struct arch_domain:
> 
> #define MEM_REGIONS_MAX 4
> 
> // For now we only care the magic regions, but in the future this can be 
> easily extended with other types such as RAM, MMIO etc.
> enum mem_region_type {
>      MEM_REGION_MAGIC,
> };
> 
> struct mem_region {
>      paddr_t start;
>      paddr_t size;
>      enum mem_region_type type;
> };
> 
> struct domain_mem_map {
>      unsigned int nr_mem_regions;
>      struct mem_region region[MEM_REGIONS_MAX];
> };

If you plan to expose the same interface externally, then you will need 
to replace paddr_t with uint64_t and avoid using an enum in the structure.

You will also want to expose a dynamic array (even if this is fixed in 
the hypervisor).
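
Something along these lines is what I mean, i.e. fixed-width fields, a
plain integer rather than an enum, and a caller-provided buffer (all
names are only illustrative, this is just a sketch):

```c
/* Rough sketch only: a tool/guest-visible layout with fixed-width
 * fields, a plain integer rather than an enum, and a caller-provided
 * buffer whose size is communicated via nr_mem_regions. */
struct xen_mem_region {
    uint64_t start;
    uint64_t size;
    uint32_t type;   /* e.g. 0 == magic pages */
    uint32_t pad;
};
typedef struct xen_mem_region xen_mem_region_t;
DEFINE_XEN_GUEST_HANDLE(xen_mem_region_t);

struct xen_domctl_mem_map {
    /* IN: number of entries the buffer can hold. OUT: number filled in. */
    uint32_t nr_mem_regions;
    uint32_t pad;
    XEN_GUEST_HANDLE_64(xen_mem_region_t) buffer;  /* "dynamic" array */
};
```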

> 
> struct arch_domain {
> ...
>      struct domain_mem_map mem_map;
> };
> 
> (2) Then in domain creation time, for normal and static non-directmapped 
> Dom0less DomU, set d->arch.mem_map.region[0].start = GUEST_MAGIC_BASE 
> and the size to 4 pages. For static and directmapped Dom0less DomU, find 
> a free region of 4 pages for magic pages. The finding of a free region 
> can reuse the approach for extended region and be called before 
> make_hypervisor_node(), and when make_hypervisor_node() is called. We 
> carve the magic pages out from the actual extended region.
> 
> (3) Add a new hypercall XENMEM_memory_map_domid (or a domctl). Input 
> will be domid and output will be the memory map of the domain recorded 
> in struct arch_domain.

XENMEM_* are supposed to be a stable interface. I am not aware of any use 
in the guest yet, so I think it would be best to use a DOMCTL.

> 
> (4) In alloc_xs_page() and alloc_magic_pages, replace the hardcoded base 
> address with the new address found by the hypercall.

Cheers,
Henry Wang March 5, 2024, 12:22 a.m. UTC | #17
Hi Julien,

On 3/5/2024 2:38 AM, Julien Grall wrote:
> On 01/03/2024 03:03, Henry Wang wrote:
>> Hi Julien,
> Hi Henry,
>> On 2/28/2024 8:27 PM, Julien Grall wrote:
>>> Hi Henry,
>>>> ...here basically means we want to do the finding of the unused 
>>>> region in toolstack. Since currently what we care about is only a 
>>>> couple of pages instead of the whole memory map, could it be 
>>>> possible that we do the opposite: in alloc_xs_page(), we issue a 
>>>> domctl hypercall to Xen and do the finding work in Xen and return 
>>>> with the found gfn? Then the page can be mapped by 
>>>> populate_physmap() from alloc_xs_page() and used for XenStore.
>>>
>>> I know that DOMCTL hypercalls are not stable. But I am not overly 
>>> happy with creating an hypercall which is just "fetch the magic 
>>> regions". I think it need to be a more general one that would expose 
>>> all the regions.
>>>
>>> Also, you can't really find the magic regions when the hypercall is 
>>> done as we don't have the guest memory layout. This will need to be 
>>> done in advance.
>>>
>>> Overall, I think it would be better if we provide an hypercall 
>>> listing the regions currently occupied (similar to e820). One will 
>>> have the type "magic pages".
>>
>> Would below design make sense to you:
>
> This looks good in principle. 

Great! Actually Michal helped a lot to come up with the proposal (Thanks 
Michal!). I am really sorry that I forgot to mention this in my last 
email and didn't credit him properly.

> Some comments below.
>> (1) Similarly as the e820, we can firstly introduce some data 
>> structures in struct arch_domain:
>>
>> #define MEM_REGIONS_MAX 4
>>
>> // For now we only care the magic regions, but in the future this can 
>> be easily extended with other types such as RAM, MMIO etc.
>> enum mem_region_type {
>>      MEM_REGION_MAGIC,
>> };
>>
>> struct mem_region {
>>      paddr_t start;
>>      paddr_t size;
>>      enum mem_region_type type;
>> };
>>
>> struct domain_mem_map {
>>      unsigned int nr_mem_regions;
>>      struct mem_region region[MEM_REGIONS_MAX];
>> };
>
> If you plan to expose the same interface externally, then you will 
> need to replace paddr_t with uint64_t and avoid using an enum in the 
> structure.

Yes, you are correct. As an alternative, maybe we can also try using 
xen_pfn_t and the number of pages to describe the range. I will use 
whichever makes the implementation easier.
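
Something like the below is the alternative I have in mind (purely a
sketch, field names are not final):

```c
/* Alternative sketch: describe each region by its first frame and a
 * page count instead of a byte address and size. */
struct xen_mem_region {
    xen_pfn_t start_gfn;
    uint64_t  nr_pages;
    uint32_t  type;
    uint32_t  pad;
};
```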

> You will also want to expose a dynamic array (even if this is fixed in 
> the hypervisor).

Ok.

>> struct arch_domain {
>> ...
>>      struct domain_mem_map mem_map;
>> };
>>
>> (2) Then in domain creation time, for normal and static 
>> non-directmapped Dom0less DomU, set d->arch.mem_map.region[0].start = 
>> GUEST_MAGIC_BASE and the size to 4 pages. For static and directmapped 
>> Dom0less DomU, find a free region of 4 pages for magic pages. The 
>> finding of a free region can reuse the approach for extended region 
>> and be called before make_hypervisor_node(), and when 
>> make_hypervisor_node() is called. We carve the magic pages out from 
>> the actual extended region.
>>
>> (3) Add a new hypercall XENMEM_memory_map_domid (or a domctl). Input 
>> will be domid and output will be the memory map of the domain 
>> recorded in struct arch_domain.
>
> XENMEM_* are supposed to be stable interface. I am not aware of any 
> use in the guest yet, so I think it would be best to use a DOMCTL.

Sounds good to me. Thanks for the input!
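
As a very rough sketch of what I am thinking for the DOMCTL handler,
reusing the xen_mem_region/xen_domctl_mem_map layout you sketched above
(the op name, error codes and copy-back handling are placeholders, not
final):

```c
/* Sketch only: a possible handler for the new DOMCTL, copying the
 * per-domain memory map recorded in arch_domain to the caller's buffer. */
case XEN_DOMCTL_get_mem_map:   /* placeholder name */
{
    unsigned int i, nr = d->arch.mem_map.nr_mem_regions;

    if ( domctl->u.mem_map.nr_mem_regions < nr )
    {
        ret = -ENOBUFS;
        break;
    }

    ret = 0;
    for ( i = 0; !ret && i < nr; i++ )
    {
        struct xen_mem_region out = {
            .start = d->arch.mem_map.region[i].start,
            .size  = d->arch.mem_map.region[i].size,
            .type  = d->arch.mem_map.region[i].type,
        };

        if ( copy_to_guest_offset(domctl->u.mem_map.buffer, i, &out, 1) )
            ret = -EFAULT;
    }

    if ( !ret )
    {
        domctl->u.mem_map.nr_mem_regions = nr;
        copyback = true;   /* return the updated count to the caller */
    }
    break;
}
```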

Kind regards,
Henry

>>
>> (4) In alloc_xs_page() and alloc_magic_pages, replace the hardcoded 
>> base address with the new address found by the hypercall.
>
> Cheers,
>
diff mbox series

Patch

diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c
index 2fd8ee7ad4..8c579d7576 100644
--- a/tools/libs/guest/xg_dom_arm.c
+++ b/tools/libs/guest/xg_dom_arm.c
@@ -25,12 +25,6 @@ 
 
 #include "xg_private.h"
 
-#define NR_MAGIC_PAGES 4
-#define CONSOLE_PFN_OFFSET 0
-#define XENSTORE_PFN_OFFSET 1
-#define MEMACCESS_PFN_OFFSET 2
-#define VUART_PFN_OFFSET 3
-
 #define LPAE_SHIFT 9
 
 #define PFN_4K_SHIFT  (0)
diff --git a/xen/arch/arm/include/asm/mm.h b/xen/arch/arm/include/asm/mm.h
index cbcf3bf147..17149b4635 100644
--- a/xen/arch/arm/include/asm/mm.h
+++ b/xen/arch/arm/include/asm/mm.h
@@ -428,6 +428,19 @@  static inline void page_set_xenheap_gfn(struct page_info *p, gfn_t gfn)
     } while ( (y = cmpxchg(&p->u.inuse.type_info, x, nx)) != x );
 }
 
+#define MAGIC_PAGE_N_GPFN(n)     ((GUEST_MAGIC_BASE >> PAGE_SHIFT) + n)
+static inline bool is_magic_gpfn(xen_pfn_t gpfn)
+{
+    unsigned int i;
+    for ( i = 0; i < NR_MAGIC_PAGES; i++ )
+    {
+        if ( gpfn == MAGIC_PAGE_N_GPFN(i) )
+            return true;
+    }
+
+    return false;
+}
+
 #endif /*  __ARCH_ARM_MM__ */
 /*
  * Local variables:
diff --git a/xen/arch/ppc/include/asm/mm.h b/xen/arch/ppc/include/asm/mm.h
index a433936076..8ad81d9552 100644
--- a/xen/arch/ppc/include/asm/mm.h
+++ b/xen/arch/ppc/include/asm/mm.h
@@ -256,4 +256,9 @@  static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
     return true;
 }
 
+static inline bool is_magic_gpfn(xen_pfn_t gpfn)
+{
+    return false;
+}
+
 #endif /* _ASM_PPC_MM_H */
diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h
index 07c7a0abba..a376a77e29 100644
--- a/xen/arch/riscv/include/asm/mm.h
+++ b/xen/arch/riscv/include/asm/mm.h
@@ -3,6 +3,7 @@ 
 #ifndef _ASM_RISCV_MM_H
 #define _ASM_RISCV_MM_H
 
+#include <public/xen.h>
 #include <asm/page-bits.h>
 
 #define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
@@ -20,4 +21,9 @@  unsigned long calc_phys_offset(void);
 
 void turn_on_mmu(unsigned long ra);
 
+static inline bool is_magic_gpfn(xen_pfn_t gpfn)
+{
+    return false;
+}
+
 #endif /* _ASM_RISCV_MM_H */
diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
index 7d26d9cd2f..f385f36d78 100644
--- a/xen/arch/x86/include/asm/mm.h
+++ b/xen/arch/x86/include/asm/mm.h
@@ -628,4 +628,9 @@  static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
     return (mfn + nr) <= (virt_to_mfn(eva - 1) + 1);
 }
 
+static inline bool is_magic_gpfn(xen_pfn_t gpfn)
+{
+    return false;
+}
+
 #endif /* __ASM_X86_MM_H__ */
diff --git a/xen/common/memory.c b/xen/common/memory.c
index b3b05c2ec0..ab4bad79e2 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -219,7 +219,7 @@  static void populate_physmap(struct memop_args *a)
         }
         else
         {
-            if ( is_domain_direct_mapped(d) )
+            if ( is_domain_direct_mapped(d) && !is_magic_gpfn(gpfn) )
             {
                 mfn = _mfn(gpfn);
 
diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
index a25e87dbda..58aa6ff05b 100644
--- a/xen/include/public/arch-arm.h
+++ b/xen/include/public/arch-arm.h
@@ -476,6 +476,12 @@  typedef uint64_t xen_callback_t;
 #define GUEST_MAGIC_BASE  xen_mk_ullong(0x39000000)
 #define GUEST_MAGIC_SIZE  xen_mk_ullong(0x01000000)
 
+#define NR_MAGIC_PAGES 4
+#define CONSOLE_PFN_OFFSET 0
+#define XENSTORE_PFN_OFFSET 1
+#define MEMACCESS_PFN_OFFSET 2
+#define VUART_PFN_OFFSET 3
+
 #define GUEST_RAM_BANKS   2
 
 /*