[3/7] xen/p2m: put reference for superpage

Message ID 20240423082532.776623-4-luca.fancellu@arm.com
State Superseded
Series Static shared memory followup v2 - pt2

Commit Message

Luca Fancellu April 23, 2024, 8:25 a.m. UTC
From: Penny Zheng <Penny.Zheng@arm.com>

We are doing foreign memory mappings for static shared memory, and
there is a strong possibility that they could be super mapped.
But today, p2m_put_l3_page cannot handle superpages.

This commit implements a new function, p2m_put_superpage, to handle superpages,
specifically to put the extra references taken on foreign superpages.

Signed-off-by: Penny Zheng <penny.zheng@arm.com>
Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
---
v1:
 - patch from https://patchwork.kernel.org/project/xen-devel/patch/20231206090623.1932275-9-Penny.Zheng@arm.com/
---
 xen/arch/arm/mmu/p2m.c | 58 +++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 15 deletions(-)

Comments

Michal Orzel May 7, 2024, 12:26 p.m. UTC | #1
Hi Luca,

On 23/04/2024 10:25, Luca Fancellu wrote:
> 
> 
> From: Penny Zheng <Penny.Zheng@arm.com>
> 
> We are doing foreign memory mappings for static shared memory, and
> there is a strong possibility that they could be super mapped.
> But today, p2m_put_l3_page cannot handle superpages.
> 
> This commit implements a new function, p2m_put_superpage, to handle superpages,
> specifically to put the extra references taken on foreign superpages.
> 
> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> ---
> v1:
>  - patch from https://patchwork.kernel.org/project/xen-devel/patch/20231206090623.1932275-9-Penny.Zheng@arm.com/
> ---
>  xen/arch/arm/mmu/p2m.c | 58 +++++++++++++++++++++++++++++++-----------
>  1 file changed, 43 insertions(+), 15 deletions(-)
> 
> diff --git a/xen/arch/arm/mmu/p2m.c b/xen/arch/arm/mmu/p2m.c
> index 41fcca011cf4..479a80fbd4cf 100644
> --- a/xen/arch/arm/mmu/p2m.c
> +++ b/xen/arch/arm/mmu/p2m.c
> @@ -753,17 +753,9 @@ static int p2m_mem_access_radix_set(struct p2m_domain *p2m, gfn_t gfn,
>      return rc;
>  }
> 
> -/*
> - * Put any references on the single 4K page referenced by pte.
> - * TODO: Handle superpages, for now we only take special references for leaf
> - * pages (specifically foreign ones, which can't be super mapped today).
> - */
> -static void p2m_put_l3_page(const lpae_t pte)
> +/* Put any references on the single 4K page referenced by mfn. */
> +static void p2m_put_l3_page(mfn_t mfn, unsigned type)
Shouldn't type be of p2m_type_t?
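
Presumably the suggestion is a prototype along these lines (a sketch of
the reviewer's point, not code from the patch):

    static void p2m_put_l3_page(mfn_t mfn, p2m_type_t type);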

>  {
> -    mfn_t mfn = lpae_get_mfn(pte);
> -
> -    ASSERT(p2m_is_valid(pte));
> -
>      /*
>       * TODO: Handle other p2m types
>       *
> @@ -771,16 +763,53 @@ static void p2m_put_l3_page(const lpae_t pte)
>       * flush the TLBs if the page is reallocated before the end of
>       * this loop.
>       */
> -    if ( p2m_is_foreign(pte.p2m.type) )
> +    if ( p2m_is_foreign(type) )
>      {
>          ASSERT(mfn_valid(mfn));
>          put_page(mfn_to_page(mfn));
>      }
>      /* Detect the xenheap page and mark the stored GFN as invalid. */
> -    else if ( p2m_is_ram(pte.p2m.type) && is_xen_heap_mfn(mfn) )
> +    else if ( p2m_is_ram(type) && is_xen_heap_mfn(mfn) )
>          page_set_xenheap_gfn(mfn_to_page(mfn), INVALID_GFN);
>  }
> 
> +/* Put any references on the superpage referenced by mfn. */
> +static void p2m_put_superpage(mfn_t mfn, unsigned int next_level, unsigned type)
Shouldn't type be of p2m_type_t?

> +{
> +    unsigned int i;
> +    unsigned int level_order = XEN_PT_LEVEL_ORDER(next_level);
> +
> +    for ( i = 0; i < XEN_PT_LPAE_ENTRIES; i++ )
> +    {
> +        if ( next_level == 3 )
> +            p2m_put_l3_page(mfn, type);
> +        else
> +            p2m_put_superpage(mfn, next_level + 1, type);
> +
> +        mfn = mfn_add(mfn, 1 << level_order);
> +    }
> +}
> +
> +/* Put any references on the page referenced by pte. */
> +static void p2m_put_page(const lpae_t pte, unsigned int level)
> +{
> +    mfn_t mfn = lpae_get_mfn(pte);
> +
> +    ASSERT(p2m_is_valid(pte));
> +
> +    /*
> +     * We are either having a first level 1G superpage or a
> +     * second level 2M superpage.
> +     */
> +    if ( p2m_is_superpage(pte, level) )
> +        return p2m_put_superpage(mfn, level + 1, pte.p2m.type);
> +    else
No need for this else

> +    {
> +        ASSERT(level == 3);
> +        return p2m_put_l3_page(mfn, pte.p2m.type);
> +    }
> +}
> +
>  /* Free lpae sub-tree behind an entry */
>  static void p2m_free_entry(struct p2m_domain *p2m,
>                             lpae_t entry, unsigned int level)
> @@ -809,9 +838,8 @@ static void p2m_free_entry(struct p2m_domain *p2m,
>  #endif
> 
>          p2m->stats.mappings[level]--;
> -        /* Nothing to do if the entry is a super-page. */
> -        if ( level == 3 )
> -            p2m_put_l3_page(entry);
> +        p2m_put_page(entry, level);
> +
>          return;
>      }
> 
> --
> 2.34.1
> 

~Michal
Julien Grall May 7, 2024, 1:20 p.m. UTC | #2
Hi Luca,

On 23/04/2024 09:25, Luca Fancellu wrote:
> From: Penny Zheng <Penny.Zheng@arm.com>
> 
> We are doing foreign memory mappings for static shared memory, and
> there is a strong possibility that they could be super mapped.

Is this because we are mapping more than one page at a time? Can you 
point me to the code?

> But today, p2m_put_l3_page cannot handle superpages.

This was done on purpose. Xen is not preemptible and therefore we need 
to be cautious how much work is done within the p2m code.

With the below proposal, for a 1GB mapping, we may end up calling 
put_page() up to 512 * 512 = 262144 times. put_page() can free memory. 
This could be a very long operation.

Have you benchmarked how long it would take?

Cheers,
Luca Fancellu May 7, 2024, 1:30 p.m. UTC | #3
Hi Julien,

> On 7 May 2024, at 14:20, Julien Grall <julien@xen.org> wrote:
> 
> Hi Luca,
> 
> On 23/04/2024 09:25, Luca Fancellu wrote:
>> From: Penny Zheng <Penny.Zheng@arm.com>
>> We are doing foreign memory mappings for static shared memory, and
>> there is a strong possibility that they could be super mapped.
> 
> Is this because we are mapping more than one page at a time? Can you point me to the code?

So, to be honest, this patch was originally in Penny's series; my knowledge of this side of the codebase
is very limited, so I pushed this one basically untouched.

From what I can see in the series, the mappings are made in handle_shared_mem_bank, and map_regions_p2mt
is called for one page at a time (allocated through allocate_domheap_memory, a new function introduced in
the series).
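
For context, the call pattern being described is roughly the sketch below.
map_regions_p2mt() is the existing Arm helper; allocate_domheap_memory()
comes from the series under discussion, so its signature here, and the
surrounding d/gfn/nr_pages context, are guesses for illustration only:

    /*
     * Rough sketch of the described loop: allocate and map one 4KB page
     * at a time. Error unwinding is omitted for brevity.
     */
    for ( i = 0; i < nr_pages; i++ )
    {
        struct page_info *pg = allocate_domheap_memory(d, 0 /* order */);

        if ( !pg )
            return -ENOMEM;

        rc = map_regions_p2mt(d, gfn_add(gfn, i), 1UL, page_to_mfn(pg),
                              p2m_map_foreign_rw);
        if ( rc )
            return rc;
    }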

So is that the case that this patch is not needed?


> 
>> But today, p2m_put_l3_page cannot handle superpages.
> 
> This was done on purpose. Xen is not preemptible and therefore we need to be cautious how much work is done within the p2m code.
> 
> With the below proposal, for a 1GB mapping, we may end up calling put_page() up to 512 * 512 = 262144 times. put_page() can free memory. This could be a very long operation.
> 
> Have you benchmarked how long it would take?

I did not, since its purpose was unclear to me, and it was not commented on in Penny's last series.

Cheers,
Luca
Julien Grall May 8, 2024, 9:05 p.m. UTC | #4
On 07/05/2024 14:30, Luca Fancellu wrote:
> Hi Julien,

Hi Luca,

>> On 7 May 2024, at 14:20, Julien Grall <julien@xen.org> wrote:
>>
>> Hi Luca,
>>
>> On 23/04/2024 09:25, Luca Fancellu wrote:
>>> From: Penny Zheng <Penny.Zheng@arm.com>
>>> We are doing foreign memory mappings for static shared memory, and
>>> there is a strong possibility that they could be super mapped.
>>
>> Is this because we are mapping more than one page at a time? Can you point me to the code?
> 
> So, to be honest, this patch was originally in Penny's series; my knowledge of this side of the codebase
> is very limited, so I pushed this one basically untouched.
> 
> From what I can see in the series, the mappings are made in handle_shared_mem_bank, and map_regions_p2mt
> is called for one page at a time (allocated through allocate_domheap_memory, a new function introduced in
> the series).
> 
> So is that the case that this patch is not needed?

I looked at the code and, if I am not mistaken, we are passing 
PFN_DOWN(psize) to map_regions_p2mt. At the moment, it is unclear to me 
why psize would be < PAGE_SIZE.

>>> But today, p2m_put_l3_page cannot handle superpages.
>>
>> This was done on purpose. Xen is not preemptible and therefore we need to be cautious how much work is done within the p2m code.
>>
>> With the below proposal, for a 1GB mapping, we may end up calling put_page() up to 512 * 512 = 262144 times. put_page() can free memory. This could be a very long operation.
>>
>> Have you benchmarked how long it would take?
> 
> I did not, since its purpose was unclear to me, and it was not commented on in Penny's last series.

Honestly, I can't remember why it wasn't commented.

Cheers,
Julien Grall May 8, 2024, 10:11 p.m. UTC | #5
Hi,

CC-ing Roger as he is working on adding support for foreign mappings 
on x86, although I am not expecting any implications, as only 4KB mappings 
should be supported.

On 08/05/2024 22:05, Julien Grall wrote:
> On 07/05/2024 14:30, Luca Fancellu wrote:
>>> On 7 May 2024, at 14:20, Julien Grall <julien@xen.org> wrote:
>>>
>>> Hi Luca,
>>>
>>> On 23/04/2024 09:25, Luca Fancellu wrote:
>>>> From: Penny Zheng <Penny.Zheng@arm.com>
>>>> But today, p2m_put_l3_page cannot handle superpages.
>>>
>>> This was done on purpose. Xen is not preemptible and therefore we 
>>> need to be cautious how much work is done within the p2m code.
>>>
>>> With the below proposal, for a 1GB mapping, we may end up calling 
>>> put_page() up to 512 * 512 = 262144 times. put_page() can free 
>>> memory. This could be a very long operation.
>>> 
>>> Have you benchmarked how long it would take?
>>
>> I did not, since its purpose was unclear to me, and it was not commented 
>> on in Penny's last series.
> 
> Honestly, I can't remember why it wasn't commented.

I skimmed through the code to check what we currently do for preemption.

{decrease, increase}_reservation() will allow handling a max_order() 
mapping at a time. On a default configuration, the max would be 4MB.

relinquish_p2m_mapping() is preempting every 512 iterations. One 
iteration is either a 4KB/2MB/1GB mapping.

relinquish_memory() is checking for preemption after every page.

So I think it would be OK to allow 2MB mappings for static shared memory 
but not 1GB. relinquish_p2m_mapping() would also need to be updated to 
take into account the larger foreign mappings.

I would consider checking for preemption if 't' is p2m_map_foreign and 
the order is 9 (i.e. 2MB) or above.
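
For illustration, a minimal sketch of that idea, assuming it lives in the
p2m teardown loop and reuses the existing preemption idiom ('t', 'order',
the exact hook point and the use of hypercall_preempt_check() here are
assumptions, not part of this patch):

    /*
     * Hypothetical: removing a foreign mapping of order >= 9 (2MB or
     * larger) means up to 512 or more put_page() calls, so offer a
     * preemption point before doing more work.
     */
    if ( p2m_is_foreign(t) && order >= XEN_PT_LEVEL_ORDER(2) &&
         hypercall_preempt_check() )
    {
        rc = -ERESTART;
        break;
    }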

Cheers,
Roger Pau Monné May 9, 2024, 7:55 a.m. UTC | #6
On Tue, Apr 23, 2024 at 09:25:28AM +0100, Luca Fancellu wrote:
> From: Penny Zheng <Penny.Zheng@arm.com>
> 
> We are doing foreign memory mappings for static shared memory, and
> there is a strong possibility that they could be super mapped.
> But today, p2m_put_l3_page cannot handle superpages.
> 
> This commit implements a new function, p2m_put_superpage, to handle superpages,
> specifically to put the extra references taken on foreign superpages.
> 
> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> ---
> v1:
>  - patch from https://patchwork.kernel.org/project/xen-devel/patch/20231206090623.1932275-9-Penny.Zheng@arm.com/
> ---
>  xen/arch/arm/mmu/p2m.c | 58 +++++++++++++++++++++++++++++++-----------
>  1 file changed, 43 insertions(+), 15 deletions(-)
> 
> diff --git a/xen/arch/arm/mmu/p2m.c b/xen/arch/arm/mmu/p2m.c
> index 41fcca011cf4..479a80fbd4cf 100644
> --- a/xen/arch/arm/mmu/p2m.c
> +++ b/xen/arch/arm/mmu/p2m.c
> @@ -753,17 +753,9 @@ static int p2m_mem_access_radix_set(struct p2m_domain *p2m, gfn_t gfn,
>      return rc;
>  }
>  
> -/*
> - * Put any references on the single 4K page referenced by pte.
> - * TODO: Handle superpages, for now we only take special references for leaf
> - * pages (specifically foreign ones, which can't be super mapped today).
> - */
> -static void p2m_put_l3_page(const lpae_t pte)
> +/* Put any references on the single 4K page referenced by mfn. */
> +static void p2m_put_l3_page(mfn_t mfn, unsigned type)
                                          ^ p2m_type_t?

Roger.
Roger Pau Monné May 9, 2024, 8:13 a.m. UTC | #7
On Wed, May 08, 2024 at 11:11:04PM +0100, Julien Grall wrote:
> Hi,
> 
> CC-ing Roger as he is working on adding support for foreign mappings on
> x86, although I am not expecting any implications, as only 4KB mappings should
> be supported.

I don't think we have plans on x86 to support foreign mappings with
order != 0 ATM.

We would need a new interface to allow creating such mappings, and
it's also not clear to me how the domain that creates such mappings
can identify super-pages on the remote domain.  IOW: the mapping
domain could request a super-page in the foreign domain gfn space,
but that could end up being a range of lower order mappings.

Also the interactions with the remote domain would need to be audited,
as the remote domain shattering the superpage would need to be
replicated in the mapping side in order to account for the changes.

> On 08/05/2024 22:05, Julien Grall wrote:
> > On 07/05/2024 14:30, Luca Fancellu wrote:
> > > > On 7 May 2024, at 14:20, Julien Grall <julien@xen.org> wrote:
> > > > 
> > > > Hi Luca,
> > > > 
> > > > On 23/04/2024 09:25, Luca Fancellu wrote:
> > > > > From: Penny Zheng <Penny.Zheng@arm.com>
> > > > > But today, p2m_put_l3_page cannot handle superpages.
> > > > 
> > > > This was done on purpose. Xen is not preemptible and therefore
> > > > we need to be cautious how much work is done within the p2m
> > > > code.
> > > > 
> > > > With the below proposal, for a 1GB mapping, we may end up calling
> > > > put_page() up to 512 * 512 = 262144 times. put_page() can free
> > > > memory. This could be a very long operation.
> > > > 
> > > > Have you benchmarked how long it would take?
> > > 
> > > I did not, since its purpose was unclear to me, and it was not commented
> > > on in Penny's last series.
> > 
> > Honestly, I can't remember why it wasn't commented.
> 
> I skimmed through the code to check what we currently do for preemption.
> 
> {decrease, increase}_reservation() will allow handling a max_order() mapping
> at a time. On a default configuration, the max would be 4MB.
> 
> relinquish_p2m_mapping() is preempting every 512 iterations. One iteration
> is either a 4KB/2MB/1GB mapping.
> 
> relinquish_memory() is checking for preemption after every page.
> 
> So I think it would be OK to allow 2MB mappings for static shared memory but
> not 1GB. relinquish_p2m_mapping() would also need to be updated to take
> into account the larger foreign mappings.

FWIW, relinquish_p2m_mapping() likely does more than what's strictly
needed, as you could just remove foreign mappings while leaving other
entries as-is?  The drain of the p2m pool and release of domain pages
should take care of dropping references to the RAM domain memory?

> I would consider checking for preemption if 't' is p2m_map_foreign and the
> order is 9 (i.e. 2MB) or above.

How can those mappings be removed?  Is it possible for the guest to
modify such foreign super-pages?  Not sure all paths will be easy to
audit for preemption if it's more than relinquish_p2m_mapping() that
you need to adjust.

Regards, Roger.
Julien Grall May 9, 2024, 9:50 a.m. UTC | #8
On 09/05/2024 09:13, Roger Pau Monné wrote:
> On Wed, May 08, 2024 at 11:11:04PM +0100, Julien Grall wrote:
>> Hi,
>>
>> CC-ing Roger as he is working on adding support for the foreign mapping on
>> x86. Although, I am not expecting any implication as only 4KB mapping should
>> be supported.
> 
> I don't think we have plans on x86 to support foreign mappings with
> order != 0 ATM.
> 
> We would need a new interface to allow creating such mappings, and
> it's also not clear to me how the domain that creates such mappings
> can identify super-pages on the remote domain.  IOW: the mapping
> domain could request a super-page in the foreign domain gfn space,
> but that could end up being a range of lower order mappings.

I agree with this. But ...

> 
> Also the interactions with the remote domain would need to be audited,
> as the remote domain shattering the superpage would need to be
> replicated in the mapping side in order to account for the changes.

... I don't understand this one. How is this different from today's 
situation, where a domain can foreign map a 2MB region which may be using 
a superpage in the remote domain?

> 
>> On 08/05/2024 22:05, Julien Grall wrote:
>>> On 07/05/2024 14:30, Luca Fancellu wrote:
>>>>> On 7 May 2024, at 14:20, Julien Grall <julien@xen.org> wrote:
>>>>>
>>>>> Hi Luca,
>>>>>
>>>>> On 23/04/2024 09:25, Luca Fancellu wrote:
>>>>>> From: Penny Zheng <Penny.Zheng@arm.com>
>>>>>> But today, p2m_put_l3_page cannot handle superpages.
>>>>>
>>>>> This was done on purpose. Xen is not preemptible and therefore
>>>>> we need to be cautious how much work is done within the p2m
>>>>> code.
>>>>>
>>>>> With the below proposal, for a 1GB mapping, we may end up calling
>>>>> put_page() up to 512 * 512 = 262144 times. put_page() can free
>>>>> memory. This could be a very long operation.
>>>>>
>>>>> Have you benchmarked how long it would take?
>>>>
>>>> I did not, since its purpose was unclear to me, and it was not commented
>>>> on in Penny's last series.
>>>
>>> Honestly, I can't remember why it wasn't commented.
>>
>> I skimmed through the code to check what we currently do for preemption.
>>
>> {decrease, increase}_reservation() will allow handling a max_order() mapping
>> at a time. On a default configuration, the max would be 4MB.
>>
>> relinquish_p2m_mapping() is preempting every 512 iterations. One iteration
>> is either a 4KB/2MB/1GB mapping.
>>
>> relinquish_memory() is checking for preemption after every page.
>>
>> So I think it would be OK to allow 2MB mappings for static shared memory but
>> not 1GB. relinquish_p2m_mapping() would also need to be updated to take
>> into account the larger foreign mappings.
> 
> FWIW, relinquish_p2m_mapping() likely does more than what's strictly
> needed, as you could just remove foreign mappings while leaving other
> entries as-is?  The drain of the p2m pool and release of domain pages
> should take care of dropping references to the RAM domain memory?
I believe the code was written in a way that would let us easily 
introduce references for all the mappings. This has been discussed a few 
times in the past but we never implemented it.

> 
>> I would consider checking for preemption if 't' is p2m_map_foreign and the
>> order is 9 (i.e. 2MB) or above.
> 
> How can those mappings be removed?

From any p2m_* functions. On Arm we don't (yet) care about which 
mapping is replaced.

>  Is it possible for the guest to
> modify such foreign super-pages?

Yes.

>  Not sure all paths will be easy to
> audit for preemption if it's more than relinquish_p2m_mapping() that
> you need to adjust.

I thought about it yesterday. But I came to the conclusion that if we 
have any concern about removing a 1GB foreign superpage, then we would 
already have the problem today, as a domain can contiguously map 1GB 
worth of foreign mappings using small pages.

I went through the various code paths and, I believe, the only concern 
would be if an admin decides to change the value of max_order() using 
the command line option "memop-max-order".

Cheers,
Roger Pau Monné May 9, 2024, 11:28 a.m. UTC | #9
On Thu, May 09, 2024 at 10:50:56AM +0100, Julien Grall wrote:
> 
> 
> On 09/05/2024 09:13, Roger Pau Monné wrote:
> > On Wed, May 08, 2024 at 11:11:04PM +0100, Julien Grall wrote:
> > Also the interactions with the remote domain would need to be audited,
> > as the remote domain shattering the superpage would need to be
> > replicated in the mapping side in order to account for the changes.
> 
> ... I don't understand this one. How is this different from today's
> situation, where a domain can foreign map a 2MB region which may be using
> a superpage in the remote domain?

Hm, right, I was wrong about that I think; as long as proper references
are taken for the superpage entries it should be fine.

> >  Not sure all paths will be easy to
> > audit for preemption if it's more than relinquish_p2m_mapping() that
> > you need to adjust.
> 
> I thought about it yesterday. But I came to the conclusion that if we have
> any concern about removing a 1GB foreign superpage, then we would already have
> the problem today, as a domain can contiguously map 1GB worth of foreign
> mappings using small pages.

Yeah, but in that case addition or removal is done in 4K chunks, and
hence we can preempt during the operation.

OTOH for 1GB, given the code here, the pages could be freed in one go,
without a chance of preempting the operation.

Maybe you have to shatter superpages into 4K entries and then remove
them individually, so as to allow preemption to be possible by
calling put_page() for each 4K chunk?
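
A rough sketch of that shatter-first idea (the split helper named here is
an assumption for illustration; as the reply below notes, the shattering
itself would need a fresh page-table page from the p2m pool):

    /*
     * Hypothetical: split a 2MB foreign entry into 512 4KB entries,
     * then put them one by one with preemption points in between.
     */
    if ( !p2m_shatter_superpage(p2m, entry, level) )    /* assumed helper */
        return -ENOMEM;

    for ( i = 0; i < XEN_PT_LPAE_ENTRIES; i++ )
    {
        p2m_put_l3_page(mfn_add(mfn, i), type);

        if ( hypercall_preempt_check() )
            return -ERESTART;    /* caller would resume after mfn + i */
    }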

Thanks, Roger.
Julien Grall May 9, 2024, 12:12 p.m. UTC | #10
Hi,

On 09/05/2024 12:28, Roger Pau Monné wrote:
> On Thu, May 09, 2024 at 10:50:56AM +0100, Julien Grall wrote:
>>
>>
>> On 09/05/2024 09:13, Roger Pau Monné wrote:
>>> On Wed, May 08, 2024 at 11:11:04PM +0100, Julien Grall wrote:
>>> Also the interactions with the remote domain would need to be audited,
>>> as the remote domain shattering the superpage would need to be
>>> replicated in the mapping side in order to account for the changes.
>>
>> ... I don't understand this one. How is this different from today's
>> situation, where a domain can foreign map a 2MB region which may be using
>> a superpage in the remote domain?
> 
> Hm, right, I was wrong about that I think; as long as proper references
> are taken for the superpage entries it should be fine.
> 
>>>   Not sure all paths will be easy to
>>> audit for preemption if it's more than relinquish_p2m_mapping() that
>>> you need to adjust.
>>
>> I thought about it yesterday. But I came to the conclusion that if we have
>> any concern about removing a 1GB foreign superpage, then we would already have
>> the problem today, as a domain can contiguously map 1GB worth of foreign
>> mappings using small pages.
> 
> Yeah, but in that case addition or removal is done in 4K chunks, and
> hence we can preempt during the operation.

I am not entirely sure how that would work. From my understanding, today, 
most of the users of the P2M code expect the operation to complete in 
one go, and if preemption is needed then the caller is responsible for 
handling it by breaking up the operation.

With your suggestion, it sounds like you want to rework how the 
preemption works today and push it into the P2M code. This would mean we 
would need to modify all the callers to check for -ERESTART (or similar) 
and also tell them how many pages were handled so the call can be 
restarted where it stopped. Is that what you had in mind?
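
For illustration, the caller side under that model might look like this
sketch (p2m_remove_mapping_preemptible() and its 'done' out-parameter are
hypothetical names for the reworked interface):

    /* Hypothetical caller pattern if the p2m code returned -ERESTART. */
    do {
        unsigned long done = 0;    /* pages handled by this call */

        rc = p2m_remove_mapping_preemptible(d, start, nr, &done);
        start = gfn_add(start, done);
        nr -= done;
        /* A real caller would preempt here, e.g. via a continuation. */
    } while ( rc == -ERESTART );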

I don't expect the work to be trivial, so I wonder if it is really 
worth trying to change the way we preempt.

> 
> OTOH for 1GB, given the code here, the pages could be freed in one go,
> without a chance of preempting the operation.
> 
> Maybe you have to shatter superpages into 4K entries and then remove
> them individually, so as to allow preemption to be possible by
> calling put_page() for each 4K chunk?
This would require allocating some pages from the P2M pool for the 
tables. As the pool may be exhausted, it could be problematic when 
relinquishing the resources.

It may be possible to find a way to have memory available by removing 
other mappings first. But it feels a bit hackish, and I would prefer 
that we avoid allocating any memory when relinquishing.

So I think we want to allow at most 2MB superpages for foreign mappings.

Cheers,
Roger Pau Monné May 9, 2024, 12:58 p.m. UTC | #11
On Thu, May 09, 2024 at 01:12:00PM +0100, Julien Grall wrote:
> Hi,
> 
> On 09/05/2024 12:28, Roger Pau Monné wrote:
> > On Thu, May 09, 2024 at 10:50:56AM +0100, Julien Grall wrote:
> > > 
> > > 
> > > On 09/05/2024 09:13, Roger Pau Monné wrote:
> > > > On Wed, May 08, 2024 at 11:11:04PM +0100, Julien Grall wrote:
> > > > Also the interactions with the remote domain would need to be audited,
> > > > as the remote domain shattering the superpage would need to be
> > > > replicated in the mapping side in order to account for the changes.
> > > 
> > > ... I don't understand this one. How is this different from today's
> > > situation, where a domain can foreign map a 2MB region which may be using
> > > a superpage in the remote domain?
> > 
> > Hm, right, I was wrong about that I think; as long as proper references
> > are taken for the superpage entries it should be fine.
> > 
> > > >   Not sure all paths will be easy to
> > > > audit for preemption if it's more than relinquish_p2m_mapping() that
> > > > you need to adjust.
> > > 
> > > I thought about it yesterday. But I came to the conclusion that if we have
> > > any concern about removing a 1GB foreign superpage, then we would already have
> > > the problem today, as a domain can contiguously map 1GB worth of foreign
> > > mappings using small pages.
> > 
> > Yeah, but in that case addition or removal is done in 4K chunks, and
> > hence we can preempt during the operation.
> 
> I am not entirely sure how that would work. From my understanding, today,
> most of the users of the P2M code expect the operation to complete in one
> go, and if preemption is needed then the caller is responsible for handling
> it by breaking up the operation.
> 
> With your suggestion, it sounds like you want to rework how the preemption
> works today and push it into the P2M code. This would mean we would need to
> modify all the callers to check for -ERESTART (or similar) and also tell
> them how many pages were handled so the call can be restarted where it
> stopped. Is that what you had in mind?

TBH, I didn't have a specific location in mind about where to do the
split.

One solution that could simplify it is allowing foreign entries to
only be removed by specific functions, so that the split required in
order to do the removal can be handled by the caller knowing it's
dealing with a foreign map superpage.

But that would require the superpage to be shattered, and hence will
require creating lower level leaf entries in order to do the
shattering and the removal in 4K chunks.

> I don't expect the work to be trivial, so I wonder if it is really worth
> trying to change the way we preempt.
> 
> > 
> > OTOH for 1GB, given the code here, the pages could be freed in one go,
> > without a chance of preempting the operation.
> > 
> > Maybe you have to shatter superpages into 4K entries and then remove
> > them individually, so as to allow preemption to be possible by
> > calling put_page() for each 4K chunk?
> This would require allocating some pages from the P2M pool for the tables.
> As the pool may be exhausted, it could be problematic when relinquishing the
> resources.

Indeed, it's not ideal.

> It may be possible to find a way to have memory available by removing other
> mappings first. But it feels a bit hackish, and I would prefer that we
> avoid allocating any memory when relinquishing.

Maybe it could be helpful to provide a function to put a superpage
that internally calls free_domheap_pages() with the appropriate order,
so that freeing a superpage only takes a single free_domheap_pages()
call.  That could reduce some of the contention around the heap_lock
and d->page_alloc_lock locks.
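
A sketch of such a helper, under the strong assumption that the whole
superpage is dropping its last references at once (which, as pointed out
in the reply, is not guaranteed):

    /*
     * Hypothetical: free a fully-owned superpage with a single
     * free_domheap_pages() call instead of 1 << order put_page() calls,
     * taking the heap_lock once. Only valid if the caller knows all
     * 1 << order pages are releasing their final reference here.
     */
    static void put_superpage_whole(struct page_info *pg, unsigned int order)
    {
        free_domheap_pages(pg, order);
    }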

Note also that a foreign unmap resulting in a page free is also not
the common case, as that should only happen when the foreign domain
has been destroyed, or the page ballooned out, so to benchmark the
worst case some effort will be needed in order to model this
scenario.

Thanks, Roger.
Julien Grall May 10, 2024, 9:37 p.m. UTC | #12
Hi Roger,

On 09/05/2024 13:58, Roger Pau Monné wrote:
> On Thu, May 09, 2024 at 01:12:00PM +0100, Julien Grall wrote:
>> Hi,
>>
>> On 09/05/2024 12:28, Roger Pau Monné wrote:
>>> On Thu, May 09, 2024 at 10:50:56AM +0100, Julien Grall wrote:
>>>>
>>>>
>>>> On 09/05/2024 09:13, Roger Pau Monné wrote:
>>>>> On Wed, May 08, 2024 at 11:11:04PM +0100, Julien Grall wrote:
>>>>> Also the interactions with the remote domain would need to be audited,
>>>>> as the remote domain shattering the superpage would need to be
>>>>> replicated in the mapping side in order to account for the changes.
>>>>
>>>> ... I don't understand this one. How is this different from today's
>>>> situation, where a domain can foreign map a 2MB region which may be using
>>>> a superpage in the remote domain?
>>>
>>> Hm, right, I was wrong about that I think; as long as proper references
>>> are taken for the superpage entries it should be fine.
>>>
>>>>>    Not sure all paths will be easy to
>>>>> audit for preemption if it's more than relinquish_p2m_mapping() that
>>>>> you need to adjust.
>>>>
>>>> I thought about it yesterday. But I came to the conclusion that if we have
>>>> any concern about removing a 1GB foreign superpage, then we would already have
>>>> the problem today, as a domain can contiguously map 1GB worth of foreign
>>>> mappings using small pages.
>>>
>>> Yeah, but in that case addition or removal is done in 4K chunks, and
>>> hence we can preempt during the operation.
>>
>> I am not entirely sure how that would work. From my understanding, today,
>> most of the users of the P2M code expect the operation to complete in one
>> go, and if preemption is needed then the caller is responsible for handling
>> it by breaking up the operation.
>>
>> With your suggestion, it sounds like you want to rework how the preemption
>> works today and push it into the P2M code. This would mean we would need to
>> modify all the callers to check for -ERESTART (or similar) and also tell
>> them how many pages were handled so the call can be restarted where it
>> stopped. Is that what you had in mind?
> 
> TBH, I didn't have a specific location in mind about where to do the
> split.
> 
> One solution that could simplify it is allowing foreign entries to
> only be removed by specific functions, so that the split required in
> order to do the removal can be handled by the caller knowing it's
> dealing with a foreign map superpage.

That would work. It would require quite a bit of work though.

> But that would require the superpage to be shattered, and hence will
> require creating lower level leaf entries in order to do the
> shattering and the removal in 4K chunks.
> 
>> I don't expect the work to be trivial, so I wonder if it is really worth
>> trying to change the way we preempt.
>>
>>>
>>> OTOH for 1GB, given the code here, the pages could be freed in one go,
>>> without a chance of preempting the operation.
>>>
>>> Maybe you have to shatter superpages into 4K entries and then remove
>>> them individually, so as to allow preemption to be possible by
>>> calling put_page() for each 4K chunk?
>> This would require allocating some pages from the P2M pool for the tables.
>> As the pool may be exhausted, it could be problematic when relinquishing the
>> resources.
> 
> Indeed, it's not ideal.
> 
> > It may be possible to find a way to have memory available by removing other
> > mappings first. But it feels a bit hackish, and I would prefer that we
> > avoid allocating any memory when relinquishing.
> 
> Maybe it could be helpful to provide a function to put a superpage
> that internally calls free_domheap_pages() with the appropriate order,
> so that freeing a superpage only takes a single free_domheap_pages()
> call.

Today, free_domheap_page() is only called when the last reference is 
removed. I don't think there is any guarantee that all the references 
will be dropped at the same time.

> That could reduce some of the contention around the heap_lock
> and d->page_alloc_lock locks.

From previous experience (when Hongyan and I worked on optimizing 
init_heap_pages() for Live-Update), the lock is actually not the biggest 
problem. The issue is adding the pages back to the heap (which may 
require merging). So as long as the pages are not freed contiguously, 
we may not gain anything.

Anyway, it sounds like someone needs to spend some time investigating 
this issue.

> 
> Note also that a foreign unmap resulting in a page free is also not
> the common case, as that should only happen when the foreign domain
> has been destroyed, or the page ballooned out, so to benchmark the
> worst case some effort will be needed in order to model this
> scenario.

Good callout. It may be easier to reproduce it with some XTF tests. 
Unfortunately, I don't have the bandwidth to look at it. Maybe Luca can?

Otherwise, we may have to accept not supporting 1GB superpages for 
shared memory for the time being. But I am not actually sure this is a 
big problem?

Cheers,
Roger Pau Monné May 13, 2024, 8:04 a.m. UTC | #13
On Fri, May 10, 2024 at 10:37:53PM +0100, Julien Grall wrote:
> Hi Roger,
> 
> On 09/05/2024 13:58, Roger Pau Monné wrote:
> > On Thu, May 09, 2024 at 01:12:00PM +0100, Julien Grall wrote:
> > > Hi,
> > > 
> > > On 09/05/2024 12:28, Roger Pau Monné wrote:
> > > > OTOH for 1GB, given the code here, the pages could be freed in one go,
> > > > without a chance of preempting the operation.
> > > > 
> > > > Maybe you have to shatter superpages into 4K entries and then remove
> > > > them individually, so as to allow preemption to be possible by
> > > > calling put_page() for each 4K chunk?
> > > This would require allocating some pages from the P2M pool for the tables.
> > > As the pool may be exhausted, it could be problematic when relinquishing the
> > > resources.
> > 
> > Indeed, it's not ideal.
> > 
> > > It may be possible to find a way to have memory available by removing other
> > > mappings first. But it feels a bit hackish, and I would prefer that we
> > > avoid allocating any memory when relinquishing.
> > 
> > Maybe it could be helpful to provide a function to put a superpage
> > that internally calls free_domheap_pages() with the appropriate order,
> > so that freeing a superpage only takes a single free_domheap_pages()
> > call.
> 
> Today, free_domheap_page() is only called when the last reference is
> removed. I don't think there is any guarantee that all the references will
> be dropped at the same time.

I see, yes, we have no guarantee that removing the superpage entry in
the mapping domain will lead to the whole superpage being either freed at
once or not freed at all.  The source domain may have shattered the
super-page, and hence freeing might need to be done at a smaller
granularity.

> >  That could reduce some of the contention around the heap_lock
> > and d->page_alloc_lock locks.
> 
> From previous experience (when Hongyan and I worked on optimizing
> init_heap_pages() for Live-Update), the lock is actually not the biggest
> problem. The issue is adding the pages back to the heap (which may require
> merging). So as long as the pages are not freed contiguously, we may not
> gain anything.

Would it help to defer the merging to the idle context, kind of
similar to what we do with scrubbing?

Thanks, Roger.
Luca Fancellu May 14, 2024, 7:55 a.m. UTC | #14
Hi Julien,

> 
>> Note also that a foreign unmap resulting in a page free is also not
>> the common case, as that should only happen when the foreign domain
>> has been destroyed, or the page ballooned out, so to benchmark the
>> worst case some effort will be needed in order to model this
>> scenario.
> 
> Good callout. It may be easier to reproduce it with some XTF tests. Unfortunately, I don't have the bandwidth to look at it. Maybe Luca can?

Unfortunately I don’t have bandwidth for that,

> 
> Otherwise, we may have to accept not supporting 1GB superpages for shared memory for the time being.

I will try to do it. 

Cheers,
Luca
Patch

diff --git a/xen/arch/arm/mmu/p2m.c b/xen/arch/arm/mmu/p2m.c
index 41fcca011cf4..479a80fbd4cf 100644
--- a/xen/arch/arm/mmu/p2m.c
+++ b/xen/arch/arm/mmu/p2m.c
@@ -753,17 +753,9 @@  static int p2m_mem_access_radix_set(struct p2m_domain *p2m, gfn_t gfn,
     return rc;
 }
 
-/*
- * Put any references on the single 4K page referenced by pte.
- * TODO: Handle superpages, for now we only take special references for leaf
- * pages (specifically foreign ones, which can't be super mapped today).
- */
-static void p2m_put_l3_page(const lpae_t pte)
+/* Put any references on the single 4K page referenced by mfn. */
+static void p2m_put_l3_page(mfn_t mfn, unsigned type)
 {
-    mfn_t mfn = lpae_get_mfn(pte);
-
-    ASSERT(p2m_is_valid(pte));
-
     /*
      * TODO: Handle other p2m types
      *
@@ -771,16 +763,53 @@  static void p2m_put_l3_page(const lpae_t pte)
      * flush the TLBs if the page is reallocated before the end of
      * this loop.
      */
-    if ( p2m_is_foreign(pte.p2m.type) )
+    if ( p2m_is_foreign(type) )
     {
         ASSERT(mfn_valid(mfn));
         put_page(mfn_to_page(mfn));
     }
     /* Detect the xenheap page and mark the stored GFN as invalid. */
-    else if ( p2m_is_ram(pte.p2m.type) && is_xen_heap_mfn(mfn) )
+    else if ( p2m_is_ram(type) && is_xen_heap_mfn(mfn) )
         page_set_xenheap_gfn(mfn_to_page(mfn), INVALID_GFN);
 }
 
+/* Put any references on the superpage referenced by mfn. */
+static void p2m_put_superpage(mfn_t mfn, unsigned int next_level, unsigned type)
+{
+    unsigned int i;
+    unsigned int level_order = XEN_PT_LEVEL_ORDER(next_level);
+
+    for ( i = 0; i < XEN_PT_LPAE_ENTRIES; i++ )
+    {
+        if ( next_level == 3 )
+            p2m_put_l3_page(mfn, type);
+        else
+            p2m_put_superpage(mfn, next_level + 1, type);
+
+        mfn = mfn_add(mfn, 1 << level_order);
+    }
+}
+
+/* Put any references on the page referenced by pte. */
+static void p2m_put_page(const lpae_t pte, unsigned int level)
+{
+    mfn_t mfn = lpae_get_mfn(pte);
+
+    ASSERT(p2m_is_valid(pte));
+
+    /*
+     * We are either having a first level 1G superpage or a
+     * second level 2M superpage.
+     */
+    if ( p2m_is_superpage(pte, level) )
+        return p2m_put_superpage(mfn, level + 1, pte.p2m.type);
+    else
+    {
+        ASSERT(level == 3);
+        return p2m_put_l3_page(mfn, pte.p2m.type);
+    }
+}
+
 /* Free lpae sub-tree behind an entry */
 static void p2m_free_entry(struct p2m_domain *p2m,
                            lpae_t entry, unsigned int level)
@@ -809,9 +838,8 @@  static void p2m_free_entry(struct p2m_domain *p2m,
 #endif
 
         p2m->stats.mappings[level]--;
-        /* Nothing to do if the entry is a super-page. */
-        if ( level == 3 )
-            p2m_put_l3_page(entry);
+        p2m_put_page(entry, level);
+
         return;
     }