
[RFC,71/84] x86/setup: start tearing down the direct map.

Message ID ccabf9b1ce3142ca65e453ec9a5ae1d34d28a992.1569489002.git.hongyax@amazon.com (mailing list archive)
State New, archived
Series Remove direct map from Xen

Commit Message

Xia, Hongyan Sept. 26, 2019, 9:46 a.m. UTC
From: Hongyan Xia <hongyax@amazon.com>

Signed-off-by: Hongyan Xia <hongyax@amazon.com>
---
 xen/arch/x86/setup.c    | 4 ++--
 xen/common/page_alloc.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

Comments

Julien Grall Sept. 26, 2019, 10:50 a.m. UTC | #1
Hi,

Title: Please remove the full stop.

On 9/26/19 10:46 AM, hongyax@amazon.com wrote:
> From: Hongyan Xia <hongyax@amazon.com>

Please describe what the patch does and why. For instance, why do you 
replace maddr_to_mfn(map_e) with INVALID_MFN? Why not just remove the 
call entirely?

> 
> Signed-off-by: Hongyan Xia <hongyax@amazon.com>
> ---
>   xen/arch/x86/setup.c    | 4 ++--
>   xen/common/page_alloc.c | 2 +-
>   2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> index e964c032f6..3dc2fad987 100644
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -1367,7 +1367,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>   
>               if ( map_e < end )
>               {
> -                map_pages_to_xen((unsigned long)__va(map_e), maddr_to_mfn(map_e),
> +                map_pages_to_xen((unsigned long)__va(map_e), INVALID_MFN,
>                                    PFN_DOWN(end - map_e), PAGE_HYPERVISOR);
>                   init_boot_pages(map_e, end);
>                   map_e = end;
> @@ -1382,7 +1382,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>           }
>           if ( s < map_s )
>           {
> -            map_pages_to_xen((unsigned long)__va(s), maddr_to_mfn(s),
> +            map_pages_to_xen((unsigned long)__va(s), INVALID_MFN,
>                                PFN_DOWN(map_s - s), PAGE_HYPERVISOR);
>               init_boot_pages(s, map_s);
>           }
> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
> index a00db4c0d9..deeeac065c 100644
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -2157,7 +2157,7 @@ void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
>       map_pages_to_xen((unsigned long)ret, page_to_mfn(pg),
>                        1UL << order, PAGE_HYPERVISOR);
>   
> -    return page_to_virt(pg);
> +    return ret;

This change looks spurious. Did you intend to do it in a previous patch?

Cheers,
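
For context: map_pages_to_xen() treats INVALID_MFN as "populate the
intermediate page tables, but install no leaf mappings". To the best of my
understanding, the x86 populate_pt_range() helper is a thin wrapper around
exactly this behaviour -- a sketch of its shape, not a verbatim quote of the
tree:

    /* Pre-create page-table pages down to L1, without mapping anything. */
    int populate_pt_range(unsigned long virt, unsigned long nr_mfns)
    {
        return map_pages_to_xen(virt, INVALID_MFN, nr_mfns, MAP_SMALL_PAGES);
    }

The hunks above pass PAGE_HYPERVISOR rather than MAP_SMALL_PAGES, which is
part of what the reviewers question below.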
Wei Liu Sept. 26, 2019, 2:26 p.m. UTC | #2
On Thu, Sep 26, 2019 at 10:46:34AM +0100, hongyax@amazon.com wrote:
> From: Hongyan Xia <hongyax@amazon.com>
> 
> Signed-off-by: Hongyan Xia <hongyax@amazon.com>
> ---
>  xen/arch/x86/setup.c    | 4 ++--
>  xen/common/page_alloc.c | 2 +-
>  2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> index e964c032f6..3dc2fad987 100644
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -1367,7 +1367,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>  
>              if ( map_e < end )
>              {
> -                map_pages_to_xen((unsigned long)__va(map_e), maddr_to_mfn(map_e),
> +                map_pages_to_xen((unsigned long)__va(map_e), INVALID_MFN,
>                                   PFN_DOWN(end - map_e), PAGE_HYPERVISOR);

Why don't you just remove the calls to map_pages_to_xen?

>                  init_boot_pages(map_e, end);
>                  map_e = end;
> @@ -1382,7 +1382,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>          }
>          if ( s < map_s )
>          {
> -            map_pages_to_xen((unsigned long)__va(s), maddr_to_mfn(s),
> +            map_pages_to_xen((unsigned long)__va(s), INVALID_MFN,
>                               PFN_DOWN(map_s - s), PAGE_HYPERVISOR);
>              init_boot_pages(s, map_s);
>          }
> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
> index a00db4c0d9..deeeac065c 100644
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -2157,7 +2157,7 @@ void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
>      map_pages_to_xen((unsigned long)ret, page_to_mfn(pg),
>                       1UL << order, PAGE_HYPERVISOR);
>  
> -    return page_to_virt(pg);
> +    return ret;

This hunk is a fix to a previous patch. It doesn't belong here.

Wei.

>  }
>  
>  
> -- 
> 2.17.1
>
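
For context, the disputed hunk only makes sense against the allocator shape
introduced earlier in the series. A minimal sketch of that shape -- an
assumption reconstructed from the quoted context, not the series' exact code:

    void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
    {
        struct page_info *pg = alloc_domheap_pages(NULL, order, memflags);
        void *ret;

        if ( pg == NULL )
            return NULL;

        /* The va the pages would have had in the direct map... */
        ret = page_to_virt(pg);
        /* ...is mapped on demand, now that the direct map is going away. */
        map_pages_to_xen((unsigned long)ret, page_to_mfn(pg),
                         1UL << order, PAGE_HYPERVISOR);

        return ret;
    }

Returning the already-computed 'ret' instead of recomputing page_to_virt(pg)
belongs with the earlier patch that introduced 'ret', hence the reviewers
asking for the hunk to be moved.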
Xia, Hongyan Sept. 27, 2019, 12:54 p.m. UTC | #3
On 26/09/2019 15:26, Wei Liu wrote:
> On Thu, Sep 26, 2019 at 10:46:34AM +0100, hongyax@amazon.com wrote:
>> From: Hongyan Xia <hongyax@amazon.com>
>>
>> Signed-off-by: Hongyan Xia <hongyax@amazon.com>
>> ---
>>   xen/arch/x86/setup.c    | 4 ++--
>>   xen/common/page_alloc.c | 2 +-
>>   2 files changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
>> index e964c032f6..3dc2fad987 100644
>> --- a/xen/arch/x86/setup.c
>> +++ b/xen/arch/x86/setup.c
>> @@ -1367,7 +1367,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>>   
>>               if ( map_e < end )
>>               {
>> -                map_pages_to_xen((unsigned long)__va(map_e), maddr_to_mfn(map_e),
>> +                map_pages_to_xen((unsigned long)__va(map_e), INVALID_MFN,
>>                                    PFN_DOWN(end - map_e), PAGE_HYPERVISOR);
> 
> Why don't you just remove the calls to map_pages_to_xen?
> 

My intention is to pre-populate the range so that we don't have to do so later 
when there are xenheap allocations. But of course if there is superpage merging 
or shattering, page tables will be removed or allocated anyway. I will remove 
the calls in the next revision.

>>                   init_boot_pages(map_e, end);
>>                   map_e = end;
>> @@ -1382,7 +1382,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>>           }
>>           if ( s < map_s )
>>           {
>> -            map_pages_to_xen((unsigned long)__va(s), maddr_to_mfn(s),
>> +            map_pages_to_xen((unsigned long)__va(s), INVALID_MFN,
>>                                PFN_DOWN(map_s - s), PAGE_HYPERVISOR);
>>               init_boot_pages(s, map_s);
>>           }
>> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
>> index a00db4c0d9..deeeac065c 100644
>> --- a/xen/common/page_alloc.c
>> +++ b/xen/common/page_alloc.c
>> @@ -2157,7 +2157,7 @@ void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
>>       map_pages_to_xen((unsigned long)ret, page_to_mfn(pg),
>>                        1UL << order, PAGE_HYPERVISOR);
>>   
>> -    return page_to_virt(pg);
>> +    return ret;
> 
> This hunk is a fix to a previous patch. It doesn't belong here.
> 
Noted.

Hongyan
Jan Beulich Sept. 27, 2019, 1 p.m. UTC | #4
On 27.09.2019 14:54, hongyax@amazon.com wrote:
> On 26/09/2019 15:26, Wei Liu wrote:
>> On Thu, Sep 26, 2019 at 10:46:34AM +0100, hongyax@amazon.com wrote:
>>> --- a/xen/arch/x86/setup.c
>>> +++ b/xen/arch/x86/setup.c
>>> @@ -1367,7 +1367,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>>>   
>>>               if ( map_e < end )
>>>               {
>>> -                map_pages_to_xen((unsigned long)__va(map_e), maddr_to_mfn(map_e),
>>> +                map_pages_to_xen((unsigned long)__va(map_e), INVALID_MFN,
>>>                                    PFN_DOWN(end - map_e), PAGE_HYPERVISOR);
>>
>> Why don't you just remove the calls to map_pages_to_xen?
>>
> 
> My intention is to pre-populate the range so that we don't have to do so later 
> when there are xenheap allocations. But of course if there is superpage merging 
> or shattering, page tables will be removed or allocated anyway. I will remove 
> the calls in the next revision.

Pre-populate? There's a conceptual question then: when the
direct map is gone, are you mapping Xen heap pages into the place
they'd have lived at in the direct map? I'm not convinced that's
what we want. In fact I'm not convinced we'd want to retain the
distinction between Xen heap and domain heap any further at that
point - there's no reason for it anymore (afaict).

Jan
Julien Grall Sept. 27, 2019, 1:01 p.m. UTC | #5
Hi,

On 27/09/2019 13:54, hongyax@amazon.com wrote:
> On 26/09/2019 15:26, Wei Liu wrote:
>> On Thu, Sep 26, 2019 at 10:46:34AM +0100, hongyax@amazon.com wrote:
>>> From: Hongyan Xia <hongyax@amazon.com>
>>>
>>> Signed-off-by: Hongyan Xia <hongyax@amazon.com>
>>> ---
>>>   xen/arch/x86/setup.c    | 4 ++--
>>>   xen/common/page_alloc.c | 2 +-
>>>   2 files changed, 3 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
>>> index e964c032f6..3dc2fad987 100644
>>> --- a/xen/arch/x86/setup.c
>>> +++ b/xen/arch/x86/setup.c
>>> @@ -1367,7 +1367,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>>>               if ( map_e < end )
>>>               {
>>> -                map_pages_to_xen((unsigned long)__va(map_e), 
>>> maddr_to_mfn(map_e),
>>> +                map_pages_to_xen((unsigned long)__va(map_e), INVALID_MFN,
>>>                                    PFN_DOWN(end - map_e), PAGE_HYPERVISOR);
>>
>> Why don't you just remove the calls to map_pages_to_xen?
>>
> 
> My intention is to pre-populate the range so that we don't have to do so later 
> when there are xenheap allocations. But of course if there is superpage merging 
> or shattering, page tables will be removed or allocated anyway. I will remove 
> the calls in the next revision.

How about using populate_pt_range() in that case? This will pre-populate the 
page tables for mappings with small pages.

I haven't fully read the series yet, but I would assume that only memory 
allocated for Xen's internal use would be kept mapped. Guest memory would 
still be unmapped, right?

If so, I don't think we often do big allocations for Xen, so mappings are 
probably more likely to use small pages. In that case, it would be fine to 
pre-populate the page tables.

On the other hand, Xen doesn't use a lot of memory (compared to guest 
memory), so maybe pre-populating the page tables would be a waste of memory.

Cheers,
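
A sketch of this suggestion applied to the first hunk, assuming the
then-current signature int populate_pt_range(unsigned long virt,
unsigned long nr_mfns):

    if ( map_e < end )
    {
        /* Pre-create the intermediate page tables with small pages,
         * without installing any leaf mappings. */
        populate_pt_range((unsigned long)__va(map_e), PFN_DOWN(end - map_e));
        init_boot_pages(map_e, end);
        map_e = end;
    }

Small pages would also sidestep the superpage merging/shattering concern
raised earlier in the thread.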
Xia, Hongyan Sept. 27, 2019, 2:02 p.m. UTC | #6
On 27/09/2019 14:00, Jan Beulich wrote:
> On 27.09.2019 14:54, hongyax@amazon.com wrote:
> 
> Pre-populate? There's a conceptual question then: when the
> direct map is gone, are you mapping Xen heap pages into the place
> they'd have lived at in the direct map? I'm not convinced that's
> what we want. In fact I'm not convinced we'd want to retain the
> distinction between Xen heap and domain heap any further at that
> point - there's no reason for it anymore (afaict).

Yes. My patches map xenheap pages to where they would have lived in the direct 
map region, and unmap them when the xenheap pages are freed. The original 
proposal was to use vmap(), which we found difficult to implement:

- vmap takes an array of mfns. Mapping a large region requires 
allocating/freeing memory for a large array of mfns, unless we change vmap or 
add another variant.
- va<->pa conversion. Mapping the xenheap into the direct map region keeps all 
the xenheap conversion macros working. The vmap proposal would need either 
another field in page_info (breaking its power-of-2 size) or a separate 
structure somewhere else for va/pa conversion.

Of course, we could change all the xenheap code to use the same mapping 
structure as the domheap, but that is probably another large patch series.

Hongyan
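
The first bullet can be made concrete. vmap() is declared as
void *vmap(const mfn_t *mfn, unsigned int nr), so even a physically
contiguous region needs a throwaway mfn array first -- a hypothetical
sketch, with base_mfn and nr standing in for an actual allocation:

    mfn_t *mfns = xmalloc_array(mfn_t, nr);
    void *va;
    unsigned int i;

    if ( !mfns )
        return NULL;

    for ( i = 0; i < nr; i++ )
        mfns[i] = mfn_add(base_mfn, i); /* contiguous, yet needs an array */

    va = vmap(mfns, nr);
    xfree(mfns); /* the array is only needed for the duration of the call */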
Jan Beulich Sept. 27, 2019, 2:14 p.m. UTC | #7
On 27.09.2019 16:02, hongyax@amazon.com wrote:
> On 27/09/2019 14:00, Jan Beulich wrote:
>> On 27.09.2019 14:54, hongyax@amazon.com wrote:
>>
>> Pre-populate? There's a conceptual question then: when the
>> direct map is gone, are you mapping Xen heap pages into the place
>> they'd have lived at in the direct map? I'm not convinced that's
>> what we want. In fact I'm not convinced we'd want to retain the
>> distinction between Xen heap and domain heap any further at that
>> point - there's no reason for it anymore (afaict).
> 
> Yes. My patches map xenheap pages to where they would have lived in the direct 
> map region, and unmap them when the xenheap pages are freed. The original 
> proposal was to use vmap(), which we found difficult to implement:
> 
> - vmap takes an array of mfns. Mapping a large region requires 
> allocating/freeing memory for a large array of mfns, unless we change vmap or 
> add another variant.
> - va<->pa conversion. Mapping the xenheap into the direct map region keeps all 
> the xenheap conversion macros working. The vmap proposal would need either 
> another field in page_info (breaking its power-of-2 size) or a separate 
> structure somewhere else for va/pa conversion.

But then why do so many of the initial patches (inherited from Wei)
convert from xenheap to domheap allocations at all? If your
approach is to be at least an intermediate goal, then I think the
order of changes should be such that on-demand mapping of xenheap
pages occurs first, and then the xenheap -> domheap conversion
can happen in basically arbitrarily small steps.

Jan
Xia, Hongyan Sept. 27, 2019, 2:49 p.m. UTC | #8
On 27/09/2019 15:14, Jan Beulich wrote:
> On 27.09.2019 16:02, hongyax@amazon.com wrote:
> 
> But then why do so many of the initial patches (inherited from Wei)
> convert from xenheap to domheap allocations at all? If your
> approach is to be at least an intermediate goal, then I think the
> order of changes should be such that on-demand mapping of xenheap
> pages occurs first, and then the xenheap -> domheap conversion
> can happen in basically arbitrarily small steps.

There is the problem that anything which maps/unmaps memory in the direct map 
region cannot itself rely on an always-mapped direct map. Unfortunately, if I 
map/unmap xenheap allocations, the page table manipulation functions (like 
map_pages_to_xen and alloc_xen_pagetable) themselves rely on an always-mapped 
direct map, and they often break if you leave holes in the direct map region. 
Wei's patches, together with some of my later ones, break exactly this 
dependency, so page table manipulation itself no longer relies on the direct 
map. Only then can we actually start tearing down the direct map, including 
mapping the xenheap on demand in the direct map region.

Hongyan
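
The dependency in question shows up wherever a page-table walk reads a
next-level table. An illustrative sketch (not a quote from the series) of
the direct-map shortcut versus a transient mapping:

    l1_pgentry_t *pl1e, *l1t;

    /* Before: relies on an always-mapped direct map (__va() inside). */
    pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(addr);

    /* After: no direct map needed; map the page table transiently. */
    l1t  = map_domain_page(l2e_get_mfn(*pl2e));
    pl1e = &l1t[l1_table_offset(addr)];
    /* ... read/modify *pl1e ... */
    unmap_domain_page(l1t);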
Xia, Hongyan Sept. 27, 2019, 3:01 p.m. UTC | #9
On 27/09/2019 15:14, Jan Beulich wrote:
> On 27.09.2019 16:02, hongyax@amazon.com wrote:
> 
> But then why do so many of the initial patches (inherited from Wei)
> convert from xenheap to domheap allocations at all? If your
> approach is to be at least an intermediate goal, then I think the
> order of changes should be such that on-demand mapping of xenheap
> pages occurs first, and then the xenheap -> domheap conversion
> can happen in basically arbitrarily small steps.
> 

Also, I have tested Wei's patches with fixes. They are pretty stable on my 
setup, since the direct map has not actually been removed at that stage: I am 
able to run XTF tests, boot dom0, and launch, restart and destroy guests 
without breakage. From a stability point of view, it probably makes more sense 
for Wei's patches to go in first. From the reviews, it looks like my patches 
that actually remove the direct map would benefit from more RFCs, and could be 
separated from Wei's into a second batch.

Hongyan

Patch

diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index e964c032f6..3dc2fad987 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1367,7 +1367,7 @@  void __init noreturn __start_xen(unsigned long mbi_p)
 
             if ( map_e < end )
             {
-                map_pages_to_xen((unsigned long)__va(map_e), maddr_to_mfn(map_e),
+                map_pages_to_xen((unsigned long)__va(map_e), INVALID_MFN,
                                  PFN_DOWN(end - map_e), PAGE_HYPERVISOR);
                 init_boot_pages(map_e, end);
                 map_e = end;
@@ -1382,7 +1382,7 @@  void __init noreturn __start_xen(unsigned long mbi_p)
         }
         if ( s < map_s )
         {
-            map_pages_to_xen((unsigned long)__va(s), maddr_to_mfn(s),
+            map_pages_to_xen((unsigned long)__va(s), INVALID_MFN,
                              PFN_DOWN(map_s - s), PAGE_HYPERVISOR);
             init_boot_pages(s, map_s);
         }
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index a00db4c0d9..deeeac065c 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -2157,7 +2157,7 @@  void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
     map_pages_to_xen((unsigned long)ret, page_to_mfn(pg),
                      1UL << order, PAGE_HYPERVISOR);
 
-    return page_to_virt(pg);
+    return ret;
 }