diff mbox series

[V3,(resend),03/19] x86/pv: Rewrite how building PV dom0 handles domheap mappings

Message ID 20240513134046.82605-4-eliasely@amazon.com (mailing list archive)
State New
Headers show
Series Remove the directmap | expand

Commit Message

Elias El Yandouzi May 13, 2024, 1:40 p.m. UTC
From: Hongyan Xia <hongyxia@amazon.com>

Building a PV dom0 is allocating from the domheap but uses it like the
xenheap. Use the pages as they should be.

Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
Signed-off-by: Julien Grall <jgrall@amazon.com>
Signed-off-by: Elias El Yandouzi <eliasely@amazon.com>

----
    Changes in V3:
        * Fold following patch 'x86/pv: Map L4 page table for shim domain'

    Changes in V2:
        * Clarify the commit message
        * Break the patch in two parts

    Changes since Hongyan's version:
        * Rebase
        * Remove spurious newline

Comments

Roger Pau Monné May 13, 2024, 4:49 p.m. UTC | #1
On Mon, May 13, 2024 at 01:40:30PM +0000, Elias El Yandouzi wrote:
> From: Hongyan Xia <hongyxia@amazon.com>
> 
> Building a PV dom0 is allocating from the domheap but uses it like the
> xenheap. Use the pages as they should be.
> 
> Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> Signed-off-by: Elias El Yandouzi <eliasely@amazon.com>
> 
> ----
>     Changes in V3:
>         * Fold following patch 'x86/pv: Map L4 page table for shim domain'
> 
>     Changes in V2:
>         * Clarify the commit message
>         * Break the patch in two parts
> 
>     Changes since Hongyan's version:
>         * Rebase
>         * Remove spurious newline
> 
> diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
> index 807296c280..ac910b438a 100644
> --- a/xen/arch/x86/pv/dom0_build.c
> +++ b/xen/arch/x86/pv/dom0_build.c
> @@ -382,6 +382,10 @@ int __init dom0_construct_pv(struct domain *d,
>      l3_pgentry_t *l3tab = NULL, *l3start = NULL;
>      l2_pgentry_t *l2tab = NULL, *l2start = NULL;
>      l1_pgentry_t *l1tab = NULL, *l1start = NULL;
> +    mfn_t l4start_mfn = INVALID_MFN;
> +    mfn_t l3start_mfn = INVALID_MFN;
> +    mfn_t l2start_mfn = INVALID_MFN;
> +    mfn_t l1start_mfn = INVALID_MFN;
>  
>      /*
>       * This fully describes the memory layout of the initial domain. All
> @@ -710,22 +714,32 @@ int __init dom0_construct_pv(struct domain *d,
>          v->arch.pv.event_callback_cs    = FLAT_COMPAT_KERNEL_CS;
>      }
>  
> +#define UNMAP_MAP_AND_ADVANCE(mfn_var, virt_var, maddr) \
> +do {                                                    \
> +    unmap_domain_page(virt_var);                        \
> +    mfn_var = maddr_to_mfn(maddr);                      \
> +    maddr += PAGE_SIZE;                                 \
> +    virt_var = map_domain_page(mfn_var);                \

FWIW, I would do the advance after the map, so that the order matches
the name of the function.

> +} while ( false )
> +
>      if ( !compat )
>      {
>          maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
> -        l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
> +        UNMAP_MAP_AND_ADVANCE(l4start_mfn, l4start, mpt_alloc);
> +        l4tab = l4start;

You could even make the macro return virt_var, and so use it like:

l4tab = l4start = UNMAP_MAP_AND_ADVANCE(l4start_mfn, mpt_alloc);

?

Anyway, no strong opinion.

Thanks, Roger.
Jan Beulich May 14, 2024, 2:58 p.m. UTC | #2
On 13.05.2024 18:49, Roger Pau Monné wrote:
> On Mon, May 13, 2024 at 01:40:30PM +0000, Elias El Yandouzi wrote:
>> @@ -710,22 +714,32 @@ int __init dom0_construct_pv(struct domain *d,
>>          v->arch.pv.event_callback_cs    = FLAT_COMPAT_KERNEL_CS;
>>      }
>>  
>> +#define UNMAP_MAP_AND_ADVANCE(mfn_var, virt_var, maddr) \
>> +do {                                                    \
>> +    unmap_domain_page(virt_var);                        \
>> +    mfn_var = maddr_to_mfn(maddr);                      \
>> +    maddr += PAGE_SIZE;                                 \
>> +    virt_var = map_domain_page(mfn_var);                \
> 
> FWIW, I would do the advance after the map, so that the order matches
> the name of the function.

Actually I was thinking kind of the same when looking at v3, even if I
may not have commented to that effect. Then again that goes somewhat
against the further suggestion below.

>> +} while ( false )
>> +
>>      if ( !compat )
>>      {
>>          maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
>> -        l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
>> +        UNMAP_MAP_AND_ADVANCE(l4start_mfn, l4start, mpt_alloc);
>> +        l4tab = l4start;
> 
> You could even make the macro return virt_var, and so use it like:
> 
> l4tab = l4start = UNMAP_MAP_AND_ADVANCE(l4start_mfn, mpt_alloc);
> 
> ?

Not quite, l4start also need to be an input to the macro:

    l4tab = l4start = UNMAP_MAP_AND_ADVANCE(l4start_mfn, l4start, mpt_alloc);

Else unmap_domain_page() has nothing to act upon. If anything that would
then (imo) likely better be

    l4tab = UNMAP_MAP_AND_ADVANCE(l4start_mfn, l4start, mpt_alloc);

with l4start still updated inside the macro.

Jan
Jan Beulich May 14, 2024, 3:03 p.m. UTC | #3
On 13.05.2024 15:40, Elias El Yandouzi wrote:
> --- a/xen/arch/x86/pv/dom0_build.c
> +++ b/xen/arch/x86/pv/dom0_build.c
> @@ -382,6 +382,10 @@ int __init dom0_construct_pv(struct domain *d,
>      l3_pgentry_t *l3tab = NULL, *l3start = NULL;
>      l2_pgentry_t *l2tab = NULL, *l2start = NULL;
>      l1_pgentry_t *l1tab = NULL, *l1start = NULL;
> +    mfn_t l4start_mfn = INVALID_MFN;
> +    mfn_t l3start_mfn = INVALID_MFN;
> +    mfn_t l2start_mfn = INVALID_MFN;
> +    mfn_t l1start_mfn = INVALID_MFN;

Just to mention it here again: By limiting the scope of these I'm pretty
sure no initializer would be needed even "just in case" (really I don't
think they're needed even when the all have function scope, as producer
and consumer are always close together afaics; quite different from
l<N>start and l<N>tab).

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 807296c280..ac910b438a 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -382,6 +382,10 @@  int __init dom0_construct_pv(struct domain *d,
     l3_pgentry_t *l3tab = NULL, *l3start = NULL;
     l2_pgentry_t *l2tab = NULL, *l2start = NULL;
     l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+    mfn_t l4start_mfn = INVALID_MFN;
+    mfn_t l3start_mfn = INVALID_MFN;
+    mfn_t l2start_mfn = INVALID_MFN;
+    mfn_t l1start_mfn = INVALID_MFN;
 
     /*
      * This fully describes the memory layout of the initial domain. All
@@ -710,22 +714,32 @@  int __init dom0_construct_pv(struct domain *d,
         v->arch.pv.event_callback_cs    = FLAT_COMPAT_KERNEL_CS;
     }
 
+#define UNMAP_MAP_AND_ADVANCE(mfn_var, virt_var, maddr) \
+do {                                                    \
+    unmap_domain_page(virt_var);                        \
+    mfn_var = maddr_to_mfn(maddr);                      \
+    maddr += PAGE_SIZE;                                 \
+    virt_var = map_domain_page(mfn_var);                \
+} while ( false )
+
     if ( !compat )
     {
         maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
-        l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+        UNMAP_MAP_AND_ADVANCE(l4start_mfn, l4start, mpt_alloc);
+        l4tab = l4start;
         clear_page(l4tab);
-        init_xen_l4_slots(l4tab, _mfn(virt_to_mfn(l4start)),
-                          d, INVALID_MFN, true);
-        v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
+        init_xen_l4_slots(l4tab, l4start_mfn, d, INVALID_MFN, true);
+        v->arch.guest_table = pagetable_from_mfn(l4start_mfn);
     }
     else
     {
         /* Monitor table already created by switch_compat(). */
-        l4start = l4tab = __va(pagetable_get_paddr(v->arch.guest_table));
+        l4start_mfn = pagetable_get_mfn(v->arch.guest_table);
+        l4start = l4tab = map_domain_page(l4start_mfn);
         /* See public/xen.h on why the following is needed. */
         maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l3_page_table;
         l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+        UNMAP_MAP_AND_ADVANCE(l3start_mfn, l3start, mpt_alloc);
     }
 
     l4tab += l4_table_offset(v_start);
@@ -735,14 +749,16 @@  int __init dom0_construct_pv(struct domain *d,
         if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
         {
             maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table;
-            l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+            UNMAP_MAP_AND_ADVANCE(l1start_mfn, l1start, mpt_alloc);
+            l1tab = l1start;
             clear_page(l1tab);
             if ( count == 0 )
                 l1tab += l1_table_offset(v_start);
             if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) )
             {
                 maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
-                l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+                UNMAP_MAP_AND_ADVANCE(l2start_mfn, l2start, mpt_alloc);
+                l2tab = l2start;
                 clear_page(l2tab);
                 if ( count == 0 )
                     l2tab += l2_table_offset(v_start);
@@ -752,19 +768,19 @@  int __init dom0_construct_pv(struct domain *d,
                     {
                         maddr_to_page(mpt_alloc)->u.inuse.type_info =
                             PGT_l3_page_table;
-                        l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+                        UNMAP_MAP_AND_ADVANCE(l3start_mfn, l3start, mpt_alloc);
                     }
                     l3tab = l3start;
                     clear_page(l3tab);
                     if ( count == 0 )
                         l3tab += l3_table_offset(v_start);
-                    *l4tab = l4e_from_paddr(__pa(l3start), L4_PROT);
+                    *l4tab = l4e_from_mfn(l3start_mfn, L4_PROT);
                     l4tab++;
                 }
-                *l3tab = l3e_from_paddr(__pa(l2start), L3_PROT);
+                *l3tab = l3e_from_mfn(l2start_mfn, L3_PROT);
                 l3tab++;
             }
-            *l2tab = l2e_from_paddr(__pa(l1start), L2_PROT);
+            *l2tab = l2e_from_mfn(l1start_mfn, L2_PROT);
             l2tab++;
         }
         if ( count < initrd_pfn || count >= initrd_pfn + PFN_UP(initrd_len) )
@@ -783,27 +799,31 @@  int __init dom0_construct_pv(struct domain *d,
 
     if ( compat )
     {
-        l2_pgentry_t *l2t;
-
         /* Ensure the first four L3 entries are all populated. */
         for ( i = 0, l3tab = l3start; i < 4; ++i, ++l3tab )
         {
             if ( !l3e_get_intpte(*l3tab) )
             {
                 maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
-                l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
-                clear_page(l2tab);
-                *l3tab = l3e_from_paddr(__pa(l2tab), L3_PROT);
+                UNMAP_MAP_AND_ADVANCE(l2start_mfn, l2start, mpt_alloc);
+                clear_page(l2start);
+                *l3tab = l3e_from_mfn(l2start_mfn, L3_PROT);
             }
             if ( i == 3 )
                 l3e_get_page(*l3tab)->u.inuse.type_info |= PGT_pae_xen_l2;
         }
 
-        l2t = map_l2t_from_l3e(l3start[3]);
-        init_xen_pae_l2_slots(l2t, d);
-        unmap_domain_page(l2t);
+        UNMAP_DOMAIN_PAGE(l2start);
+        l2start = map_l2t_from_l3e(l3start[3]);
+        init_xen_pae_l2_slots(l2start, d);
     }
 
+#undef UNMAP_MAP_AND_ADVANCE
+
+    UNMAP_DOMAIN_PAGE(l1start);
+    UNMAP_DOMAIN_PAGE(l2start);
+    UNMAP_DOMAIN_PAGE(l3start);
+
     /* Pages that are part of page tables must be read only. */
     mark_pv_pt_pages_rdonly(d, l4start, vpt_start, nr_pt_pages, &flush_flags);
 
@@ -976,6 +996,8 @@  int __init dom0_construct_pv(struct domain *d,
         pv_shim_setup_dom(d, l4start, v_start, vxenstore_start, vconsole_start,
                           vphysmap_start, si);
 
+    UNMAP_DOMAIN_PAGE(l4start);
+
 #ifdef CONFIG_COMPAT
     if ( compat )
         xlat_start_info(si, pv_shim ? XLAT_start_info_console_domU