diff mbox series

[early-RFC,4/5] xen/arm: mm: Rework switch_ttbr()

Message ID 20220309112048.17377-5-julien@xen.org (mailing list archive)
State New, archived
Headers show
Series xen/arm: Don't switch TTBR while the MMU is on | expand

Commit Message

Julien Grall March 9, 2022, 11:20 a.m. UTC
From: Julien Grall <jgrall@amazon.com>

At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
still on.

Switching TTBR is like replacing existing mappings with new ones. So
we need to follow the break-before-make sequence.

In this case, it means the MMU needs to be switched off while the
TTBR is updated. In order to disable the MMU, we need to first
jump to an identity mapping.

Rename switch_ttbr() to switch_ttbr_id() and create an helper on
top to temporary map the identity mapping and call switch_ttbr()
via the identity address.

switch_ttbr_id() is now reworked to temporarily turn off the MMU
before updating the TTBR.

We also need to make sure the helper switch_ttbr() is part of the
identity mapping. So move _end_boot past it.

Take the opportunity to instruction cache flush as the operation is
only necessary when the memory is updated.

Signed-off-by: Julien Grall <jgrall@amazon.com>

---

    TODO:
        * Rename _end_boot to _end_id_mapping or similar
        * Check the memory barriers
        * I suspect the instruction cache flush will be necessary
          for cache coloring.
---
 xen/arch/arm/arm64/head.S | 31 ++++++++++++++++++++-----------
 xen/arch/arm/mm.c         | 14 +++++++++++++-
 2 files changed, 33 insertions(+), 12 deletions(-)

Comments

Stefano Stabellini March 12, 2022, 1:17 a.m. UTC | #1
On Wed, 9 Mar 2022, Julien Grall wrote:
> From: Julien Grall <jgrall@amazon.com>
> 
> At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
> still on.
> 
> Switching TTBR is like replacing existing mappings with new ones. So
> we need to follow the break-before-make sequence.
> 
> In this case, it means the MMU needs to be switched off while the
> TTBR is updated. In order to disable the MMU, we need to first
> jump to an identity mapping.
> 
> Rename switch_ttbr() to switch_ttbr_id() and create an helper on
> top to temporary map the identity mapping and call switch_ttbr()
> via the identity address.
> 
> switch_ttbr_id() is now reworked to temporarily turn off the MMU
> before updating the TTBR.
> 
> We also need to make sure the helper switch_ttbr() is part of the
> identity mapping. So move _end_boot past it.
> 
> Take the opportunity to instruction cache flush as the operation is
> only necessary when the memory is updated.
> 
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> 
> ---
> 
>     TODO:
>         * Rename _end_boot to _end_id_mapping or similar
>         * Check the memory barriers
>         * I suspect the instruction cache flush will be necessary
>           for cache coloring.
> ---
>  xen/arch/arm/arm64/head.S | 31 ++++++++++++++++++++-----------
>  xen/arch/arm/mm.c         | 14 +++++++++++++-
>  2 files changed, 33 insertions(+), 12 deletions(-)
> 
> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> index 878649280d73..c5cc72b8fe6f 100644
> --- a/xen/arch/arm/arm64/head.S
> +++ b/xen/arch/arm/arm64/head.S
> @@ -803,36 +803,45 @@ fail:   PRINT("- Boot failed -\r\n")
>          b     1b
>  ENDPROC(fail)
>  
> -GLOBAL(_end_boot)
> -
>  /*
>   * Switch TTBR
>   *
>   * x0    ttbr
>   *
> - * TODO: This code does not comply with break-before-make.
> + * XXX: Check the barriers
>   */
> -ENTRY(switch_ttbr)
> +ENTRY(switch_ttbr_id)
>          dsb   sy                     /* Ensure the flushes happen before
>                                        * continuing */
>          isb                          /* Ensure synchronization with previous
>                                        * changes to text */
> +
> +        /* Turn off MMU */
> +        mrs    x1, SCTLR_EL2
> +        bic    x1, x1, #SCTLR_Axx_ELx_M
> +        msr    SCTLR_EL2, x1
> +        dsb    sy
> +        isb
> +
>          tlbi   alle2                 /* Flush hypervisor TLB */
> -        ic     iallu                 /* Flush I-cache */
>          dsb    sy                    /* Ensure completion of TLB flush */
>          isb
>  
> -        msr    TTBR0_EL2, x0
> +        msr   TTBR0_EL2, x0
> +
> +        mrs   x1, SCTLR_EL2
> +        orr   x1, x1, #SCTLR_Axx_ELx_M  /* Enable MMU */
> +        msr   SCTLR_EL2, x1
>  
>          isb                          /* Ensure synchronization with previous
>                                        * changes to text */
> -        tlbi   alle2                 /* Flush hypervisor TLB */
> -        ic     iallu                 /* Flush I-cache */
> -        dsb    sy                    /* Ensure completion of TLB flush */
> -        isb
> +        /* Turn on the MMU */
> +
>  
>          ret
> -ENDPROC(switch_ttbr)
> +ENDPROC(switch_ttbr_id)
> +
> +GLOBAL(_end_boot)
>  
>  #ifdef CONFIG_EARLY_PRINTK
>  /*
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 5c4dece16f7f..a53760af7af0 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -660,7 +660,19 @@ static void xen_pt_enforce_wnx(void)
>      flush_xen_tlb_local();
>  }
>  
> -extern void switch_ttbr(uint64_t ttbr);
> +extern void switch_ttbr_id(uint64_t ttbr);
> +
> +typedef void (switch_ttbr_fn)(uint64_t ttbr);
> +
> +static void switch_ttbr(uint64_t ttbr)
> +{
> +    vaddr_t id_addr = virt_to_maddr(switch_ttbr_id);
> +    switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr;
> +
> +    update_identity_mapping(true);
> +    fn(ttbr);
> +    update_identity_mapping(false);
> +}

As far as I can tell this should work for coloring too. In the case of
coloring:

    /* running on the old mapping, same as non-coloring */
    update_identity_mapping(true);

    /* jumping to the 1:1 mapping of the old Xen and switching to the
     * new pagetable */
    fn(ttbr);

    /* new pagetable is enabled, now we are back to addresses greater
     * than XEN_VIRT_START, which correspond to new cache-colored Xen */
    update_identity_mapping(false);


The only doubt that I have is: are we sure than a single page of 1:1
mapping is enough? What if:

virt_to_maddr(switch_ttbr_id) - virt_to_maddr(_start) > PAGE_SIZE


We might have to do a 1:1 mapping of size = _end-_start. It would work
with coloring too because we are doing a 1:1 mapping of the old copy of
Xen which is non-colored and contiguous (not the new copy which is
colored and fragmented).


Thanks Julien very much for your help!
Stefano Stabellini March 12, 2022, 1:31 a.m. UTC | #2
On Wed, 9 Mar 2022, Julien Grall wrote:
> From: Julien Grall <jgrall@amazon.com>
> 
> At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
> still on.
> 
> Switching TTBR is like replacing existing mappings with new ones. So
> we need to follow the break-before-make sequence.
> 
> In this case, it means the MMU needs to be switched off while the
> TTBR is updated. In order to disable the MMU, we need to first
> jump to an identity mapping.
> 
> Rename switch_ttbr() to switch_ttbr_id() and create an helper on
> top to temporary map the identity mapping and call switch_ttbr()
> via the identity address.
> 
> switch_ttbr_id() is now reworked to temporarily turn off the MMU
> before updating the TTBR.
> 
> We also need to make sure the helper switch_ttbr() is part of the
> identity mapping. So move _end_boot past it.
> 
> Take the opportunity to instruction cache flush as the operation is
> only necessary when the memory is updated.
> 
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> 
> ---
> 
>     TODO:
>         * Rename _end_boot to _end_id_mapping or similar
>         * Check the memory barriers
>         * I suspect the instruction cache flush will be necessary
>           for cache coloring.
> ---
>  xen/arch/arm/arm64/head.S | 31 ++++++++++++++++++++-----------
>  xen/arch/arm/mm.c         | 14 +++++++++++++-
>  2 files changed, 33 insertions(+), 12 deletions(-)
> 
> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> index 878649280d73..c5cc72b8fe6f 100644
> --- a/xen/arch/arm/arm64/head.S
> +++ b/xen/arch/arm/arm64/head.S
> @@ -803,36 +803,45 @@ fail:   PRINT("- Boot failed -\r\n")
>          b     1b
>  ENDPROC(fail)
>  
> -GLOBAL(_end_boot)
> -
>  /*
>   * Switch TTBR
>   *
>   * x0    ttbr
>   *
> - * TODO: This code does not comply with break-before-make.
> + * XXX: Check the barriers
>   */
> -ENTRY(switch_ttbr)
> +ENTRY(switch_ttbr_id)
>          dsb   sy                     /* Ensure the flushes happen before
>                                        * continuing */
>          isb                          /* Ensure synchronization with previous
>                                        * changes to text */
> +
> +        /* Turn off MMU */
> +        mrs    x1, SCTLR_EL2
> +        bic    x1, x1, #SCTLR_Axx_ELx_M
> +        msr    SCTLR_EL2, x1
> +        dsb    sy
> +        isb
> +
>          tlbi   alle2                 /* Flush hypervisor TLB */
> -        ic     iallu                 /* Flush I-cache */
>          dsb    sy                    /* Ensure completion of TLB flush */
>          isb
>  
> -        msr    TTBR0_EL2, x0
> +        msr   TTBR0_EL2, x0
> +
> +        mrs   x1, SCTLR_EL2
> +        orr   x1, x1, #SCTLR_Axx_ELx_M  /* Enable MMU */
> +        msr   SCTLR_EL2, x1
>  
>          isb                          /* Ensure synchronization with previous
>                                        * changes to text */
> -        tlbi   alle2                 /* Flush hypervisor TLB */
> -        ic     iallu                 /* Flush I-cache */
> -        dsb    sy                    /* Ensure completion of TLB flush */
> -        isb
> +        /* Turn on the MMU */
> +
>  
>          ret
> -ENDPROC(switch_ttbr)
> +ENDPROC(switch_ttbr_id)
> +
> +GLOBAL(_end_boot)
>  
>  #ifdef CONFIG_EARLY_PRINTK
>  /*
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 5c4dece16f7f..a53760af7af0 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -660,7 +660,19 @@ static void xen_pt_enforce_wnx(void)
>      flush_xen_tlb_local();
>  }
>  
> -extern void switch_ttbr(uint64_t ttbr);
> +extern void switch_ttbr_id(uint64_t ttbr);
> +
> +typedef void (switch_ttbr_fn)(uint64_t ttbr);
> +
> +static void switch_ttbr(uint64_t ttbr)
> +{
> +    vaddr_t id_addr = virt_to_maddr(switch_ttbr_id);
> +    switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr;
> +
> +    update_identity_mapping(true);
> +    fn(ttbr);
> +    update_identity_mapping(false);
> +}

Controversial question: does it really matter that XEN_VIRT_START >
512GB and that _start < 512GB?

I am totally fine with the limit, I am just brainstorming: given that
the mapping is used very temporarely, it wouldn't really be an issue if
it conflicts with something important. Let's say that it conflicts with
the VMAP or the FRAMETABLE. As long as:

- we save the current mapping
- update it with the Xen 1:1
- switch_ttbr
- remove Xen 1:1
- restore mapping

It should work, right? Basically, a mapping conflict shouldn't be an
issue given that the mapping has only to live long enough to call
switch_ttbr_id.

I am less sure about patch #5 but it doesn't seem it would be a problem
there either.

That said, I am totally fine with _start < 512GB.
Julien Grall March 12, 2022, 6:20 p.m. UTC | #3
Hi Stefano,

On 12/03/2022 01:17, Stefano Stabellini wrote:
> On Wed, 9 Mar 2022, Julien Grall wrote:
> As far as I can tell this should work for coloring too. In the case of
> coloring:
> 
>      /* running on the old mapping, same as non-coloring */
>      update_identity_mapping(true);
> 
>      /* jumping to the 1:1 mapping of the old Xen and switching to the
>       * new pagetable */
>      fn(ttbr);
> 
>      /* new pagetable is enabled, now we are back to addresses greater
>       * than XEN_VIRT_START, which correspond to new cache-colored Xen */
>      update_identity_mapping(false);
> 
> 
> The only doubt that I have is: are we sure than a single page of 1:1
> mapping is enough? What if:
> 
> virt_to_maddr(switch_ttbr_id) - virt_to_maddr(_start) > PAGE_SIZE

switch_ttbr_id() is placed before _end_boot (this needs to be renamed). 
We are veryfing a link time (see the check in xen.lds.S) that _end_boot 
- _start is always smaller than 4KB.

At the moment, the size is less than 2KB. So we have plenty of space to 
grow. Also, there are some code that is technically not used while using 
the ID map. So if necessary we can shrink the size to always fit in a 
PAGE_SIZE.

> We might have to do a 1:1 mapping of size = _end-_start. It would work
> with coloring too because we are doing a 1:1 mapping of the old copy of
> Xen which is non-colored and contiguous (not the new copy which is
> colored and fragmented).

I would like to keep the size of the ID mapping to the strict minimum. A 
PAGE_SIZE should be sufficient for most of what we need in Xen.

This would help to get rid of the old copy of Xen in case of the cache 
coloring. Otherwise, you technically have to keep it forever if you plan 
to support suspend/resume or even allow CPU hotplug.

Furthemore, if you keep the two copy around, it is more difficult to 
know which mapping is used and we increase the risk to use the wrong 
one. For instance, the current implementation of cache coloring is 
clearing the wrong set of boot pagetables.

Cheers,
Julien Grall March 12, 2022, 6:54 p.m. UTC | #4
On 12/03/2022 01:31, Stefano Stabellini wrote:
> On Wed, 9 Mar 2022, Julien Grall wrote:
>> From: Julien Grall <jgrall@amazon.com>
>>
>> At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
>> still on.
>>
>> Switching TTBR is like replacing existing mappings with new ones. So
>> we need to follow the break-before-make sequence.
>>
>> In this case, it means the MMU needs to be switched off while the
>> TTBR is updated. In order to disable the MMU, we need to first
>> jump to an identity mapping.
>>
>> Rename switch_ttbr() to switch_ttbr_id() and create an helper on
>> top to temporary map the identity mapping and call switch_ttbr()
>> via the identity address.
>>
>> switch_ttbr_id() is now reworked to temporarily turn off the MMU
>> before updating the TTBR.
>>
>> We also need to make sure the helper switch_ttbr() is part of the
>> identity mapping. So move _end_boot past it.
>>
>> Take the opportunity to instruction cache flush as the operation is
>> only necessary when the memory is updated.
>>
>> Signed-off-by: Julien Grall <jgrall@amazon.com>
>>
>> ---
>>
>>      TODO:
>>          * Rename _end_boot to _end_id_mapping or similar
>>          * Check the memory barriers
>>          * I suspect the instruction cache flush will be necessary
>>            for cache coloring.
>> ---
>>   xen/arch/arm/arm64/head.S | 31 ++++++++++++++++++++-----------
>>   xen/arch/arm/mm.c         | 14 +++++++++++++-
>>   2 files changed, 33 insertions(+), 12 deletions(-)
>>
>> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
>> index 878649280d73..c5cc72b8fe6f 100644
>> --- a/xen/arch/arm/arm64/head.S
>> +++ b/xen/arch/arm/arm64/head.S
>> @@ -803,36 +803,45 @@ fail:   PRINT("- Boot failed -\r\n")
>>           b     1b
>>   ENDPROC(fail)
>>   
>> -GLOBAL(_end_boot)
>> -
>>   /*
>>    * Switch TTBR
>>    *
>>    * x0    ttbr
>>    *
>> - * TODO: This code does not comply with break-before-make.
>> + * XXX: Check the barriers
>>    */
>> -ENTRY(switch_ttbr)
>> +ENTRY(switch_ttbr_id)
>>           dsb   sy                     /* Ensure the flushes happen before
>>                                         * continuing */
>>           isb                          /* Ensure synchronization with previous
>>                                         * changes to text */
>> +
>> +        /* Turn off MMU */
>> +        mrs    x1, SCTLR_EL2
>> +        bic    x1, x1, #SCTLR_Axx_ELx_M
>> +        msr    SCTLR_EL2, x1
>> +        dsb    sy
>> +        isb
>> +
>>           tlbi   alle2                 /* Flush hypervisor TLB */
>> -        ic     iallu                 /* Flush I-cache */
>>           dsb    sy                    /* Ensure completion of TLB flush */
>>           isb
>>   
>> -        msr    TTBR0_EL2, x0
>> +        msr   TTBR0_EL2, x0
>> +
>> +        mrs   x1, SCTLR_EL2
>> +        orr   x1, x1, #SCTLR_Axx_ELx_M  /* Enable MMU */
>> +        msr   SCTLR_EL2, x1
>>   
>>           isb                          /* Ensure synchronization with previous
>>                                         * changes to text */
>> -        tlbi   alle2                 /* Flush hypervisor TLB */
>> -        ic     iallu                 /* Flush I-cache */
>> -        dsb    sy                    /* Ensure completion of TLB flush */
>> -        isb
>> +        /* Turn on the MMU */
>> +
>>   
>>           ret
>> -ENDPROC(switch_ttbr)
>> +ENDPROC(switch_ttbr_id)
>> +
>> +GLOBAL(_end_boot)
>>   
>>   #ifdef CONFIG_EARLY_PRINTK
>>   /*
>> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
>> index 5c4dece16f7f..a53760af7af0 100644
>> --- a/xen/arch/arm/mm.c
>> +++ b/xen/arch/arm/mm.c
>> @@ -660,7 +660,19 @@ static void xen_pt_enforce_wnx(void)
>>       flush_xen_tlb_local();
>>   }
>>   
>> -extern void switch_ttbr(uint64_t ttbr);
>> +extern void switch_ttbr_id(uint64_t ttbr);
>> +
>> +typedef void (switch_ttbr_fn)(uint64_t ttbr);
>> +
>> +static void switch_ttbr(uint64_t ttbr)
>> +{
>> +    vaddr_t id_addr = virt_to_maddr(switch_ttbr_id);
>> +    switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr;
>> +
>> +    update_identity_mapping(true);
>> +    fn(ttbr);
>> +    update_identity_mapping(false);
>> +}
> 
> Controversial question: does it really matter that XEN_VIRT_START >
> 512GB and that _start < 512GB?
> 
> I am totally fine with the limit, I am just brainstorming: given that
> the mapping is used very temporarely, it wouldn't really be an issue if
> it conflicts with something important. Let's say that it conflicts with
> the VMAP or the FRAMETABLE. As long as:
> 
> - we save the current mapping
> - update it with the Xen 1:1
> - switch_ttbr
> - remove Xen 1:1
> - restore mapping
> 
> It should work, right? Basically, a mapping conflict shouldn't be an
> issue given that the mapping has only to live long enough to call
> switch_ttbr_id.

Today switch_ttbr() is called before we initialized most of the memory 
layout. So clashing with the VMAP and frametable is not a problem.

However, the identity mapping may also clash with the region used to map 
Xen. That said, technically, we are not able to handle Xen when its 
start address is in region 2MB + 4K to 4MB (Xen is loaded at a 4KB 
aligned address).

The trouble is some features (e.g. UBSAN, GCOV) can generate Xen image 
over 2MB. IOW, the range where Xen cannot be loaded will increase.

This is an issue because AFAIK, there is no away to tell GRUB "You can't 
load Xen at this region". But even if there were one, I feel this 
restriction is sort of random.

I already wrote a patch to get rid of the restriction. The code is not 
too bad (we only need an extra indirection). But I haven't sent it yet 
because it is less critical with the re-shuffling of the memory layout.

Anyway, that's a long way to say that it will soon become an issue if 
the ID mapping is clashing with Xen mappings.

> 
> I am less sure about patch #5 but it doesn't seem it would be a problem
> there either.

This is actually going to be problematic. On Arm64, the page-tables are 
shared with all the CPUs. You would need to prevent the CPUs to touch 
any of the mapping we removed.

While booting, idle pCPUs will usually scrub the pages. So the 
frametable will be used. In theory, we could make sure the CPUs are not 
scrubbing. This would get trick for CPU hotpluggling (not yet supported) 
as CPU would need to idle. IMHO, this would be unnaceptable to block all 
the CPUs just to bring a new one.

Furthermore, we would need to be careful anytime we define new regions 
in the memory layout or reshuffle it as we need to ensure that no-one 
else use them when the ID mapping is inplace.

The memory layout is far from been full on Arm64. So to me, the extra 
risk is not worth it. The same goes for Arm32 (even thought the memory 
has much less space).

Cheers,
Stefano Stabellini March 14, 2022, 11:27 p.m. UTC | #5
On Sat, 12 Mar 2022, Julien Grall wrote:
> On 12/03/2022 01:17, Stefano Stabellini wrote:
> > On Wed, 9 Mar 2022, Julien Grall wrote:
> > As far as I can tell this should work for coloring too. In the case of
> > coloring:
> > 
> >      /* running on the old mapping, same as non-coloring */
> >      update_identity_mapping(true);
> > 
> >      /* jumping to the 1:1 mapping of the old Xen and switching to the
> >       * new pagetable */
> >      fn(ttbr);
> > 
> >      /* new pagetable is enabled, now we are back to addresses greater
> >       * than XEN_VIRT_START, which correspond to new cache-colored Xen */
> >      update_identity_mapping(false);
> > 
> > 
> > The only doubt that I have is: are we sure than a single page of 1:1
> > mapping is enough? What if:
> > 
> > virt_to_maddr(switch_ttbr_id) - virt_to_maddr(_start) > PAGE_SIZE
> 
> switch_ttbr_id() is placed before _end_boot (this needs to be renamed). We are
> veryfing a link time (see the check in xen.lds.S) that _end_boot - _start is
> always smaller than 4KB.

Yes I see:
ASSERT( _end_boot - start <= PAGE_SIZE, "Boot code is larger than 4K")

Excellent!


> At the moment, the size is less than 2KB. So we have plenty of space to grow.
> Also, there are some code that is technically not used while using the ID map.
> So if necessary we can shrink the size to always fit in a PAGE_SIZE.

+1


> > We might have to do a 1:1 mapping of size = _end-_start. It would work
> > with coloring too because we are doing a 1:1 mapping of the old copy of
> > Xen which is non-colored and contiguous (not the new copy which is
> > colored and fragmented).
> 
> I would like to keep the size of the ID mapping to the strict minimum. A
> PAGE_SIZE should be sufficient for most of what we need in Xen.

Yep


> This would help to get rid of the old copy of Xen in case of the cache
> coloring. Otherwise, you technically have to keep it forever if you plan to
> support suspend/resume or even allow CPU hotplug.
> 
> Furthemore, if you keep the two copy around, it is more difficult to know
> which mapping is used and we increase the risk to use the wrong one. For
> instance, the current implementation of cache coloring is clearing the wrong
> set of boot pagetables.
 
Right. Given that we know it is a single page, then we could use a 1:1
of the colored copy without issues.
Stefano Stabellini March 14, 2022, 11:48 p.m. UTC | #6
On Sat, 12 Mar 2022, Julien Grall wrote:
> On 12/03/2022 01:31, Stefano Stabellini wrote:
> > On Wed, 9 Mar 2022, Julien Grall wrote:
> > > From: Julien Grall <jgrall@amazon.com>
> > > 
> > > At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
> > > still on.
> > > 
> > > Switching TTBR is like replacing existing mappings with new ones. So
> > > we need to follow the break-before-make sequence.
> > > 
> > > In this case, it means the MMU needs to be switched off while the
> > > TTBR is updated. In order to disable the MMU, we need to first
> > > jump to an identity mapping.
> > > 
> > > Rename switch_ttbr() to switch_ttbr_id() and create an helper on
> > > top to temporary map the identity mapping and call switch_ttbr()
> > > via the identity address.
> > > 
> > > switch_ttbr_id() is now reworked to temporarily turn off the MMU
> > > before updating the TTBR.
> > > 
> > > We also need to make sure the helper switch_ttbr() is part of the
> > > identity mapping. So move _end_boot past it.
> > > 
> > > Take the opportunity to instruction cache flush as the operation is
> > > only necessary when the memory is updated.
> > > 
> > > Signed-off-by: Julien Grall <jgrall@amazon.com>
> > > 
> > > ---
> > > 
> > >      TODO:
> > >          * Rename _end_boot to _end_id_mapping or similar
> > >          * Check the memory barriers
> > >          * I suspect the instruction cache flush will be necessary
> > >            for cache coloring.
> > > ---
> > >   xen/arch/arm/arm64/head.S | 31 ++++++++++++++++++++-----------
> > >   xen/arch/arm/mm.c         | 14 +++++++++++++-
> > >   2 files changed, 33 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> > > index 878649280d73..c5cc72b8fe6f 100644
> > > --- a/xen/arch/arm/arm64/head.S
> > > +++ b/xen/arch/arm/arm64/head.S
> > > @@ -803,36 +803,45 @@ fail:   PRINT("- Boot failed -\r\n")
> > >           b     1b
> > >   ENDPROC(fail)
> > >   -GLOBAL(_end_boot)
> > > -
> > >   /*
> > >    * Switch TTBR
> > >    *
> > >    * x0    ttbr
> > >    *
> > > - * TODO: This code does not comply with break-before-make.
> > > + * XXX: Check the barriers
> > >    */
> > > -ENTRY(switch_ttbr)
> > > +ENTRY(switch_ttbr_id)
> > >           dsb   sy                     /* Ensure the flushes happen before
> > >                                         * continuing */
> > >           isb                          /* Ensure synchronization with
> > > previous
> > >                                         * changes to text */
> > > +
> > > +        /* Turn off MMU */
> > > +        mrs    x1, SCTLR_EL2
> > > +        bic    x1, x1, #SCTLR_Axx_ELx_M
> > > +        msr    SCTLR_EL2, x1
> > > +        dsb    sy
> > > +        isb
> > > +
> > >           tlbi   alle2                 /* Flush hypervisor TLB */
> > > -        ic     iallu                 /* Flush I-cache */
> > >           dsb    sy                    /* Ensure completion of TLB flush
> > > */
> > >           isb
> > >   -        msr    TTBR0_EL2, x0
> > > +        msr   TTBR0_EL2, x0
> > > +
> > > +        mrs   x1, SCTLR_EL2
> > > +        orr   x1, x1, #SCTLR_Axx_ELx_M  /* Enable MMU */
> > > +        msr   SCTLR_EL2, x1
> > >             isb                          /* Ensure synchronization with
> > > previous
> > >                                         * changes to text */
> > > -        tlbi   alle2                 /* Flush hypervisor TLB */
> > > -        ic     iallu                 /* Flush I-cache */
> > > -        dsb    sy                    /* Ensure completion of TLB flush */
> > > -        isb
> > > +        /* Turn on the MMU */
> > > +
> > >             ret
> > > -ENDPROC(switch_ttbr)
> > > +ENDPROC(switch_ttbr_id)
> > > +
> > > +GLOBAL(_end_boot)
> > >     #ifdef CONFIG_EARLY_PRINTK
> > >   /*
> > > diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> > > index 5c4dece16f7f..a53760af7af0 100644
> > > --- a/xen/arch/arm/mm.c
> > > +++ b/xen/arch/arm/mm.c
> > > @@ -660,7 +660,19 @@ static void xen_pt_enforce_wnx(void)
> > >       flush_xen_tlb_local();
> > >   }
> > >   -extern void switch_ttbr(uint64_t ttbr);
> > > +extern void switch_ttbr_id(uint64_t ttbr);
> > > +
> > > +typedef void (switch_ttbr_fn)(uint64_t ttbr);
> > > +
> > > +static void switch_ttbr(uint64_t ttbr)
> > > +{
> > > +    vaddr_t id_addr = virt_to_maddr(switch_ttbr_id);
> > > +    switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr;
> > > +
> > > +    update_identity_mapping(true);
> > > +    fn(ttbr);
> > > +    update_identity_mapping(false);
> > > +}
> > 
> > Controversial question: does it really matter that XEN_VIRT_START >
> > 512GB and that _start < 512GB?
> > 
> > I am totally fine with the limit, I am just brainstorming: given that
> > the mapping is used very temporarely, it wouldn't really be an issue if
> > it conflicts with something important. Let's say that it conflicts with
> > the VMAP or the FRAMETABLE. As long as:
> > 
> > - we save the current mapping
> > - update it with the Xen 1:1
> > - switch_ttbr
> > - remove Xen 1:1
> > - restore mapping
> > 
> > It should work, right? Basically, a mapping conflict shouldn't be an
> > issue given that the mapping has only to live long enough to call
> > switch_ttbr_id.
> 
> Today switch_ttbr() is called before we initialized most of the memory layout.
> So clashing with the VMAP and frametable is not a problem.
> 
> However, the identity mapping may also clash with the region used to map Xen.
> That said, technically, we are not able to handle Xen when its start address
> is in region 2MB + 4K to 4MB (Xen is loaded at a 4KB aligned address).
> 
> The trouble is some features (e.g. UBSAN, GCOV) can generate Xen image over
> 2MB. IOW, the range where Xen cannot be loaded will increase.
> 
> This is an issue because AFAIK, there is no away to tell GRUB "You can't load
> Xen at this region". But even if there were one, I feel this restriction is
> sort of random.
> 
> I already wrote a patch to get rid of the restriction. The code is not too bad
> (we only need an extra indirection). But I haven't sent it yet because it is
> less critical with the re-shuffling of the memory layout.

Interesting! I am curious: how did you manage to do it?

For now and for this series the current approach and the 512GB limit are
fine. My replies here are brainstorming to see if there are potential
alternatives in the future in case the need arises.

I can see that a clash with Xen mapping could be problematic and the
chances of that happening are low but non-zero. We could make sure that
ImageBuilder always picks safe addresses and that would help but
wouldn't remove the issue if someone is not using ImageBuilder.
Julien Grall March 15, 2022, 7:01 p.m. UTC | #7
Hi Stefano,

On 14/03/2022 23:48, Stefano Stabellini wrote:
>>> - we save the current mapping
>>> - update it with the Xen 1:1
>>> - switch_ttbr
>>> - remove Xen 1:1
>>> - restore mapping
>>>
>>> It should work, right? Basically, a mapping conflict shouldn't be an
>>> issue given that the mapping has only to live long enough to call
>>> switch_ttbr_id.
>>
>> Today switch_ttbr() is called before we initialized most of the memory layout.
>> So clashing with the VMAP and frametable is not a problem.
>>
>> However, the identity mapping may also clash with the region used to map Xen.
>> That said, technically, we are not able to handle Xen when its start address
>> is in region 2MB + 4K to 4MB (Xen is loaded at a 4KB aligned address).
>>
>> The trouble is some features (e.g. UBSAN, GCOV) can generate Xen image over
>> 2MB. IOW, the range where Xen cannot be loaded will increase.
>>
>> This is an issue because AFAIK, there is no away to tell GRUB "You can't load
>> Xen at this region". But even if there were one, I feel this restriction is
>> sort of random.
>>
>> I already wrote a patch to get rid of the restriction. The code is not too bad
>> (we only need an extra indirection). But I haven't sent it yet because it is
>> less critical with the re-shuffling of the memory layout.
> 
> Interesting! I am curious: how did you manage to do it?

When the identity mapping is clashing with Xen runtime address, I am 
creating a temporary mapping for Xen at a different fixed address.

Once the MMU is turned on, we can jump to the temporary mapping. After 
that we are safe to remove the identity mapping and create the runtime 
Xen mapping. The last step is to jump on the runtime mapping and then 
remove the temporary mapping.

> 
> For now and for this series the current approach and the 512GB limit are
> fine. My replies here are brainstorming to see if there are potential
> alternatives in the future in case the need arises.

On Arm64, we have 256TB worth of virtual address. So I think we can 
easily spare 512GB for the foreseeable :).

If we are at the point where we can't space 512GB, then we would need to 
have a more dynamic layout as I plan on arm32.

Xen would still be mapped at a specific virtual address so we don't need 
to update the relocations. But we could decide at runtime the position 
of other large mappings (e.g. vmap, domheap).

Probably the safest way is to link Xen at a very high address. It is 
quite unlikely that Xen will be loaded at such high address.

If it is, we could exceptionally relocate Xen (with this series it 
should be safer to do). That said, I would like to avoid relocating Xen 
until we see a use for that.

> 
> I can see that a clash with Xen mapping could be problematic and the
> chances of that happening are low but non-zero. We could make sure that
> ImageBuilder always picks safe addresses and that would help but
> wouldn't remove the issue if someone is not using ImageBuilder.

AFAIU, ImageBuilder is here to cater U-boot users. I am not too worry 
about those setups because a user can pick any address they want. So as 
long as Xen print an error during the clash (already the case), the user 
can easily update their scripts.

This is more a problem for UEFI/GRUB where, AFAICT, we can't control 
where Xen will be loaded.

Cheers,
Stefano Stabellini March 16, 2022, 9:58 p.m. UTC | #8
On Tue, 15 Mar 2022, Julien Grall wrote:
> On 14/03/2022 23:48, Stefano Stabellini wrote:
> > > > - we save the current mapping
> > > > - update it with the Xen 1:1
> > > > - switch_ttbr
> > > > - remove Xen 1:1
> > > > - restore mapping
> > > > 
> > > > It should work, right? Basically, a mapping conflict shouldn't be an
> > > > issue given that the mapping has only to live long enough to call
> > > > switch_ttbr_id.
> > > 
> > > Today switch_ttbr() is called before we initialized most of the memory
> > > layout.
> > > So clashing with the VMAP and frametable is not a problem.
> > > 
> > > However, the identity mapping may also clash with the region used to map
> > > Xen.
> > > That said, technically, we are not able to handle Xen when its start
> > > address
> > > is in region 2MB + 4K to 4MB (Xen is loaded at a 4KB aligned address).
> > > 
> > > The trouble is some features (e.g. UBSAN, GCOV) can generate Xen image
> > > over
> > > 2MB. IOW, the range where Xen cannot be loaded will increase.
> > > 
> > > This is an issue because AFAIK, there is no away to tell GRUB "You can't
> > > load
> > > Xen at this region". But even if there were one, I feel this restriction
> > > is
> > > sort of random.
> > > 
> > > I already wrote a patch to get rid of the restriction. The code is not too
> > > bad
> > > (we only need an extra indirection). But I haven't sent it yet because it
> > > is
> > > less critical with the re-shuffling of the memory layout.
> > 
> > Interesting! I am curious: how did you manage to do it?
> 
> When the identity mapping is clashing with Xen runtime address, I am creating
> a temporary mapping for Xen at a different fixed address.
> 
> Once the MMU is turned on, we can jump to the temporary mapping. After that we
> are safe to remove the identity mapping and create the runtime Xen mapping.
> The last step is to jump on the runtime mapping and then remove the temporary
> mapping.

Cool! I was guessing something along those lines.
Bertrand Marquis March 25, 2022, 1:47 p.m. UTC | #9
Hi Julien,

> On 9 Mar 2022, at 12:20, Julien Grall <julien@xen.org> wrote:
> 
> From: Julien Grall <jgrall@amazon.com>
> 
> At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
> still on.
> 
> Switching TTBR is like replacing existing mappings with new ones. So
> we need to follow the break-before-make sequence.
> 
> In this case, it means the MMU needs to be switched off while the
> TTBR is updated. In order to disable the MMU, we need to first
> jump to an identity mapping.
> 
> Rename switch_ttbr() to switch_ttbr_id() and create an helper on
> top to temporary map the identity mapping and call switch_ttbr()
> via the identity address.
> 
> switch_ttbr_id() is now reworked to temporarily turn off the MMU
> before updating the TTBR.
> 
> We also need to make sure the helper switch_ttbr() is part of the
> identity mapping. So move _end_boot past it.
> 
> Take the opportunity to instruction cache flush as the operation is
> only necessary when the memory is updated.

Your code is actually remove the instruction cache invalidation so
this sentence is a bit misleading.

Also an open question: shouldn’t we flush the data cache ?
As we switch from one TTBR to an other, there might be some data
in the cache dependent that could be flushed while the MMU is off or
that would have no mapping once it is reactivated.


> 
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> 
> ---
> 
>    TODO:
>        * Rename _end_boot to _end_id_mapping or similar
>        * Check the memory barriers
>        * I suspect the instruction cache flush will be necessary
>          for cache coloring.
> ---
> xen/arch/arm/arm64/head.S | 31 ++++++++++++++++++++-----------
> xen/arch/arm/mm.c         | 14 +++++++++++++-
> 2 files changed, 33 insertions(+), 12 deletions(-)
> 
> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> index 878649280d73..c5cc72b8fe6f 100644
> --- a/xen/arch/arm/arm64/head.S
> +++ b/xen/arch/arm/arm64/head.S
> @@ -803,36 +803,45 @@ fail:   PRINT("- Boot failed -\r\n")
>         b     1b
> ENDPROC(fail)
> 
> -GLOBAL(_end_boot)
> -
> /*
>  * Switch TTBR
>  *
>  * x0    ttbr
>  *
> - * TODO: This code does not comply with break-before-make.
> + * XXX: Check the barriers
>  */
> -ENTRY(switch_ttbr)
> +ENTRY(switch_ttbr_id)
>         dsb   sy                     /* Ensure the flushes happen before
>                                       * continuing */
>         isb                          /* Ensure synchronization with previous
>                                       * changes to text */
> +
> +        /* Turn off MMU */
> +        mrs    x1, SCTLR_EL2
> +        bic    x1, x1, #SCTLR_Axx_ELx_M
> +        msr    SCTLR_EL2, x1
> +        dsb    sy
> +        isb
> +
>         tlbi   alle2                 /* Flush hypervisor TLB */
> -        ic     iallu                 /* Flush I-cache */
>         dsb    sy                    /* Ensure completion of TLB flush */
>         isb
> 
> -        msr    TTBR0_EL2, x0
> +        msr   TTBR0_EL2, x0
> +
> +        mrs   x1, SCTLR_EL2
> +        orr   x1, x1, #SCTLR_Axx_ELx_M  /* Enable MMU */
> +        msr   SCTLR_EL2, x1
> 
>         isb                          /* Ensure synchronization with previous
>                                       * changes to text */
> -        tlbi   alle2                 /* Flush hypervisor TLB */
> -        ic     iallu                 /* Flush I-cache */
> -        dsb    sy                    /* Ensure completion of TLB flush */
> -        isb
> +        /* Turn on the MMU */
> +
> 
>         ret
> -ENDPROC(switch_ttbr)
> +ENDPROC(switch_ttbr_id)
> +
> +GLOBAL(_end_boot)
> 
> #ifdef CONFIG_EARLY_PRINTK
> /*
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 5c4dece16f7f..a53760af7af0 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -660,7 +660,19 @@ static void xen_pt_enforce_wnx(void)
>     flush_xen_tlb_local();
> }
> 
> -extern void switch_ttbr(uint64_t ttbr);
> +extern void switch_ttbr_id(uint64_t ttbr);
> +
> +typedef void (switch_ttbr_fn)(uint64_t ttbr);
> +
> +static void switch_ttbr(uint64_t ttbr)
> +{
> +    vaddr_t id_addr = virt_to_maddr(switch_ttbr_id);
> +    switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr;
> +
> +    update_identity_mapping(true);
> +    fn(ttbr);
> +    update_identity_mapping(false);
> +}
> 
> /* Clear a translation table and clean & invalidate the cache */
> static void clear_table(void *table)
> -- 
> 2.32.0
> 

Cheers
Bertrand
Julien Grall March 25, 2022, 2:24 p.m. UTC | #10
On 25/03/2022 13:47, Bertrand Marquis wrote:
> Hi Julien,

Hi Bertrand,

>> On 9 Mar 2022, at 12:20, Julien Grall <julien@xen.org> wrote:
>>
>> From: Julien Grall <jgrall@amazon.com>
>>
>> At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
>> still on.
>>
>> Switching TTBR is like replacing existing mappings with new ones. So
>> we need to follow the break-before-make sequence.
>>
>> In this case, it means the MMU needs to be switched off while the
>> TTBR is updated. In order to disable the MMU, we need to first
>> jump to an identity mapping.
>>
>> Rename switch_ttbr() to switch_ttbr_id() and create an helper on
>> top to temporary map the identity mapping and call switch_ttbr()
>> via the identity address.
>>
>> switch_ttbr_id() is now reworked to temporarily turn off the MMU
>> before updating the TTBR.
>>
>> We also need to make sure the helper switch_ttbr() is part of the
>> identity mapping. So move _end_boot past it.
>>
>> Take the opportunity to instruction cache flush as the operation is
>> only necessary when the memory is updated.
> 
> Your code is actually remove the instruction cache invalidation so
> this sentence is a bit misleading.

I forgot to add the word "remove" in the sentence.

> 
> Also an open question: shouldn’t we flush the data cache ?
Do you mean clean/invalidate to PoC/PoU? Something else?

> As we switch from one TTBR to an other, there might be some data
> in the cache dependent that could be flushed while the MMU is off 

I am a bit confused. Those flush could also happen with the MMU on. So 
how turning off the MMU would result to a problem? Note that the data 
cache is still enabled during the switch.

> or
> that would have no mapping once it is reactivated.
The cache line will be flushed at some point in the future. I would 
argue if the caller need it earlier, then it should make sure to issue 
the flush before switch_ttbr().

Cheers,
Bertrand Marquis March 25, 2022, 2:35 p.m. UTC | #11
Hi Julien,

> On 25 Mar 2022, at 15:24, Julien Grall <julien@xen.org> wrote:
> 
> 
> 
> On 25/03/2022 13:47, Bertrand Marquis wrote:
>> Hi Julien,
> 
> Hi Bertrand,
> 
>>> On 9 Mar 2022, at 12:20, Julien Grall <julien@xen.org> wrote:
>>> 
>>> From: Julien Grall <jgrall@amazon.com>
>>> 
>>> At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
>>> still on.
>>> 
>>> Switching TTBR is like replacing existing mappings with new ones. So
>>> we need to follow the break-before-make sequence.
>>> 
>>> In this case, it means the MMU needs to be switched off while the
>>> TTBR is updated. In order to disable the MMU, we need to first
>>> jump to an identity mapping.
>>> 
>>> Rename switch_ttbr() to switch_ttbr_id() and create an helper on
>>> top to temporary map the identity mapping and call switch_ttbr()
>>> via the identity address.
>>> 
>>> switch_ttbr_id() is now reworked to temporarily turn off the MMU
>>> before updating the TTBR.
>>> 
>>> We also need to make sure the helper switch_ttbr() is part of the
>>> identity mapping. So move _end_boot past it.
>>> 
>>> Take the opportunity to instruction cache flush as the operation is
>>> only necessary when the memory is updated.
>> Your code is actually remove the instruction cache invalidation so
>> this sentence is a bit misleading.
> 
> I forgot to add the word "remove" in the sentence.

Ok (my sentence was also wrong by the way)

> 
>> Also an open question: shouldn’t we flush the data cache ?
> Do you mean clean/invalidate to PoC/PoU? Something else?

Yes, probably to PoU.

> 
>> As we switch from one TTBR to an other, there might be some data
>> in the cache dependent that could be flushed while the MMU is off 
> 
> I am a bit confused. Those flush could also happen with the MMU on. So how turning off the MMU would result to a problem? Note that the data cache is still enabled during the switch.

If the first level of cache is VIPT and we turn off the MMU, I am wondering if this could not create troubles and could require the cache to be flushed before turning the MMU off.
I have no idea if this is a problem or not, just raising the question.
I can try to dig on that at Arm when I am back in 10 days. 

> 
>> or
>> that would have no mapping once it is reactivated.
> The cache line will be flushed at some point in the future. I would argue if the caller need it earlier, then it should make sure to issue the flush before switch_ttbr().
Ok.

I will still try to check if there is some kind of recommandation to turn the MMU off.

Cheers
Bertrand

> 
> Cheers,
> 
> -- 
> Julien Grall
Julien Grall March 25, 2022, 2:42 p.m. UTC | #12
Hi Bertrand,

On 25/03/2022 14:35, Bertrand Marquis wrote:
>> On 25 Mar 2022, at 15:24, Julien Grall <julien@xen.org> wrote:
>> On 25/03/2022 13:47, Bertrand Marquis wrote:
>>> Hi Julien,
>>
>> Hi Bertrand,
>>
>>>> On 9 Mar 2022, at 12:20, Julien Grall <julien@xen.org> wrote:
>>>>
>>>> From: Julien Grall <jgrall@amazon.com>
>>>>
>>>> At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
>>>> still on.
>>>>
>>>> Switching TTBR is like replacing existing mappings with new ones. So
>>>> we need to follow the break-before-make sequence.
>>>>
>>>> In this case, it means the MMU needs to be switched off while the
>>>> TTBR is updated. In order to disable the MMU, we need to first
>>>> jump to an identity mapping.
>>>>
>>>> Rename switch_ttbr() to switch_ttbr_id() and create an helper on
>>>> top to temporary map the identity mapping and call switch_ttbr()
>>>> via the identity address.
>>>>
>>>> switch_ttbr_id() is now reworked to temporarily turn off the MMU
>>>> before updating the TTBR.
>>>>
>>>> We also need to make sure the helper switch_ttbr() is part of the
>>>> identity mapping. So move _end_boot past it.
>>>>
>>>> Take the opportunity to instruction cache flush as the operation is
>>>> only necessary when the memory is updated.
>>> Your code is actually remove the instruction cache invalidation so
>>> this sentence is a bit misleading.
>>
>> I forgot to add the word "remove" in the sentence.
> 
> Ok (my sentence was also wrong by the way)
> 
>>
>>> Also an open question: shouldn’t we flush the data cache ?
>> Do you mean clean/invalidate to PoC/PoU? Something else?
> 
> Yes, probably to PoU.
> 
>>
>>> As we switch from one TTBR to an other, there might be some data
>>> in the cache dependent that could be flushed while the MMU is off
>>
>> I am a bit confused. Those flush could also happen with the MMU on. So how turning off the MMU would result to a problem? Note that the data cache is still enabled during the switch.
> 
> If the first level of cache is VIPT and we turn off the MMU, I am wondering if this could not create troubles and could require the cache to be flushed before turning the MMU off.
My reading of the Arm Arm (D5.11.1 "Data and unified caches" ARM DDI 
0487F.c) suggests the data cache is always PIPT.

> I have no idea if this is a problem or not, just raising the question.
> I can try to dig on that at Arm when I am back in 10 days.

Enjoy it!

Cheers,
Bertrand Marquis March 25, 2022, 2:48 p.m. UTC | #13
Hi Julien,

> On 25 Mar 2022, at 15:42, Julien Grall <julien@xen.org> wrote:
> 
> Hi Bertrand,
> 
> On 25/03/2022 14:35, Bertrand Marquis wrote:
>>> On 25 Mar 2022, at 15:24, Julien Grall <julien@xen.org> wrote:
>>> On 25/03/2022 13:47, Bertrand Marquis wrote:
>>>> Hi Julien,
>>> 
>>> Hi Bertrand,
>>> 
>>>>> On 9 Mar 2022, at 12:20, Julien Grall <julien@xen.org> wrote:
>>>>> 
>>>>> From: Julien Grall <jgrall@amazon.com>
>>>>> 
>>>>> At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
>>>>> still on.
>>>>> 
>>>>> Switching TTBR is like replacing existing mappings with new ones. So
>>>>> we need to follow the break-before-make sequence.
>>>>> 
>>>>> In this case, it means the MMU needs to be switched off while the
>>>>> TTBR is updated. In order to disable the MMU, we need to first
>>>>> jump to an identity mapping.
>>>>> 
>>>>> Rename switch_ttbr() to switch_ttbr_id() and create an helper on
>>>>> top to temporary map the identity mapping and call switch_ttbr()
>>>>> via the identity address.
>>>>> 
>>>>> switch_ttbr_id() is now reworked to temporarily turn off the MMU
>>>>> before updating the TTBR.
>>>>> 
>>>>> We also need to make sure the helper switch_ttbr() is part of the
>>>>> identity mapping. So move _end_boot past it.
>>>>> 
>>>>> Take the opportunity to instruction cache flush as the operation is
>>>>> only necessary when the memory is updated.
>>>> Your code is actually remove the instruction cache invalidation so
>>>> this sentence is a bit misleading.
>>> 
>>> I forgot to add the word "remove" in the sentence.
>> Ok (my sentence was also wrong by the way)
>>> 
>>>> Also an open question: shouldn’t we flush the data cache ?
>>> Do you mean clean/invalidate to PoC/PoU? Something else?
>> Yes, probably to PoU.
>>> 
>>>> As we switch from one TTBR to an other, there might be some data
>>>> in the cache dependent that could be flushed while the MMU is off
>>> 
>>> I am a bit confused. Those flush could also happen with the MMU on. So how turning off the MMU would result to a problem? Note that the data cache is still enabled during the switch.
>> If the first level of cache is VIPT and we turn off the MMU, I am wondering if this could not create troubles and could require the cache to be flushed before turning the MMU off.
> My reading of the Arm Arm (D5.11.1 "Data and unified caches" ARM DDI 0487F.c) suggests the data cache is always PIPT.

You are right, only the instruction cache is VIPT.
So the problem most probably does not exist.

> 
>> I have no idea if this is a problem or not, just raising the question.
>> I can try to dig on that at Arm when I am back in 10 days.
> 
> Enjoy it!

Thanks
Bertrand

> 
> Cheers,
> 
> -- 
> Julien Grall
Julien Grall April 7, 2022, 3:38 p.m. UTC | #14
Hi,

On 25/03/2022 14:48, Bertrand Marquis wrote:
>> On 25 Mar 2022, at 15:42, Julien Grall <julien@xen.org> wrote:
>>
>> Hi Bertrand,
>>
>> On 25/03/2022 14:35, Bertrand Marquis wrote:
>>>> On 25 Mar 2022, at 15:24, Julien Grall <julien@xen.org> wrote:
>>>> On 25/03/2022 13:47, Bertrand Marquis wrote:
>>>>> Hi Julien,
>>>>
>>>> Hi Bertrand,
>>>>
>>>>>> On 9 Mar 2022, at 12:20, Julien Grall <julien@xen.org> wrote:
>>>>>>
>>>>>> From: Julien Grall <jgrall@amazon.com>
>>>>>>
>>>>>> At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
>>>>>> still on.
>>>>>>
>>>>>> Switching TTBR is like replacing existing mappings with new ones. So
>>>>>> we need to follow the break-before-make sequence.
>>>>>>
>>>>>> In this case, it means the MMU needs to be switched off while the
>>>>>> TTBR is updated. In order to disable the MMU, we need to first
>>>>>> jump to an identity mapping.
>>>>>>
>>>>>> Rename switch_ttbr() to switch_ttbr_id() and create an helper on
>>>>>> top to temporary map the identity mapping and call switch_ttbr()
>>>>>> via the identity address.
>>>>>>
>>>>>> switch_ttbr_id() is now reworked to temporarily turn off the MMU
>>>>>> before updating the TTBR.
>>>>>>
>>>>>> We also need to make sure the helper switch_ttbr() is part of the
>>>>>> identity mapping. So move _end_boot past it.
>>>>>>
>>>>>> Take the opportunity to instruction cache flush as the operation is
>>>>>> only necessary when the memory is updated.
>>>>> Your code is actually remove the instruction cache invalidation so
>>>>> this sentence is a bit misleading.
>>>>
>>>> I forgot to add the word "remove" in the sentence.
>>> Ok (my sentence was also wrong by the way)
>>>>
>>>>> Also an open question: shouldn’t we flush the data cache ?
>>>> Do you mean clean/invalidate to PoC/PoU? Something else?
>>> Yes, probably to PoU.
>>>>
>>>>> As we switch from one TTBR to an other, there might be some data
>>>>> in the cache dependent that could be flushed while the MMU is off
>>>>
>>>> I am a bit confused. Those flush could also happen with the MMU on. So how turning off the MMU would result to a problem? Note that the data cache is still enabled during the switch.
>>> If the first level of cache is VIPT and we turn off the MMU, I am wondering if this could not create troubles and could require the cache to be flushed before turning the MMU off.
>> My reading of the Arm Arm (D5.11.1 "Data and unified caches" ARM DDI 0487F.c) suggests the data cache is always PIPT.
> 
> You are right, only the instruction cache is VIPT.
> So the problem most probably does not exist.

As discussed yesterda, I tweaked a bit switch_ttbr(). Below the version 
I plan to use:

         /* 1) Ensure any previous read/write have completed */
         dsb   sy /* XXX: Can this be a ish? */
         isb

         /* 2) Turn off MMU */
         mrs    x1, SCTLR_EL2
         bic    x1, x1, #SCTLR_Axx_ELx_M
         msr    SCTLR_EL2, x1
         isb

         /*
          * 3) Flush the TLBs.
          * See asm/arm64/flushtlb.h for the explanation of the sequence.
          */
         dsb   nshst
         tlbi  alle2
         dsb   nsh
         isb

         /* 4) Update the TTBR */
         msr   TTBR0_EL2, x0
         isb

         /* 5) Turn on the MMU */
         mrs   x1, SCTLR_EL2
         orr   x1, x1, #SCTLR_Axx_ELx_M  /* Enable MMU */
         msr   SCTLR_EL2, x1
         isb

         ret

Cheers,
Bertrand Marquis April 13, 2022, 2:02 p.m. UTC | #15
Hi Julien,

> On 7 Apr 2022, at 16:38, Julien Grall <julien@xen.org> wrote:
> 
> Hi,
> 
> On 25/03/2022 14:48, Bertrand Marquis wrote:
>>> On 25 Mar 2022, at 15:42, Julien Grall <julien@xen.org> wrote:
>>> 
>>> Hi Bertrand,
>>> 
>>> On 25/03/2022 14:35, Bertrand Marquis wrote:
>>>>> On 25 Mar 2022, at 15:24, Julien Grall <julien@xen.org> wrote:
>>>>> On 25/03/2022 13:47, Bertrand Marquis wrote:
>>>>>> Hi Julien,
>>>>> 
>>>>> Hi Bertrand,
>>>>> 
>>>>>>> On 9 Mar 2022, at 12:20, Julien Grall <julien@xen.org> wrote:
>>>>>>> 
>>>>>>> From: Julien Grall <jgrall@amazon.com>
>>>>>>> 
>>>>>>> At the moment, switch_ttbr() is switching the TTBR whilst the MMU is
>>>>>>> still on.
>>>>>>> 
>>>>>>> Switching TTBR is like replacing existing mappings with new ones. So
>>>>>>> we need to follow the break-before-make sequence.
>>>>>>> 
>>>>>>> In this case, it means the MMU needs to be switched off while the
>>>>>>> TTBR is updated. In order to disable the MMU, we need to first
>>>>>>> jump to an identity mapping.
>>>>>>> 
>>>>>>> Rename switch_ttbr() to switch_ttbr_id() and create an helper on
>>>>>>> top to temporary map the identity mapping and call switch_ttbr()
>>>>>>> via the identity address.
>>>>>>> 
>>>>>>> switch_ttbr_id() is now reworked to temporarily turn off the MMU
>>>>>>> before updating the TTBR.
>>>>>>> 
>>>>>>> We also need to make sure the helper switch_ttbr() is part of the
>>>>>>> identity mapping. So move _end_boot past it.
>>>>>>> 
>>>>>>> Take the opportunity to instruction cache flush as the operation is
>>>>>>> only necessary when the memory is updated.
>>>>>> Your code is actually remove the instruction cache invalidation so
>>>>>> this sentence is a bit misleading.
>>>>> 
>>>>> I forgot to add the word "remove" in the sentence.
>>>> Ok (my sentence was also wrong by the way)
>>>>> 
>>>>>> Also an open question: shouldn’t we flush the data cache ?
>>>>> Do you mean clean/invalidate to PoC/PoU? Something else?
>>>> Yes, probably to PoU.
>>>>> 
>>>>>> As we switch from one TTBR to an other, there might be some data
>>>>>> in the cache dependent that could be flushed while the MMU is off
>>>>> 
>>>>> I am a bit confused. Those flush could also happen with the MMU on. So how turning off the MMU would result to a problem? Note that the data cache is still enabled during the switch.
>>>> If the first level of cache is VIPT and we turn off the MMU, I am wondering if this could not create troubles and could require the cache to be flushed before turning the MMU off.
>>> My reading of the Arm Arm (D5.11.1 "Data and unified caches" ARM DDI 0487F.c) suggests the data cache is always PIPT.
>> You are right, only the instruction cache is VIPT.
>> So the problem most probably does not exist.
> 
> As discussed yesterda, I tweaked a bit switch_ttbr(). Below the version I plan to use:
> 
>        /* 1) Ensure any previous read/write have completed */
>        dsb   sy /* XXX: Can this be a ish? */

I think here “ish” is enough as we only need the domain impacted by the MMU off operation to have his operations done.

For reference this is an extract from the code of trusted firmware before enabling MMU:

        /*
         * Ensure all translation table writes have drained into memory, the TLB
         * invalidation is complete, and translation register writes are
         * committed before enabling the MMU
         */
        dsb ish
        isb


>        isb
> 
>        /* 2) Turn off MMU */
>        mrs    x1, SCTLR_EL2
>        bic    x1, x1, #SCTLR_Axx_ELx_M
>        msr    SCTLR_EL2, x1
>        isb
> 
>        /*
>         * 3) Flush the TLBs.
>         * See asm/arm64/flushtlb.h for the explanation of the sequence.
>         */
>        dsb   nshst
>        tlbi  alle2
>        dsb   nsh
>        isb
> 
>        /* 4) Update the TTBR */
>        msr   TTBR0_EL2, x0
>        isb
> 
>        /* 5) Turn on the MMU */
>        mrs   x1, SCTLR_EL2
>        orr   x1, x1, #SCTLR_Axx_ELx_M  /* Enable MMU */
>        msr   SCTLR_EL2, x1
>        isb

Rest here sounds ok to me.

Cheers
Bertrand

> 
>        ret
> 
> Cheers,
> 
> -- 
> Julien Grall
diff mbox series

Patch

diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
index 878649280d73..c5cc72b8fe6f 100644
--- a/xen/arch/arm/arm64/head.S
+++ b/xen/arch/arm/arm64/head.S
@@ -803,36 +803,45 @@  fail:   PRINT("- Boot failed -\r\n")
         b     1b
 ENDPROC(fail)
 
-GLOBAL(_end_boot)
-
 /*
  * Switch TTBR
  *
  * x0    ttbr
  *
- * TODO: This code does not comply with break-before-make.
+ * XXX: Check the barriers
  */
-ENTRY(switch_ttbr)
+ENTRY(switch_ttbr_id)
         dsb   sy                     /* Ensure the flushes happen before
                                       * continuing */
         isb                          /* Ensure synchronization with previous
                                       * changes to text */
+
+        /* Turn off MMU */
+        mrs    x1, SCTLR_EL2
+        bic    x1, x1, #SCTLR_Axx_ELx_M
+        msr    SCTLR_EL2, x1
+        dsb    sy
+        isb
+
         tlbi   alle2                 /* Flush hypervisor TLB */
-        ic     iallu                 /* Flush I-cache */
         dsb    sy                    /* Ensure completion of TLB flush */
         isb
 
-        msr    TTBR0_EL2, x0
+        msr   TTBR0_EL2, x0
+
+        mrs   x1, SCTLR_EL2
+        orr   x1, x1, #SCTLR_Axx_ELx_M  /* Enable MMU */
+        msr   SCTLR_EL2, x1
 
         isb                          /* Ensure synchronization with previous
                                       * changes to text */
-        tlbi   alle2                 /* Flush hypervisor TLB */
-        ic     iallu                 /* Flush I-cache */
-        dsb    sy                    /* Ensure completion of TLB flush */
-        isb
+        /* Turn on the MMU */
+
 
         ret
-ENDPROC(switch_ttbr)
+ENDPROC(switch_ttbr_id)
+
+GLOBAL(_end_boot)
 
 #ifdef CONFIG_EARLY_PRINTK
 /*
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 5c4dece16f7f..a53760af7af0 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -660,7 +660,19 @@  static void xen_pt_enforce_wnx(void)
     flush_xen_tlb_local();
 }
 
-extern void switch_ttbr(uint64_t ttbr);
+extern void switch_ttbr_id(uint64_t ttbr);
+
+typedef void (switch_ttbr_fn)(uint64_t ttbr);
+
+static void switch_ttbr(uint64_t ttbr)
+{
+    vaddr_t id_addr = virt_to_maddr(switch_ttbr_id);
+    switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr;
+
+    update_identity_mapping(true);
+    fn(ttbr);
+    update_identity_mapping(false);
+}
 
 /* Clear a translation table and clean & invalidate the cache */
 static void clear_table(void *table)