
[v2,4/7] xen/arm: page: Clarify the Xen TLBs helpers name

Message ID 20190508161603.21964-5-julien.grall@arm.com (mailing list archive)
State Superseded
Series xen/arm: TLB flush helpers rework

Commit Message

Julien Grall May 8, 2019, 4:16 p.m. UTC
Now that we dropped flush_xen_text_tlb_local(), we have only one set of
helpers acting on Xen TLBs. Their naming is quite confusing because the
TLB instructions used act on both Data and Instruction TLBs.

Take the opportunity to rework the documentation, which can be confusing
to read as it doesn't match the implementation.

Lastly, switch from unsigned lont to vaddr_t as the functions technically
deal with virtual addresses.

Signed-off-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Andrii Anisov <andrii_anisov@epam.com>

---
    Changes in v2:
        - Add Andrii's reviewed-by
---
 xen/arch/arm/mm.c                | 18 +++++++++---------
 xen/include/asm-arm/arm32/page.h | 15 +++++----------
 xen/include/asm-arm/arm64/page.h | 15 +++++----------
 xen/include/asm-arm/page.h       | 28 ++++++++++++++--------------
 4 files changed, 33 insertions(+), 43 deletions(-)

Comments

Stefano Stabellini May 9, 2019, 8:13 p.m. UTC | #1
On Wed, 8 May 2019, Julien Grall wrote:
> Now that we dropped flush_xen_text_tlb_local(), we have only one set of
> helpers acting on Xen TLBs. There naming are quite confusing because the
> TLB instructions used will act on both Data and Instruction TLBs.
> 
> Take the opportunity to rework the documentation that can be confusing
> to read as they don't match the implementation.
> 
> Lastly, switch from unsigned lont to vaddr_t as the function technically
                               ^ long

One comment about the in-code comments below.


> deal with virtual address.
> 
> Signed-off-by: Julien Grall <julien.grall@arm.com>
> Reviewed-by: Andrii Anisov <andrii_anisov@epam.com>
> 
> ---
>     Changes in v2:
>         - Add Andrii's reviewed-by
> ---
>  xen/arch/arm/mm.c                | 18 +++++++++---------
>  xen/include/asm-arm/arm32/page.h | 15 +++++----------
>  xen/include/asm-arm/arm64/page.h | 15 +++++----------
>  xen/include/asm-arm/page.h       | 28 ++++++++++++++--------------
>  4 files changed, 33 insertions(+), 43 deletions(-)
> 
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index dfbe39c70a..8ee828d445 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -335,7 +335,7 @@ void set_fixmap(unsigned map, mfn_t mfn, unsigned int flags)
>      pte.pt.table = 1; /* 4k mappings always have this bit set */
>      pte.pt.xn = 1;
>      write_pte(xen_fixmap + third_table_offset(FIXMAP_ADDR(map)), pte);
> -    flush_xen_data_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
> +    flush_xen_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
>  }
>  
>  /* Remove a mapping from a fixmap entry */
> @@ -343,7 +343,7 @@ void clear_fixmap(unsigned map)
>  {
>      lpae_t pte = {0};
>      write_pte(xen_fixmap + third_table_offset(FIXMAP_ADDR(map)), pte);
> -    flush_xen_data_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
> +    flush_xen_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
>  }
>  
>  /* Create Xen's mappings of memory.
> @@ -377,7 +377,7 @@ static void __init create_mappings(lpae_t *second,
>          write_pte(p + i, pte);
>          pte.pt.base += 1 << LPAE_SHIFT;
>      }
> -    flush_xen_data_tlb_local();
> +    flush_xen_tlb_local();
>  }
>  
>  #ifdef CONFIG_DOMAIN_PAGE
> @@ -455,7 +455,7 @@ void *map_domain_page(mfn_t mfn)
>       * We may not have flushed this specific subpage at map time,
>       * since we only flush the 4k page not the superpage
>       */
> -    flush_xen_data_tlb_range_va_local(va, PAGE_SIZE);
> +    flush_xen_tlb_range_va_local(va, PAGE_SIZE);
>  
>      return (void *)va;
>  }
> @@ -598,7 +598,7 @@ void __init remove_early_mappings(void)
>      write_pte(xen_second + second_table_offset(BOOT_FDT_VIRT_START), pte);
>      write_pte(xen_second + second_table_offset(BOOT_FDT_VIRT_START + SZ_2M),
>                pte);
> -    flush_xen_data_tlb_range_va(BOOT_FDT_VIRT_START, BOOT_FDT_SLOT_SIZE);
> +    flush_xen_tlb_range_va(BOOT_FDT_VIRT_START, BOOT_FDT_SLOT_SIZE);
>  }
>  
>  /*
> @@ -615,7 +615,7 @@ static void xen_pt_enforce_wnx(void)
>       * before flushing the TLBs.
>       */
>      isb();
> -    flush_xen_data_tlb_local();
> +    flush_xen_tlb_local();
>  }
>  
>  extern void switch_ttbr(uint64_t ttbr);
> @@ -879,7 +879,7 @@ void __init setup_xenheap_mappings(unsigned long base_mfn,
>          vaddr += FIRST_SIZE;
>      }
>  
> -    flush_xen_data_tlb_local();
> +    flush_xen_tlb_local();
>  }
>  #endif
>  
> @@ -1052,7 +1052,7 @@ static int create_xen_entries(enum xenmap_operation op,
>                  BUG();
>          }
>      }
> -    flush_xen_data_tlb_range_va(virt, PAGE_SIZE * nr_mfns);
> +    flush_xen_tlb_range_va(virt, PAGE_SIZE * nr_mfns);
>  
>      rc = 0;
>  
> @@ -1127,7 +1127,7 @@ static void set_pte_flags_on_range(const char *p, unsigned long l, enum mg mg)
>          }
>          write_pte(xen_xenmap + i, pte);
>      }
> -    flush_xen_data_tlb_local();
> +    flush_xen_tlb_local();
>  }
>  
>  /* Release all __init and __initdata ranges to be reused */
> diff --git a/xen/include/asm-arm/arm32/page.h b/xen/include/asm-arm/arm32/page.h
> index 40a77daa9d..0b41b9214b 100644
> --- a/xen/include/asm-arm/arm32/page.h
> +++ b/xen/include/asm-arm/arm32/page.h
> @@ -61,12 +61,8 @@ static inline void invalidate_icache_local(void)
>      isb();                      /* Synchronize fetched instruction stream. */
>  }
>  
> -/*
> - * Flush all hypervisor mappings from the data TLB of the local
> - * processor. This is not sufficient when changing code mappings or
> - * for self modifying code.
> - */
> -static inline void flush_xen_data_tlb_local(void)
> +/* Flush all hypervisor mappings from the TLB of the local processor. */

I realize that the statement "This is not sufficient when changing code
mappings or for self modifying code" is not quite accurate, but I would
prefer not to remove it completely. It would be good to retain a warning
somewhere about the IC being needed when changing Xen's own mappings.
Maybe on top of invalidate_icache_local?


> +static inline void flush_xen_tlb_local(void)
>  {
>      asm volatile("dsb;" /* Ensure preceding are visible */
>                   CMD_CP32(TLBIALLH)
> @@ -76,14 +72,13 @@ static inline void flush_xen_data_tlb_local(void)
>  }
>  
>  /* Flush TLB of local processor for address va. */
> -static inline void __flush_xen_data_tlb_one_local(vaddr_t va)
> +static inline void __flush_xen_tlb_one_local(vaddr_t va)
>  {
>      asm volatile(STORE_CP32(0, TLBIMVAH) : : "r" (va) : "memory");
>  }
>  
> -/* Flush TLB of all processors in the inner-shareable domain for
> - * address va. */
> -static inline void __flush_xen_data_tlb_one(vaddr_t va)
> +/* Flush TLB of all processors in the inner-shareable domain for address va. */
> +static inline void __flush_xen_tlb_one(vaddr_t va)
>  {
>      asm volatile(STORE_CP32(0, TLBIMVAHIS) : : "r" (va) : "memory");
>  }
> diff --git a/xen/include/asm-arm/arm64/page.h b/xen/include/asm-arm/arm64/page.h
> index 6c36d0210f..31d04ecf76 100644
> --- a/xen/include/asm-arm/arm64/page.h
> +++ b/xen/include/asm-arm/arm64/page.h
> @@ -45,12 +45,8 @@ static inline void invalidate_icache_local(void)
>      isb();
>  }
>  
> -/*
> - * Flush all hypervisor mappings from the data TLB of the local
> - * processor. This is not sufficient when changing code mappings or
> - * for self modifying code.
> - */
> -static inline void flush_xen_data_tlb_local(void)
> +/* Flush all hypervisor mappings from the TLB of the local processor. */
> +static inline void flush_xen_tlb_local(void)
>  {
>      asm volatile (
>          "dsb    sy;"                    /* Ensure visibility of PTE writes */
> @@ -61,14 +57,13 @@ static inline void flush_xen_data_tlb_local(void)
>  }
>  
>  /* Flush TLB of local processor for address va. */
> -static inline void  __flush_xen_data_tlb_one_local(vaddr_t va)
> +static inline void  __flush_xen_tlb_one_local(vaddr_t va)
>  {
>      asm volatile("tlbi vae2, %0;" : : "r" (va>>PAGE_SHIFT) : "memory");
>  }
>  
> -/* Flush TLB of all processors in the inner-shareable domain for
> - * address va. */
> -static inline void __flush_xen_data_tlb_one(vaddr_t va)
> +/* Flush TLB of all processors in the inner-shareable domain for address va. */
> +static inline void __flush_xen_tlb_one(vaddr_t va)
>  {
>      asm volatile("tlbi vae2is, %0;" : : "r" (va>>PAGE_SHIFT) : "memory");
>  }
> diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
> index 1a1713ce02..195345e24a 100644
> --- a/xen/include/asm-arm/page.h
> +++ b/xen/include/asm-arm/page.h
> @@ -234,18 +234,18 @@ static inline int clean_and_invalidate_dcache_va_range
>  } while (0)
>  
>  /*
> - * Flush a range of VA's hypervisor mappings from the data TLB of the
> - * local processor. This is not sufficient when changing code mappings
> - * or for self modifying code.
> + * Flush a range of VA's hypervisor mappings from the TLB of the local
> + * processor.
>   */
> -static inline void flush_xen_data_tlb_range_va_local(unsigned long va,
> -                                                     unsigned long size)
> +static inline void flush_xen_tlb_range_va_local(vaddr_t va,
> +                                                unsigned long size)
>  {
> -    unsigned long end = va + size;
> +    vaddr_t end = va + size;
> +
>      dsb(sy); /* Ensure preceding are visible */
>      while ( va < end )
>      {
> -        __flush_xen_data_tlb_one_local(va);
> +        __flush_xen_tlb_one_local(va);
>          va += PAGE_SIZE;
>      }
>      dsb(sy); /* Ensure completion of the TLB flush */
> @@ -253,18 +253,18 @@ static inline void flush_xen_data_tlb_range_va_local(unsigned long va,
>  }
>  
>  /*
> - * Flush a range of VA's hypervisor mappings from the data TLB of all
> - * processors in the inner-shareable domain. This is not sufficient
> - * when changing code mappings or for self modifying code.
> + * Flush a range of VA's hypervisor mappings from the TLB of all
> + * processors in the inner-shareable domain.
>   */
> -static inline void flush_xen_data_tlb_range_va(unsigned long va,
> -                                               unsigned long size)
> +static inline void flush_xen_tlb_range_va(vaddr_t va,
> +                                          unsigned long size)
>  {
> -    unsigned long end = va + size;
> +    vaddr_t end = va + size;
> +
>      dsb(sy); /* Ensure preceding are visible */
>      while ( va < end )
>      {
> -        __flush_xen_data_tlb_one(va);
> +        __flush_xen_tlb_one(va);
>          va += PAGE_SIZE;
>      }
>      dsb(sy); /* Ensure completion of the TLB flush */
> -- 
> 2.11.0
>
Julien Grall May 9, 2019, 8:32 p.m. UTC | #2
Hi,

On 09/05/2019 21:13, Stefano Stabellini wrote:
> On Wed, 8 May 2019, Julien Grall wrote:
>>   /* Release all __init and __initdata ranges to be reused */
>> diff --git a/xen/include/asm-arm/arm32/page.h b/xen/include/asm-arm/arm32/page.h
>> index 40a77daa9d..0b41b9214b 100644
>> --- a/xen/include/asm-arm/arm32/page.h
>> +++ b/xen/include/asm-arm/arm32/page.h
>> @@ -61,12 +61,8 @@ static inline void invalidate_icache_local(void)
>>       isb();                      /* Synchronize fetched instruction stream. */
>>   }
>>   
>> -/*
>> - * Flush all hypervisor mappings from the data TLB of the local
>> - * processor. This is not sufficient when changing code mappings or
>> - * for self modifying code.
>> - */
>> -static inline void flush_xen_data_tlb_local(void)
>> +/* Flush all hypervisor mappings from the TLB of the local processor. */
> 
> I realize that the statement "This is not sufficient when changing code
> mappings or for self modifying code" is not quite accurate, but I would
> prefer not to remove it completely. It would be good to retain a warning
> somewhere about IC been needed when changing Xen's own mappings. Maybe
> on top of invalidate_icache_local?

Can you please expand on the circumstances in which you need to
invalidate the instruction cache when changing Xen's own mappings?

Cheers,
Julien Grall May 9, 2019, 9:46 p.m. UTC | #3
Hi,

On 09/05/2019 21:32, Julien Grall wrote:
> Hi,
> 
> On 09/05/2019 21:13, Stefano Stabellini wrote:
>> On Wed, 8 May 2019, Julien Grall wrote:
>>>   /* Release all __init and __initdata ranges to be reused */
>>> diff --git a/xen/include/asm-arm/arm32/page.h 
>>> b/xen/include/asm-arm/arm32/page.h
>>> index 40a77daa9d..0b41b9214b 100644
>>> --- a/xen/include/asm-arm/arm32/page.h
>>> +++ b/xen/include/asm-arm/arm32/page.h
>>> @@ -61,12 +61,8 @@ static inline void invalidate_icache_local(void)
>>>       isb();                      /* Synchronize fetched instruction 
>>> stream. */
>>>   }
>>> -/*
>>> - * Flush all hypervisor mappings from the data TLB of the local
>>> - * processor. This is not sufficient when changing code mappings or
>>> - * for self modifying code.
>>> - */
>>> -static inline void flush_xen_data_tlb_local(void)
>>> +/* Flush all hypervisor mappings from the TLB of the local 
>>> processor. */
>>
>> I realize that the statement "This is not sufficient when changing code
>> mappings or for self modifying code" is not quite accurate, but I would
>> prefer not to remove it completely. It would be good to retain a warning
>> somewhere about IC been needed when changing Xen's own mappings. Maybe
>> on top of invalidate_icache_local?
> 
> Can you please expand in which circumstance you need to invalid the 
> instruction cache when changing Xen's own mappings?

Reading the Armv7 ARM (B3.11.2 in ARM DDI 0406C.c) and the Armv8 ARM
(D5.11.2 in ARM DDI 0487D.a), most instruction caches implement the
IVIPT extension. This means that instruction cache maintenance is
required only after writing new data to a PA that holds instructions
(see D5-2522 in ARM DDI 0487D.a and B3.11.2 in ARM DDI 0406C.c).

The only ones that differ from that behavior are ASID- and VMID-tagged
VIVT instruction caches, which are only present in Armv7 (I can't
remember why they were dropped in Armv8). For those, instruction cache
maintenance can be required when changing the translation of a virtual
address to a physical address.

There are only a few limited places where Xen mappings can change and an
instruction cache flush is required (namely livepatch, changing
permissions, freeing init). In all the other places it is not necessary.
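
For reference, on an IVIPT instruction cache the sequence after writing
new instructions is roughly the following (AArch64 shown; a sketch with
an illustrative name, not one of Xen's existing helpers). Note that no
TLB maintenance is involved:

    static inline void sync_after_code_write(const void *p)
    {
        asm volatile(
            "dc cvau, %0\n" /* Clean the D-cache line to the PoU */
            "dsb ish\n"     /* Ensure the clean has completed */
            "ic ivau, %0\n" /* Invalidate the I-cache line by VA to the PoU */
            "dsb ish\n"     /* Ensure the invalidation has completed */
            "isb\n"         /* Synchronize the fetched instruction stream */
            : : "r" (p) : "memory");
    }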

A comment on top of invalidate_icache_local() is not going to help, as
it relies on the developer knowing which function to use. One on top of
the TLB flush helpers is at best misleading without a long explanation.
At that point, you might as well read the Arm ARM.

Cheers,
Julien Grall May 10, 2019, 2:38 p.m. UTC | #4
On 09/05/2019 22:46, Julien Grall wrote:
> Hi,
> 
> On 09/05/2019 21:32, Julien Grall wrote:
>> Hi,
>>
>> On 09/05/2019 21:13, Stefano Stabellini wrote:
>>> On Wed, 8 May 2019, Julien Grall wrote:
>>>>   /* Release all __init and __initdata ranges to be reused */
>>>> diff --git a/xen/include/asm-arm/arm32/page.h 
>>>> b/xen/include/asm-arm/arm32/page.h
>>>> index 40a77daa9d..0b41b9214b 100644
>>>> --- a/xen/include/asm-arm/arm32/page.h
>>>> +++ b/xen/include/asm-arm/arm32/page.h
>>>> @@ -61,12 +61,8 @@ static inline void invalidate_icache_local(void)
>>>>       isb();                      /* Synchronize fetched instruction stream. */
>>>>   }
>>>> -/*
>>>> - * Flush all hypervisor mappings from the data TLB of the local
>>>> - * processor. This is not sufficient when changing code mappings or
>>>> - * for self modifying code.
>>>> - */
>>>> -static inline void flush_xen_data_tlb_local(void)
>>>> +/* Flush all hypervisor mappings from the TLB of the local processor. */
>>>
>>> I realize that the statement "This is not sufficient when changing code
>>> mappings or for self modifying code" is not quite accurate, but I would
>>> prefer not to remove it completely. It would be good to retain a warning
>>> somewhere about IC been needed when changing Xen's own mappings. Maybe
>>> on top of invalidate_icache_local?
>>
>> Can you please expand in which circumstance you need to invalid the 
>> instruction cache when changing Xen's own mappings?
> 
> Reading the Armv7 (B3.11.2 in ARM DDI 0406C.c) and Armv8 (D5.11.2 in ARM DDI 
> 0487D.a), most of the instruction caches implement the IVIPT extension. This 
> means that instruction cache maintenance is required only after write new data 
> to a PA that holds instructions (see D5-2522 in ARM DDI 0487D.a and B3.11.2 in 
> ARM DDI 0406C.c).
> 
> The only one that differs with that behavior is ASID and VMID tagged VIVT 
> instruction caches which is only present in Armv7 (I can't remember why it was 
> dropped in Armv8). Instruction cache maintenance can be required when changing 
> the translation of a virtual address to a physical address.

I thought about this a bit more and chatted with my team at Arm. Xen on
Arm only supports Cortex-A7, Cortex-A15 and Brahma 15 (see the CPU ID
check in arm32/head.S).

None of them actually use VIVT instruction caches. In general, VIVT
caches are more difficult to deal with because they require more
flushes. So I would be more inclined to deny booting Xen on platforms
where the instruction caches don't implement the IVIPT extension.

I don't think that will have a major impact on users because of my point
above.
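
Roughly, the boot-time check could look like the sketch below. The
constant names and the CTR accessor are assumptions for illustration;
on Armv7 the CTR.L1Ip field (bits [15:14]) reads 0b01 for an
ASID-tagged VIVT (AIVIVT) instruction cache:

    #define CTR_L1IP_SHIFT 14
    #define CTR_L1IP_MASK  0x3
    #define ICACHE_AIVIVT  0x1

    static void __init check_icache_type(void)
    {
        /* Assumed accessor for the Cache Type Register. */
        uint32_t ctr = READ_CP32(CTR);

        if ( ((ctr >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) == ICACHE_AIVIVT )
            panic("AIVIVT instruction caches are not supported\n");
    }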

Cheers,
Stefano Stabellini May 10, 2019, 5:57 p.m. UTC | #5
On Fri, 10 May 2019, Julien Grall wrote:
> On 09/05/2019 22:46, Julien Grall wrote:
> > Hi,
> > 
> > On 09/05/2019 21:32, Julien Grall wrote:
> > > Hi,
> > > 
> > > On 09/05/2019 21:13, Stefano Stabellini wrote:
> > > > On Wed, 8 May 2019, Julien Grall wrote:
> > > > >   /* Release all __init and __initdata ranges to be reused */
> > > > > diff --git a/xen/include/asm-arm/arm32/page.h
> > > > > b/xen/include/asm-arm/arm32/page.h
> > > > > index 40a77daa9d..0b41b9214b 100644
> > > > > --- a/xen/include/asm-arm/arm32/page.h
> > > > > +++ b/xen/include/asm-arm/arm32/page.h
> > > > > @@ -61,12 +61,8 @@ static inline void invalidate_icache_local(void)
> > > > >       isb();                      /* Synchronize fetched instruction
> > > > > stream. */
> > > > >   }
> > > > > -/*
> > > > > - * Flush all hypervisor mappings from the data TLB of the local
> > > > > - * processor. This is not sufficient when changing code mappings or
> > > > > - * for self modifying code.
> > > > > - */
> > > > > -static inline void flush_xen_data_tlb_local(void)
> > > > > +/* Flush all hypervisor mappings from the TLB of the local processor.
> > > > > */
> > > > 
> > > > I realize that the statement "This is not sufficient when changing code
> > > > mappings or for self modifying code" is not quite accurate, but I would
> > > > prefer not to remove it completely. It would be good to retain a warning
> > > > somewhere about IC been needed when changing Xen's own mappings. Maybe
> > > > on top of invalidate_icache_local?
> > > 
> > > Can you please expand in which circumstance you need to invalid the
> > > instruction cache when changing Xen's own mappings?
> > 
> > Reading the Armv7 (B3.11.2 in ARM DDI 0406C.c) and Armv8 (D5.11.2 in ARM DDI
> > 0487D.a), most of the instruction caches implement the IVIPT extension. This
> > means that instruction cache maintenance is required only after write new
> > data to a PA that holds instructions (see D5-2522 in ARM DDI 0487D.a and
> > B3.11.2 in ARM DDI 0406C.c).
> > 
> > The only one that differs with that behavior is ASID and VMID tagged VIVT
> > instruction caches which is only present in Armv7 (I can't remember why it
> > was dropped in Armv8). Instruction cache maintenance can be required when
> > changing the translation of a virtual address to a physical address.
> 
> I thought about this a bit more and chat with my team at Arm. Xen on Arm only
> support Cortex-A7, Cortex-A15 and Brahma 15 (see the CPU ID check in
> arm32/head.S).
> 	
> None of them are actually using VIVT instruction caches. In general, VIVT
> caches are more difficult to deal with because they require more flush. So I
> would be more incline to deny booting Xen on platform where the instruction
> caches don't support IVIVT extension.
> 
> I don't think that will have a major impact on the user because of my point
> above.

Thanks for looking this up in detail. I think there are two interesting
points here:

1) what to do with VIVT
2) what to write in the in-code comment

For 1) I think it would be OK to deny booting. For sure we need at least
a warning. Would you be able to add the warning/boot-denial as part of
this series, or at least an in-code comment?

For 2) I would like this reasoning to be captured somewhere with an
in-code comment, if nothing else as a reference for what to search for
in the Arm ARM. I don't know where the best place for it is. If
invalidate_icache_local is not a good place for the comment, please
suggest a better location.
Julien Grall May 10, 2019, 6:35 p.m. UTC | #6
Hi,

On 10/05/2019 18:57, Stefano Stabellini wrote:
> On Fri, 10 May 2019, Julien Grall wrote:
>> On 09/05/2019 22:46, Julien Grall wrote:
>>> Hi,
>>>
>>> On 09/05/2019 21:32, Julien Grall wrote:
>>>> Hi,
>>>>
>>>> On 09/05/2019 21:13, Stefano Stabellini wrote:
>>>>> On Wed, 8 May 2019, Julien Grall wrote:
>>>>>>    /* Release all __init and __initdata ranges to be reused */
>>>>>> diff --git a/xen/include/asm-arm/arm32/page.h
>>>>>> b/xen/include/asm-arm/arm32/page.h
>>>>>> index 40a77daa9d..0b41b9214b 100644
>>>>>> --- a/xen/include/asm-arm/arm32/page.h
>>>>>> +++ b/xen/include/asm-arm/arm32/page.h
>>>>>> @@ -61,12 +61,8 @@ static inline void invalidate_icache_local(void)
>>>>>>        isb();                      /* Synchronize fetched instruction
>>>>>> stream. */
>>>>>>    }
>>>>>> -/*
>>>>>> - * Flush all hypervisor mappings from the data TLB of the local
>>>>>> - * processor. This is not sufficient when changing code mappings or
>>>>>> - * for self modifying code.
>>>>>> - */
>>>>>> -static inline void flush_xen_data_tlb_local(void)
>>>>>> +/* Flush all hypervisor mappings from the TLB of the local processor.
>>>>>> */
>>>>>
>>>>> I realize that the statement "This is not sufficient when changing code
>>>>> mappings or for self modifying code" is not quite accurate, but I would
>>>>> prefer not to remove it completely. It would be good to retain a warning
>>>>> somewhere about IC been needed when changing Xen's own mappings. Maybe
>>>>> on top of invalidate_icache_local?
>>>>
>>>> Can you please expand in which circumstance you need to invalid the
>>>> instruction cache when changing Xen's own mappings?
>>>
>>> Reading the Armv7 (B3.11.2 in ARM DDI 0406C.c) and Armv8 (D5.11.2 in ARM DDI
>>> 0487D.a), most of the instruction caches implement the IVIPT extension. This
>>> means that instruction cache maintenance is required only after write new
>>> data to a PA that holds instructions (see D5-2522 in ARM DDI 0487D.a and
>>> B3.11.2 in ARM DDI 0406C.c).
>>>
>>> The only one that differs with that behavior is ASID and VMID tagged VIVT
>>> instruction caches which is only present in Armv7 (I can't remember why it
>>> was dropped in Armv8). Instruction cache maintenance can be required when
>>> changing the translation of a virtual address to a physical address.
>>
>> I thought about this a bit more and chat with my team at Arm. Xen on Arm only
>> support Cortex-A7, Cortex-A15 and Brahma 15 (see the CPU ID check in
>> arm32/head.S).
>> 	
>> None of them are actually using VIVT instruction caches. In general, VIVT
>> caches are more difficult to deal with because they require more flush. So I
>> would be more incline to deny booting Xen on platform where the instruction
>> caches don't support IVIVT extension.
>>
>> I don't think that will have a major impact on the user because of my point
>> above.
> 
> Thanks for looking this up in details. I think there are two interesting
> points here:
> 
> 1) what to do with VIVT
> 2) what to write in the in-code comment
> 
> For 1) I think it would be OK to deny booting. For sure we need at least
> a warning. Would you be able to add the warning/boot-denial as part of
> this series, or at least an in-code comment?

I am planning to deny booting Xen on such platforms.

> 
> For 2) I would like this reasonining to be captured somewhere with a
> in-code comment, if nothing else as a reference to what to search in
> the Arm Arm. I don't know where is the best place for it. If
> invalidate_icache_local is not good place for the comment please suggest
> a better location.

I still don't understand what reasoning you want to capture. If we don't
support VIVT, then the instruction cache is very easy to maintain, i.e.
"you flush if you modify the instructions".

I am worried that if we overdo the explanation in the code, we are going
to confuse more than one person. So it would be better to blank out
"VIVT" completely from there.

Feel free to suggest an in-code comment so we can discuss its worthiness.

Cheers,
Stefano Stabellini May 20, 2019, 9:01 p.m. UTC | #7
On Fri, 10 May 2019, Julien Grall wrote:
> On 10/05/2019 18:57, Stefano Stabellini wrote:
> > On Fri, 10 May 2019, Julien Grall wrote:
> >> On 09/05/2019 22:46, Julien Grall wrote:
> >>> Hi,
> >>>
> >>> On 09/05/2019 21:32, Julien Grall wrote:
> >>>> Hi,
> >>>>
> >>>> On 09/05/2019 21:13, Stefano Stabellini wrote:
> >>>>> On Wed, 8 May 2019, Julien Grall wrote:
> >>>>>>    /* Release all __init and __initdata ranges to be reused */
> >>>>>> diff --git a/xen/include/asm-arm/arm32/page.h
> >>>>>> b/xen/include/asm-arm/arm32/page.h
> >>>>>> index 40a77daa9d..0b41b9214b 100644
> >>>>>> --- a/xen/include/asm-arm/arm32/page.h
> >>>>>> +++ b/xen/include/asm-arm/arm32/page.h
> >>>>>> @@ -61,12 +61,8 @@ static inline void invalidate_icache_local(void)
> >>>>>>        isb();                      /* Synchronize fetched instruction
> >>>>>> stream. */
> >>>>>>    }
> >>>>>> -/*
> >>>>>> - * Flush all hypervisor mappings from the data TLB of the local
> >>>>>> - * processor. This is not sufficient when changing code mappings or
> >>>>>> - * for self modifying code.
> >>>>>> - */
> >>>>>> -static inline void flush_xen_data_tlb_local(void)
> >>>>>> +/* Flush all hypervisor mappings from the TLB of the local processor.
> >>>>>> */
> >>>>>
> >>>>> I realize that the statement "This is not sufficient when changing code
> >>>>> mappings or for self modifying code" is not quite accurate, but I would
> >>>>> prefer not to remove it completely. It would be good to retain a warning
> >>>>> somewhere about IC been needed when changing Xen's own mappings. Maybe
> >>>>> on top of invalidate_icache_local?
> >>>>
> >>>> Can you please expand in which circumstance you need to invalid the
> >>>> instruction cache when changing Xen's own mappings?
> >>>
> >>> Reading the Armv7 (B3.11.2 in ARM DDI 0406C.c) and Armv8 (D5.11.2 in ARM DDI
> >>> 0487D.a), most of the instruction caches implement the IVIPT extension. This
> >>> means that instruction cache maintenance is required only after write new
> >>> data to a PA that holds instructions (see D5-2522 in ARM DDI 0487D.a and
> >>> B3.11.2 in ARM DDI 0406C.c).
> >>>
> >>> The only one that differs with that behavior is ASID and VMID tagged VIVT
> >>> instruction caches which is only present in Armv7 (I can't remember why it
> >>> was dropped in Armv8). Instruction cache maintenance can be required when
> >>> changing the translation of a virtual address to a physical address.
> >>
> >> I thought about this a bit more and chat with my team at Arm. Xen on Arm only
> >> support Cortex-A7, Cortex-A15 and Brahma 15 (see the CPU ID check in
> >> arm32/head.S).
> >> 	
> >> None of them are actually using VIVT instruction caches. In general, VIVT
> >> caches are more difficult to deal with because they require more flush. So I
> >> would be more incline to deny booting Xen on platform where the instruction
> >> caches don't support IVIVT extension.
> >>
> >> I don't think that will have a major impact on the user because of my point
> >> above.
> > 
> > Thanks for looking this up in details. I think there are two interesting
> > points here:
> > 
> > 1) what to do with VIVT
> > 2) what to write in the in-code comment
> > 
> > For 1) I think it would be OK to deny booting. For sure we need at least
> > a warning. Would you be able to add the warning/boot-denial as part of
> > this series, or at least an in-code comment?
> 
> I am planning to deny booting Xen on such platforms.
> 
> > 
> > For 2) I would like this reasonining to be captured somewhere with a
> > in-code comment, if nothing else as a reference to what to search in
> > the Arm Arm. I don't know where is the best place for it. If
> > invalidate_icache_local is not good place for the comment please suggest
> > a better location.
> 
> I still don't understand what reasoning you want to write. If we don't 
> support VIVT then the instruction cache is very easy to maintain. I.e 
> "You flush if you modify the instruction".
> 
> I am worry that if we overdo the explanation in the code, then you are 
> going to confuse more than one person. So it would be better to blank 
> out "VIVT" completely from then.
> 
> Feel free to suggest an in-code comment so we can discuss on the worthiness.

I suggest something like the following:

 /* 
  * Flush all hypervisor mappings from the TLB of the local processor. Note
  * that instruction cache maintenance might also be required when self
  * modifying Xen code, see D5-2522 in ARM DDI 0487D.a and B3.11.2 in ARM
  * DDI 0406C.c.
  */
Julien Grall May 20, 2019, 9:59 p.m. UTC | #8
On 20/05/2019 22:01, Stefano Stabellini wrote:
> On Fri, 10 May 2019, Julien Grall wrote:
>> Feel free to suggest an in-code comment so we can discuss on the worthiness.
> 
> I suggest something like the following:
> 
>   /*
>    * Flush all hypervisor mappings from the TLB of the local processor. Note
>    * that instruction cache maintenance might also be required when self
>    * modifying Xen code, see D5-2522 in ARM DDI 0487D.a and B3.11.2 in ARM
>    * DDI 0406C.c.
>    */

This looks quite out of context: what is the relation between
self-modifying code and a TLB flush?

Cheers,
Stefano Stabellini June 10, 2019, 8:51 p.m. UTC | #9
On Mon, 20 May 2019, Julien Grall wrote:
> On 20/05/2019 22:01, Stefano Stabellini wrote:
> > On Fri, 10 May 2019, Julien Grall wrote:
> >> Feel free to suggest an in-code comment so we can discuss on the worthiness.
> > 
> > I suggest something like the following:
> > 
> >   /*
> >    * Flush all hypervisor mappings from the TLB of the local processor. Note
> >    * that instruction cache maintenance might also be required when self
> >    * modifying Xen code, see D5-2522 in ARM DDI 0487D.a and B3.11.2 in ARM
> >    * DDI 0406C.c.
> >    */
> 
> This looks quite out-of-context, what is the relation between 
> self-modifying code and TLB flush?

"Flush all hypervisor mappings from the TLB of the local processor" is
the description of the function below (it cannot be seen here but it's
the function on top of which this comment is supposed to be on,
flush_xen_data_tlb_local). The rest of the comment is informative
regarding difficult cases such as self-modifying code, which was present
in the previous version of the code and I would like to retain. The
relation is that there is a good chance you need to do both.
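
To illustrate the kind of case I have in mind, here is a sketch (purely
illustrative; "entry" and "pte" are hypothetical) of remapping an
executable Xen VA, where on an ASID/VMID-tagged VIVT instruction cache
(Armv7 only) the TLB flush alone would not be enough:

    write_pte(entry, pte);                  /* new VA -> PA translation */
    flush_xen_tlb_range_va(va, PAGE_SIZE);  /* drop the stale translation */
    /*
     * A VIVT I-cache tags lines by VA, so stale instructions for this
     * VA may still be cached; on such cores invalidate the I-cache too:
     */
    invalidate_icache_local();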
Julien Grall June 10, 2019, 9:03 p.m. UTC | #10
Hi Stefano,

On 6/10/19 9:51 PM, Stefano Stabellini wrote:
> On Mon, 20 May 2019, Julien Grall wrote:
>> On 20/05/2019 22:01, Stefano Stabellini wrote:
>>> On Fri, 10 May 2019, Julien Grall wrote:
>>>> Feel free to suggest an in-code comment so we can discuss on the worthiness.
>>>
>>> I suggest something like the following:
>>>
>>>    /*
>>>     * Flush all hypervisor mappings from the TLB of the local processor. Note
>>>     * that instruction cache maintenance might also be required when self
>>>     * modifying Xen code, see D5-2522 in ARM DDI 0487D.a and B3.11.2 in ARM
>>>     * DDI 0406C.c.
>>>     */
>>
>> This looks quite out-of-context, what is the relation between
>> self-modifying code and TLB flush?
> 
> "Flush all hypervisor mappings from the TLB of the local processor" is
> the description of the function below (it cannot be seen here but it's
> the function on top of which this comment is supposed to be on,
> flush_xen_data_tlb_local). The rest of the comment is informative
> regarding difficult cases such as self-modifying code, which was present
> in the previous version of the code and I would like to retain. The
> relation is that there is a good chance you need to do both.
Sorry, but this doesn't make sense to me. You are unlikely to modify
mappings when using self-modifying code. And if you were, then because
instruction caches implement the IVIPT extension (assuming we forbid
VIVT caches as suggested by patch #1 for Arm32), there is no need to
maintain the cache because the physical address would be different.

None of the self-modifying code in Xen (i.e. alternatives, livepatch)
requires TLB maintenance. I also can't see when the two would be
necessary at the same time.

Can you please give a concrete example where it would be necessary?

Cheers,
Stefano Stabellini June 11, 2019, 6:15 p.m. UTC | #11
On Mon, 10 Jun 2019, Julien Grall wrote:
> Hi Stefano,
> 
> On 6/10/19 9:51 PM, Stefano Stabellini wrote:
> > On Mon, 20 May 2019, Julien Grall wrote:
> > > On 20/05/2019 22:01, Stefano Stabellini wrote:
> > > > On Fri, 10 May 2019, Julien Grall wrote:
> > > > > Feel free to suggest an in-code comment so we can discuss on the
> > > > > worthiness.
> > > > 
> > > > I suggest something like the following:
> > > > 
> > > >    /*
> > > >     * Flush all hypervisor mappings from the TLB of the local processor.
> > > > Note
> > > >     * that instruction cache maintenance might also be required when
> > > > self
> > > >     * modifying Xen code, see D5-2522 in ARM DDI 0487D.a and B3.11.2 in
> > > > ARM
> > > >     * DDI 0406C.c.
> > > >     */
> > > 
> > > This looks quite out-of-context, what is the relation between
> > > self-modifying code and TLB flush?
> > 
> > "Flush all hypervisor mappings from the TLB of the local processor" is
> > the description of the function below (it cannot be seen here but it's
> > the function on top of which this comment is supposed to be on,
> > flush_xen_data_tlb_local). The rest of the comment is informative
> > regarding difficult cases such as self-modifying code, which was present
> > in the previous version of the code and I would like to retain. The
> > relation is that there is a good chance you need to do both.
> Sorry but this doesn't make sense to me. You are unlikely going to modify
> mapping when using self-modifying. And if you were, then because instructions
> caches are implementing the IVIPT extension (assuming we forbid IVIVT cache as
> suggested by patch #1 for Arm32) there are no need to modifying the cache
> because the physical address would be different.
> 
> All the self-modifying code in Xen (i.e alternative, livepatch) don't requires
> a TLB maintenance. I also can't see when the two would be necessary at the
> same.
> 
> Can you please give a concrete example where it would be necessary?

Given the scarcity of VIVT platforms out there, the unlikely usefulness
in the IVIPT case, and the fact that this is just a comment, I don't
think this issue is worth spending more time on.

For v3 of the patch:

Acked-by: Stefano Stabellini <sstabellini@kernel.org>

Patch

diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index dfbe39c70a..8ee828d445 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -335,7 +335,7 @@  void set_fixmap(unsigned map, mfn_t mfn, unsigned int flags)
     pte.pt.table = 1; /* 4k mappings always have this bit set */
     pte.pt.xn = 1;
     write_pte(xen_fixmap + third_table_offset(FIXMAP_ADDR(map)), pte);
-    flush_xen_data_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
+    flush_xen_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
 }
 
 /* Remove a mapping from a fixmap entry */
@@ -343,7 +343,7 @@  void clear_fixmap(unsigned map)
 {
     lpae_t pte = {0};
     write_pte(xen_fixmap + third_table_offset(FIXMAP_ADDR(map)), pte);
-    flush_xen_data_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
+    flush_xen_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
 }
 
 /* Create Xen's mappings of memory.
@@ -377,7 +377,7 @@  static void __init create_mappings(lpae_t *second,
         write_pte(p + i, pte);
         pte.pt.base += 1 << LPAE_SHIFT;
     }
-    flush_xen_data_tlb_local();
+    flush_xen_tlb_local();
 }
 
 #ifdef CONFIG_DOMAIN_PAGE
@@ -455,7 +455,7 @@  void *map_domain_page(mfn_t mfn)
      * We may not have flushed this specific subpage at map time,
      * since we only flush the 4k page not the superpage
      */
-    flush_xen_data_tlb_range_va_local(va, PAGE_SIZE);
+    flush_xen_tlb_range_va_local(va, PAGE_SIZE);
 
     return (void *)va;
 }
@@ -598,7 +598,7 @@  void __init remove_early_mappings(void)
     write_pte(xen_second + second_table_offset(BOOT_FDT_VIRT_START), pte);
     write_pte(xen_second + second_table_offset(BOOT_FDT_VIRT_START + SZ_2M),
               pte);
-    flush_xen_data_tlb_range_va(BOOT_FDT_VIRT_START, BOOT_FDT_SLOT_SIZE);
+    flush_xen_tlb_range_va(BOOT_FDT_VIRT_START, BOOT_FDT_SLOT_SIZE);
 }
 
 /*
@@ -615,7 +615,7 @@  static void xen_pt_enforce_wnx(void)
      * before flushing the TLBs.
      */
     isb();
-    flush_xen_data_tlb_local();
+    flush_xen_tlb_local();
 }
 
 extern void switch_ttbr(uint64_t ttbr);
@@ -879,7 +879,7 @@  void __init setup_xenheap_mappings(unsigned long base_mfn,
         vaddr += FIRST_SIZE;
     }
 
-    flush_xen_data_tlb_local();
+    flush_xen_tlb_local();
 }
 #endif
 
@@ -1052,7 +1052,7 @@  static int create_xen_entries(enum xenmap_operation op,
                 BUG();
         }
     }
-    flush_xen_data_tlb_range_va(virt, PAGE_SIZE * nr_mfns);
+    flush_xen_tlb_range_va(virt, PAGE_SIZE * nr_mfns);
 
     rc = 0;
 
@@ -1127,7 +1127,7 @@  static void set_pte_flags_on_range(const char *p, unsigned long l, enum mg mg)
         }
         write_pte(xen_xenmap + i, pte);
     }
-    flush_xen_data_tlb_local();
+    flush_xen_tlb_local();
 }
 
 /* Release all __init and __initdata ranges to be reused */
diff --git a/xen/include/asm-arm/arm32/page.h b/xen/include/asm-arm/arm32/page.h
index 40a77daa9d..0b41b9214b 100644
--- a/xen/include/asm-arm/arm32/page.h
+++ b/xen/include/asm-arm/arm32/page.h
@@ -61,12 +61,8 @@  static inline void invalidate_icache_local(void)
     isb();                      /* Synchronize fetched instruction stream. */
 }
 
-/*
- * Flush all hypervisor mappings from the data TLB of the local
- * processor. This is not sufficient when changing code mappings or
- * for self modifying code.
- */
-static inline void flush_xen_data_tlb_local(void)
+/* Flush all hypervisor mappings from the TLB of the local processor. */
+static inline void flush_xen_tlb_local(void)
 {
     asm volatile("dsb;" /* Ensure preceding are visible */
                  CMD_CP32(TLBIALLH)
@@ -76,14 +72,13 @@  static inline void flush_xen_data_tlb_local(void)
 }
 
 /* Flush TLB of local processor for address va. */
-static inline void __flush_xen_data_tlb_one_local(vaddr_t va)
+static inline void __flush_xen_tlb_one_local(vaddr_t va)
 {
     asm volatile(STORE_CP32(0, TLBIMVAH) : : "r" (va) : "memory");
 }
 
-/* Flush TLB of all processors in the inner-shareable domain for
- * address va. */
-static inline void __flush_xen_data_tlb_one(vaddr_t va)
+/* Flush TLB of all processors in the inner-shareable domain for address va. */
+static inline void __flush_xen_tlb_one(vaddr_t va)
 {
     asm volatile(STORE_CP32(0, TLBIMVAHIS) : : "r" (va) : "memory");
 }
diff --git a/xen/include/asm-arm/arm64/page.h b/xen/include/asm-arm/arm64/page.h
index 6c36d0210f..31d04ecf76 100644
--- a/xen/include/asm-arm/arm64/page.h
+++ b/xen/include/asm-arm/arm64/page.h
@@ -45,12 +45,8 @@  static inline void invalidate_icache_local(void)
     isb();
 }
 
-/*
- * Flush all hypervisor mappings from the data TLB of the local
- * processor. This is not sufficient when changing code mappings or
- * for self modifying code.
- */
-static inline void flush_xen_data_tlb_local(void)
+/* Flush all hypervisor mappings from the TLB of the local processor. */
+static inline void flush_xen_tlb_local(void)
 {
     asm volatile (
         "dsb    sy;"                    /* Ensure visibility of PTE writes */
@@ -61,14 +57,13 @@  static inline void flush_xen_data_tlb_local(void)
 }
 
 /* Flush TLB of local processor for address va. */
-static inline void  __flush_xen_data_tlb_one_local(vaddr_t va)
+static inline void  __flush_xen_tlb_one_local(vaddr_t va)
 {
     asm volatile("tlbi vae2, %0;" : : "r" (va>>PAGE_SHIFT) : "memory");
 }
 
-/* Flush TLB of all processors in the inner-shareable domain for
- * address va. */
-static inline void __flush_xen_data_tlb_one(vaddr_t va)
+/* Flush TLB of all processors in the inner-shareable domain for address va. */
+static inline void __flush_xen_tlb_one(vaddr_t va)
 {
     asm volatile("tlbi vae2is, %0;" : : "r" (va>>PAGE_SHIFT) : "memory");
 }
diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
index 1a1713ce02..195345e24a 100644
--- a/xen/include/asm-arm/page.h
+++ b/xen/include/asm-arm/page.h
@@ -234,18 +234,18 @@  static inline int clean_and_invalidate_dcache_va_range
 } while (0)
 
 /*
- * Flush a range of VA's hypervisor mappings from the data TLB of the
- * local processor. This is not sufficient when changing code mappings
- * or for self modifying code.
+ * Flush a range of VA's hypervisor mappings from the TLB of the local
+ * processor.
  */
-static inline void flush_xen_data_tlb_range_va_local(unsigned long va,
-                                                     unsigned long size)
+static inline void flush_xen_tlb_range_va_local(vaddr_t va,
+                                                unsigned long size)
 {
-    unsigned long end = va + size;
+    vaddr_t end = va + size;
+
     dsb(sy); /* Ensure preceding are visible */
     while ( va < end )
     {
-        __flush_xen_data_tlb_one_local(va);
+        __flush_xen_tlb_one_local(va);
         va += PAGE_SIZE;
     }
     dsb(sy); /* Ensure completion of the TLB flush */
@@ -253,18 +253,18 @@  static inline void flush_xen_data_tlb_range_va_local(unsigned long va,
 }
 
 /*
- * Flush a range of VA's hypervisor mappings from the data TLB of all
- * processors in the inner-shareable domain. This is not sufficient
- * when changing code mappings or for self modifying code.
+ * Flush a range of VA's hypervisor mappings from the TLB of all
+ * processors in the inner-shareable domain.
  */
-static inline void flush_xen_data_tlb_range_va(unsigned long va,
-                                               unsigned long size)
+static inline void flush_xen_tlb_range_va(vaddr_t va,
+                                          unsigned long size)
 {
-    unsigned long end = va + size;
+    vaddr_t end = va + size;
+
     dsb(sy); /* Ensure preceding are visible */
     while ( va < end )
     {
-        __flush_xen_data_tlb_one(va);
+        __flush_xen_tlb_one(va);
         va += PAGE_SIZE;
     }
     dsb(sy); /* Ensure completion of the TLB flush */