diff mbox series

[v3,12/15] arm64/mm: Split __flush_tlb_range() to elide trailing DSB

Message ID 20231204105440.61448-13-ryan.roberts@arm.com (mailing list archive)
State New, archived
Headers show
Series Transparent Contiguous PTEs for User Mappings | expand

Commit Message

Ryan Roberts Dec. 4, 2023, 10:54 a.m. UTC
Split __flush_tlb_range() into __flush_tlb_range_nosync() +
__flush_tlb_range(), in the same way as the existing flush_tlb_page()
arrangement. This allows calling __flush_tlb_range_nosync() to elide the
trailing DSB. Forthcoming "contpte" code will take advantage of this
when clearing the young bit from a contiguous range of ptes.

Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---
 arch/arm64/include/asm/tlbflush.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

Comments

Will Deacon Dec. 12, 2023, 11:35 a.m. UTC | #1
On Mon, Dec 04, 2023 at 10:54:37AM +0000, Ryan Roberts wrote:
> Split __flush_tlb_range() into __flush_tlb_range_nosync() +
> __flush_tlb_range(), in the same way as the existing flush_tlb_page()
> arrangement. This allows calling __flush_tlb_range_nosync() to elide the
> trailing DSB. Forthcoming "contpte" code will take advantage of this
> when clearing the young bit from a contiguous range of ptes.
> 
> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
> ---
>  arch/arm64/include/asm/tlbflush.h | 13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> index bb2c2833a987..925ef3bdf9ed 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -399,7 +399,7 @@ do {									\
>  #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
>  	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
>  
> -static inline void __flush_tlb_range(struct vm_area_struct *vma,
> +static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
>  				     unsigned long start, unsigned long end,
>  				     unsigned long stride, bool last_level,
>  				     int tlb_level)
> @@ -431,10 +431,19 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
>  	else
>  		__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
>  
> -	dsb(ish);
>  	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
>  }
>  
> +static inline void __flush_tlb_range(struct vm_area_struct *vma,
> +				     unsigned long start, unsigned long end,
> +				     unsigned long stride, bool last_level,
> +				     int tlb_level)
> +{
> +	__flush_tlb_range_nosync(vma, start, end, stride,
> +				 last_level, tlb_level);
> +	dsb(ish);
> +}

Hmm, are you sure it's safe to defer the DSB until after the secondary TLB
invalidation? It will have a subtle effect on e.g. an SMMU participating
in broadcast TLB maintenance, because now the ATC will be invalidated
before completion of the TLB invalidation and it's not obviously safe to me.

Will
Ryan Roberts Dec. 12, 2023, 11:47 a.m. UTC | #2
On 12/12/2023 11:35, Will Deacon wrote:
> On Mon, Dec 04, 2023 at 10:54:37AM +0000, Ryan Roberts wrote:
>> Split __flush_tlb_range() into __flush_tlb_range_nosync() +
>> __flush_tlb_range(), in the same way as the existing flush_tlb_page()
>> arrangement. This allows calling __flush_tlb_range_nosync() to elide the
>> trailing DSB. Forthcoming "contpte" code will take advantage of this
>> when clearing the young bit from a contiguous range of ptes.
>>
>> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
>> ---
>>  arch/arm64/include/asm/tlbflush.h | 13 +++++++++++--
>>  1 file changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
>> index bb2c2833a987..925ef3bdf9ed 100644
>> --- a/arch/arm64/include/asm/tlbflush.h
>> +++ b/arch/arm64/include/asm/tlbflush.h
>> @@ -399,7 +399,7 @@ do {									\
>>  #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
>>  	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
>>  
>> -static inline void __flush_tlb_range(struct vm_area_struct *vma,
>> +static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
>>  				     unsigned long start, unsigned long end,
>>  				     unsigned long stride, bool last_level,
>>  				     int tlb_level)
>> @@ -431,10 +431,19 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>  	else
>>  		__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
>>  
>> -	dsb(ish);
>>  	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
>>  }
>>  
>> +static inline void __flush_tlb_range(struct vm_area_struct *vma,
>> +				     unsigned long start, unsigned long end,
>> +				     unsigned long stride, bool last_level,
>> +				     int tlb_level)
>> +{
>> +	__flush_tlb_range_nosync(vma, start, end, stride,
>> +				 last_level, tlb_level);
>> +	dsb(ish);
>> +}
> 
> Hmm, are you sure it's safe to defer the DSB until after the secondary TLB
> invalidation? It will have a subtle effect on e.g. an SMMU participating
> in broadcast TLB maintenance, because now the ATC will be invalidated
> before completion of the TLB invalidation and it's not obviously safe to me.

I'll be honest; I don't know that it's safe. The notifier calls turned up during
a rebase and I stared at it for a while, before eventually concluding that I
should just follow the existing pattern in __flush_tlb_page_nosync(): That one
calls the mmu notifier without the dsb, then flush_tlb_page() does the dsb
after. So I assumed it was safe.

If you think it's not safe, I guess there is a bug to fix in
__flush_tlb_page_nosync()?



> 
> Will
Ryan Roberts Dec. 14, 2023, 11:53 a.m. UTC | #3
Hi Will,

On 12/12/2023 11:47, Ryan Roberts wrote:
> On 12/12/2023 11:35, Will Deacon wrote:
>> On Mon, Dec 04, 2023 at 10:54:37AM +0000, Ryan Roberts wrote:
>>> Split __flush_tlb_range() into __flush_tlb_range_nosync() +
>>> __flush_tlb_range(), in the same way as the existing flush_tlb_page()
>>> arrangement. This allows calling __flush_tlb_range_nosync() to elide the
>>> trailing DSB. Forthcoming "contpte" code will take advantage of this
>>> when clearing the young bit from a contiguous range of ptes.
>>>
>>> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
>>> ---
>>>  arch/arm64/include/asm/tlbflush.h | 13 +++++++++++--
>>>  1 file changed, 11 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
>>> index bb2c2833a987..925ef3bdf9ed 100644
>>> --- a/arch/arm64/include/asm/tlbflush.h
>>> +++ b/arch/arm64/include/asm/tlbflush.h
>>> @@ -399,7 +399,7 @@ do {									\
>>>  #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
>>>  	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
>>>  
>>> -static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>> +static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
>>>  				     unsigned long start, unsigned long end,
>>>  				     unsigned long stride, bool last_level,
>>>  				     int tlb_level)
>>> @@ -431,10 +431,19 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>>  	else
>>>  		__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
>>>  
>>> -	dsb(ish);
>>>  	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
>>>  }
>>>  
>>> +static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>> +				     unsigned long start, unsigned long end,
>>> +				     unsigned long stride, bool last_level,
>>> +				     int tlb_level)
>>> +{
>>> +	__flush_tlb_range_nosync(vma, start, end, stride,
>>> +				 last_level, tlb_level);
>>> +	dsb(ish);
>>> +}
>>
>> Hmm, are you sure it's safe to defer the DSB until after the secondary TLB
>> invalidation? It will have a subtle effect on e.g. an SMMU participating
>> in broadcast TLB maintenance, because now the ATC will be invalidated
>> before completion of the TLB invalidation and it's not obviously safe to me.
> 
> I'll be honest; I don't know that it's safe. The notifier calls turned up during
> a rebase and I stared at it for a while, before eventually concluding that I
> should just follow the existing pattern in __flush_tlb_page_nosync(): That one
> calls the mmu notifier without the dsb, then flush_tlb_page() does the dsb
> after. So I assumed it was safe.
> 
> If you think it's not safe, I guess there is a bug to fix in
> __flush_tlb_page_nosync()?

Did you have an opinion on this? I'm just putting together a v4 of this series,
and I'll remove this optimization if you think it's unsound. But in that case, I
guess we have an existing bug to fix too?

Thanks,
Ryan


> 
> 
> 
>>
>> Will
>
Will Deacon Dec. 14, 2023, 12:13 p.m. UTC | #4
On Thu, Dec 14, 2023 at 11:53:52AM +0000, Ryan Roberts wrote:
> On 12/12/2023 11:47, Ryan Roberts wrote:
> > On 12/12/2023 11:35, Will Deacon wrote:
> >> On Mon, Dec 04, 2023 at 10:54:37AM +0000, Ryan Roberts wrote:
> >>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> >>> index bb2c2833a987..925ef3bdf9ed 100644
> >>> --- a/arch/arm64/include/asm/tlbflush.h
> >>> +++ b/arch/arm64/include/asm/tlbflush.h
> >>> @@ -399,7 +399,7 @@ do {									\
> >>>  #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
> >>>  	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
> >>>  
> >>> -static inline void __flush_tlb_range(struct vm_area_struct *vma,
> >>> +static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
> >>>  				     unsigned long start, unsigned long end,
> >>>  				     unsigned long stride, bool last_level,
> >>>  				     int tlb_level)
> >>> @@ -431,10 +431,19 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
> >>>  	else
> >>>  		__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
> >>>  
> >>> -	dsb(ish);
> >>>  	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
> >>>  }
> >>>  
> >>> +static inline void __flush_tlb_range(struct vm_area_struct *vma,
> >>> +				     unsigned long start, unsigned long end,
> >>> +				     unsigned long stride, bool last_level,
> >>> +				     int tlb_level)
> >>> +{
> >>> +	__flush_tlb_range_nosync(vma, start, end, stride,
> >>> +				 last_level, tlb_level);
> >>> +	dsb(ish);
> >>> +}
> >>
> >> Hmm, are you sure it's safe to defer the DSB until after the secondary TLB
> >> invalidation? It will have a subtle effect on e.g. an SMMU participating
> >> in broadcast TLB maintenance, because now the ATC will be invalidated
> >> before completion of the TLB invalidation and it's not obviously safe to me.
> > 
> > I'll be honest; I don't know that it's safe. The notifier calls turned up during
> > a rebase and I stared at it for a while, before eventually concluding that I
> > should just follow the existing pattern in __flush_tlb_page_nosync(): That one
> > calls the mmu notifier without the dsb, then flush_tlb_page() does the dsb
> > after. So I assumed it was safe.
> > 
> > If you think it's not safe, I guess there is a bug to fix in
> > __flush_tlb_page_nosync()?
> 
> Did you have an opinion on this? I'm just putting together a v4 of this series,
> and I'll remove this optimization if you think it's unsound. But in that case, I
> guess we have an existing bug to fix too?

Sorry, Ryan, I've not had a chance to look into it in more detail. But as
you rightly point out, you're not introducing the issue (assuming it is
one), so I don't think it needs to hold you up. Your code just makes the
thing more "obvious" to me.

Robin, Jean-Philippe -- do we need to make sure that the SMMU has completed
its TLB invalidation before issuing an ATC invalidate? My half-baked worry
is whether or not an ATS request could refill the ATC before the TLBI
has completed, therefore rendering the ATC invalidation useless.

Will
Robin Murphy Dec. 14, 2023, 12:30 p.m. UTC | #5
On 2023-12-14 12:13 pm, Will Deacon wrote:
> On Thu, Dec 14, 2023 at 11:53:52AM +0000, Ryan Roberts wrote:
>> On 12/12/2023 11:47, Ryan Roberts wrote:
>>> On 12/12/2023 11:35, Will Deacon wrote:
>>>> On Mon, Dec 04, 2023 at 10:54:37AM +0000, Ryan Roberts wrote:
>>>>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
>>>>> index bb2c2833a987..925ef3bdf9ed 100644
>>>>> --- a/arch/arm64/include/asm/tlbflush.h
>>>>> +++ b/arch/arm64/include/asm/tlbflush.h
>>>>> @@ -399,7 +399,7 @@ do {									\
>>>>>   #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
>>>>>   	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
>>>>>   
>>>>> -static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>>>> +static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
>>>>>   				     unsigned long start, unsigned long end,
>>>>>   				     unsigned long stride, bool last_level,
>>>>>   				     int tlb_level)
>>>>> @@ -431,10 +431,19 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>>>>   	else
>>>>>   		__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
>>>>>   
>>>>> -	dsb(ish);
>>>>>   	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
>>>>>   }
>>>>>   
>>>>> +static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>>>> +				     unsigned long start, unsigned long end,
>>>>> +				     unsigned long stride, bool last_level,
>>>>> +				     int tlb_level)
>>>>> +{
>>>>> +	__flush_tlb_range_nosync(vma, start, end, stride,
>>>>> +				 last_level, tlb_level);
>>>>> +	dsb(ish);
>>>>> +}
>>>>
>>>> Hmm, are you sure it's safe to defer the DSB until after the secondary TLB
>>>> invalidation? It will have a subtle effect on e.g. an SMMU participating
>>>> in broadcast TLB maintenance, because now the ATC will be invalidated
>>>> before completion of the TLB invalidation and it's not obviously safe to me.
>>>
>>> I'll be honest; I don't know that it's safe. The notifier calls turned up during
>>> a rebase and I stared at it for a while, before eventually concluding that I
>>> should just follow the existing pattern in __flush_tlb_page_nosync(): That one
>>> calls the mmu notifier without the dsb, then flush_tlb_page() does the dsb
>>> after. So I assumed it was safe.
>>>
>>> If you think it's not safe, I guess there is a bug to fix in
>>> __flush_tlb_page_nosync()?
>>
>> Did you have an opinion on this? I'm just putting together a v4 of this series,
>> and I'll remove this optimization if you think it's unsound. But in that case, I
>> guess we have an existing bug to fix too?
> 
> Sorry, Ryan, I've not had a chance to look into it in more detail. But as
> you rightly point out, you're not introducing the issue (assuming it is
> one), so I don't think it needs to hold you up. Your code just makes the
> thing more "obvious" to me.
> 
> Robin, Jean-Philippe -- do we need to make sure that the SMMU has completed
> its TLB invalidation before issuing an ATC invalidate? My half-baked worry
> is whether or not an ATS request could refill the ATC before the TLBI
> has completed, therefore rendering the ATC invalidation useless.

I would agree, and the spec for CMD_ATC_INV does call out a 
TLBI->sync->ATCI->sync sequence. At the moment the SVA notifier is 
issuing its own command-based TLBIs anyway so the necessary sync is 
implicit there, but if and when we get BTM support wired up properly it 
would be nice not to have to bodge in an additional sync/DSB.

Cheers,
Robin.
Ryan Roberts Dec. 14, 2023, 2:28 p.m. UTC | #6
On 14/12/2023 12:30, Robin Murphy wrote:
> On 2023-12-14 12:13 pm, Will Deacon wrote:
>> On Thu, Dec 14, 2023 at 11:53:52AM +0000, Ryan Roberts wrote:
>>> On 12/12/2023 11:47, Ryan Roberts wrote:
>>>> On 12/12/2023 11:35, Will Deacon wrote:
>>>>> On Mon, Dec 04, 2023 at 10:54:37AM +0000, Ryan Roberts wrote:
>>>>>> diff --git a/arch/arm64/include/asm/tlbflush.h
>>>>>> b/arch/arm64/include/asm/tlbflush.h
>>>>>> index bb2c2833a987..925ef3bdf9ed 100644
>>>>>> --- a/arch/arm64/include/asm/tlbflush.h
>>>>>> +++ b/arch/arm64/include/asm/tlbflush.h
>>>>>> @@ -399,7 +399,7 @@ do {                                    \
>>>>>>   #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
>>>>>>       __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
>>>>>>   -static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>>>>> +static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
>>>>>>                        unsigned long start, unsigned long end,
>>>>>>                        unsigned long stride, bool last_level,
>>>>>>                        int tlb_level)
>>>>>> @@ -431,10 +431,19 @@ static inline void __flush_tlb_range(struct
>>>>>> vm_area_struct *vma,
>>>>>>       else
>>>>>>           __flush_tlb_range_op(vae1is, start, pages, stride, asid,
>>>>>> tlb_level, true);
>>>>>>   -    dsb(ish);
>>>>>>       mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
>>>>>>   }
>>>>>>   +static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>>>>> +                     unsigned long start, unsigned long end,
>>>>>> +                     unsigned long stride, bool last_level,
>>>>>> +                     int tlb_level)
>>>>>> +{
>>>>>> +    __flush_tlb_range_nosync(vma, start, end, stride,
>>>>>> +                 last_level, tlb_level);
>>>>>> +    dsb(ish);
>>>>>> +}
>>>>>
>>>>> Hmm, are you sure it's safe to defer the DSB until after the secondary TLB
>>>>> invalidation? It will have a subtle effect on e.g. an SMMU participating
>>>>> in broadcast TLB maintenance, because now the ATC will be invalidated
>>>>> before completion of the TLB invalidation and it's not obviously safe to me.
>>>>
>>>> I'll be honest; I don't know that it's safe. The notifier calls turned up
>>>> during
>>>> a rebase and I stared at it for a while, before eventually concluding that I
>>>> should just follow the existing pattern in __flush_tlb_page_nosync(): That one
>>>> calls the mmu notifier without the dsb, then flush_tlb_page() does the dsb
>>>> after. So I assumed it was safe.
>>>>
>>>> If you think it's not safe, I guess there is a bug to fix in
>>>> __flush_tlb_page_nosync()?
>>>
>>> Did you have an opinion on this? I'm just putting together a v4 of this series,
>>> and I'll remove this optimization if you think it's unsound. But in that case, I
>>> guess we have an existing bug to fix too?
>>
>> Sorry, Ryan, I've not had a chance to look into it in more detail. But as
>> you rightly point out, you're not introducing the issue (assuming it is
>> one), so I don't think it needs to hold you up. Your code just makes the
>> thing more "obvious" to me.

OK thanks. I'll leave my code as is for now then - that makes it easier to do an
A/B performance comparison with the existing code. And I can change it if/when
mainline changes (presumably to add the dsb between the tlbi and the mmu
notifier callback).

>>
>> Robin, Jean-Philippe -- do we need to make sure that the SMMU has completed
>> its TLB invalidation before issuing an ATC invalidate? My half-baked worry
>> is whether or not an ATS request could refill the ATC before the TLBI
>> has completed, therefore rendering the ATC invalidation useless.
> 
> I would agree, and the spec for CMD_ATC_INV does call out a
> TLBI->sync->ATCI->sync sequence. At the moment the SVA notifier is issuing its
> own command-based TLBIs anyway so the necessary sync is implicit there, but if
> and when we get BTM support wired up properly it would be nice not to have to
> bodge in an additional sync/DSB.
> 
> Cheers,
> Robin.
Jean-Philippe Brucker Dec. 14, 2023, 3:22 p.m. UTC | #7
On Thu, Dec 14, 2023 at 12:30:55PM +0000, Robin Murphy wrote:
> > Robin, Jean-Philippe -- do we need to make sure that the SMMU has completed
> > its TLB invalidation before issuing an ATC invalidate? My half-baked worry
> > is whether or not an ATS request could refill the ATC before the TLBI
> > has completed, therefore rendering the ATC invalidation useless.
> 
> I would agree, and the spec for CMD_ATC_INV does call out a
> TLBI->sync->ATCI->sync sequence. At the moment the SVA notifier is issuing
> its own command-based TLBIs anyway so the necessary sync is implicit there,
> but if and when we get BTM support wired up properly it would be nice not to
> have to bodge in an additional sync/DSB.

Yes agreed, with BTM the CPU must call the notifier that issues ATC
invalidation after completing the TLBI+DSB instructions.

SMMU IHI0070F.a  3.9.1 ATS Interface

	Software must ensure that the SMMU TLB invalidation is complete before
	initiating the ATC invalidation.

I'm guessing BTM will be enabled in the SMMU driver sometime soon, given
that there already is one implementation in the wild that could use it. I
think we didn't enable it because of the lack of separation between shared
and private VMIDs, but that may now be solvable with the recent rework of
the VMID allocator.

Thanks,
Jean
Jonathan Cameron Dec. 14, 2023, 4:45 p.m. UTC | #8
On Thu, 14 Dec 2023 15:22:06 +0000
Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:

> On Thu, Dec 14, 2023 at 12:30:55PM +0000, Robin Murphy wrote:
> > > Robin, Jean-Philippe -- do we need to make sure that the SMMU has completed
> > > its TLB invalidation before issuing an ATC invalidate? My half-baked worry
> > > is whether or not an ATS request could refill the ATC before the TLBI
> > > has completed, therefore rendering the ATC invalidation useless.  
> > 
> > I would agree, and the spec for CMD_ATC_INV does call out a
> > TLBI->sync->ATCI->sync sequence. At the moment the SVA notifier is issuing
> > its own command-based TLBIs anyway so the necessary sync is implicit there,
> > but if and when we get BTM support wired up properly it would be nice not to
> > have to bodge in an additional sync/DSB.  
> 
> Yes agreed, with BTM the CPU must call the notifier that issues ATC
> invalidation after completing the TLBI+DSB instructions.
> 
> SMMU IHI0070F.a  3.9.1 ATS Interface
> 
> 	Software must ensure that the SMMU TLB invalidation is complete before
> 	initiating the ATC invalidation.
> 
> I'm guessing BTM will be enabled in the SMMU driver sometime soon, given
> that there already is one implementation in the wild that could use it. I
> think we didn't enable it because of the lack of separation between shared
> and private VMIDs, but that may now be solvable with the recent rework of
> the VMID allocator.
> 

+CC Shameer.  We'll indeed need to fix this when enabling BTM.

Thanks for the heads up.

Jonathan

> Thanks,
> Jean
>
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bb2c2833a987..925ef3bdf9ed 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -399,7 +399,7 @@  do {									\
 #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
 	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
 
-static inline void __flush_tlb_range(struct vm_area_struct *vma,
+static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
 				     unsigned long stride, bool last_level,
 				     int tlb_level)
@@ -431,10 +431,19 @@  static inline void __flush_tlb_range(struct vm_area_struct *vma,
 	else
 		__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
 
-	dsb(ish);
 	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
 }
 
+static inline void __flush_tlb_range(struct vm_area_struct *vma,
+				     unsigned long start, unsigned long end,
+				     unsigned long stride, bool last_level,
+				     int tlb_level)
+{
+	__flush_tlb_range_nosync(vma, start, end, stride,
+				 last_level, tlb_level);
+	dsb(ish);
+}
+
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end)
 {