diff mbox series

[v2,5/6] mm: tlb: Provide flush_*_tlb_range wrappers

Message ID 20200423135656.2712-6-yezhenyu2@huawei.com (mailing list archive)
State New, archived
Headers show
Series arm64: tlb: add support for TTL feature | expand

Commit Message

Zhenyu Ye April 23, 2020, 1:56 p.m. UTC
This patch provides flush_{pte|pmd|pud|p4d}_tlb_range() in generic
code, which are expressed through the mmu_gather APIs.  These
interface set tlb->cleared_* and finally call tlb_flush(), so we
can do the tlb invalidation according to the information in
struct mmu_gather.

Signed-off-by: Zhenyu Ye <yezhenyu2@huawei.com>
---
 include/asm-generic/pgtable.h | 12 ++++++++++--
 mm/pgtable-generic.c          | 22 ++++++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

Comments

Catalin Marinas May 22, 2020, 3:42 p.m. UTC | #1
On Thu, Apr 23, 2020 at 09:56:55PM +0800, Zhenyu Ye wrote:
> diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
> index 3d7c01e76efc..3eff199d3507 100644
> --- a/mm/pgtable-generic.c
> +++ b/mm/pgtable-generic.c
> @@ -101,6 +101,28 @@ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
>  
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  
> +#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
> +
> +#define FLUSH_Pxx_TLB_RANGE(_pxx)					\
> +void flush_##_pxx##_tlb_range(struct vm_area_struct *vma,		\
> +			      unsigned long addr, unsigned long end)	\
> +{									\
> +		struct mmu_gather tlb;					\
> +									\
> +		tlb_gather_mmu(&tlb, vma->vm_mm, addr, end);		\
> +		tlb_start_vma(&tlb, vma);				\
> +		tlb_flush_##_pxx##_range(&tlb, addr, end - addr);	\
> +		tlb_end_vma(&tlb, vma);					\
> +		tlb_finish_mmu(&tlb, addr, end);			\
> +}

I may have confused myself (flush_p??_tlb_* vs. tlb_flush_p??_*) but do
actually need this whole tlb_gather thing here? IIUC (by grep'ing),
flush_p?d_tlb_range() is only called on huge pages, so we should know
the level already.
Zhenyu Ye May 25, 2020, 7:19 a.m. UTC | #2
On 2020/5/22 23:42, Catalin Marinas wrote:
> On Thu, Apr 23, 2020 at 09:56:55PM +0800, Zhenyu Ye wrote:
>> diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
>> index 3d7c01e76efc..3eff199d3507 100644
>> --- a/mm/pgtable-generic.c
>> +++ b/mm/pgtable-generic.c
>> @@ -101,6 +101,28 @@ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
>>  
>>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>  
>> +#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
>> +
>> +#define FLUSH_Pxx_TLB_RANGE(_pxx)					\
>> +void flush_##_pxx##_tlb_range(struct vm_area_struct *vma,		\
>> +			      unsigned long addr, unsigned long end)	\
>> +{									\
>> +		struct mmu_gather tlb;					\
>> +									\
>> +		tlb_gather_mmu(&tlb, vma->vm_mm, addr, end);		\
>> +		tlb_start_vma(&tlb, vma);				\
>> +		tlb_flush_##_pxx##_range(&tlb, addr, end - addr);	\
>> +		tlb_end_vma(&tlb, vma);					\
>> +		tlb_finish_mmu(&tlb, addr, end);			\
>> +}
> 
> I may have confused myself (flush_p??_tlb_* vs. tlb_flush_p??_*) but do
> actually need this whole tlb_gather thing here? IIUC (by grep'ing),
> flush_p?d_tlb_range() is only called on huge pages, so we should know
> the level already.
> 

tlb_flush_##_pxx##_range() is used to set tlb->cleared_*,
flush_##_pxx##_tlb_range() will actually flush the TLB entry.

In arch64, tlb_flush_p?d_range() is defined as:

	#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
	#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)

So even if we know the level here, we can not pass the value to tlbi
instructions (flush_tlb_range() is a common kernel interface and retro-fit it
needs lots of changes), according to Peter's suggestion, I finally decide to
pass the value of TTL by the tlb_gather_* frame.[1]

[1] https://lore.kernel.org/linux-arm-kernel/20200331142927.1237-1-yezhenyu2@huawei.com/

Thanks,
Zhenyu
Catalin Marinas May 26, 2020, 2:52 p.m. UTC | #3
On Mon, May 25, 2020 at 03:19:42PM +0800, Zhenyu Ye wrote:
> On 2020/5/22 23:42, Catalin Marinas wrote:
> > On Thu, Apr 23, 2020 at 09:56:55PM +0800, Zhenyu Ye wrote:
> >> diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
> >> index 3d7c01e76efc..3eff199d3507 100644
> >> --- a/mm/pgtable-generic.c
> >> +++ b/mm/pgtable-generic.c
> >> @@ -101,6 +101,28 @@ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
> >>  
> >>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> >>  
> >> +#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
> >> +
> >> +#define FLUSH_Pxx_TLB_RANGE(_pxx)					\
> >> +void flush_##_pxx##_tlb_range(struct vm_area_struct *vma,		\
> >> +			      unsigned long addr, unsigned long end)	\
> >> +{									\
> >> +		struct mmu_gather tlb;					\
> >> +									\
> >> +		tlb_gather_mmu(&tlb, vma->vm_mm, addr, end);		\
> >> +		tlb_start_vma(&tlb, vma);				\
> >> +		tlb_flush_##_pxx##_range(&tlb, addr, end - addr);	\
> >> +		tlb_end_vma(&tlb, vma);					\
> >> +		tlb_finish_mmu(&tlb, addr, end);			\
> >> +}
> > 
> > I may have confused myself (flush_p??_tlb_* vs. tlb_flush_p??_*) but do
> > actually need this whole tlb_gather thing here? IIUC (by grep'ing),
> > flush_p?d_tlb_range() is only called on huge pages, so we should know
> > the level already.
> 
> tlb_flush_##_pxx##_range() is used to set tlb->cleared_*,
> flush_##_pxx##_tlb_range() will actually flush the TLB entry.
> 
> In arch64, tlb_flush_p?d_range() is defined as:
> 
> 	#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
> 	#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)

Currently, flush_p??_tlb_range() are generic and defined as above. I
think in the generic code they can remain an alias for
flush_tlb_range().

On arm64, we can redefine them as:

#define flush_pte_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 3)
#define flush_pmd_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 2)
#define flush_pud_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 1)
#define flush_p4d_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 0)

(unless the compiler optimises away all the mmu_gather stuff in your
macro above but they don't look trivial to me)

Also, I don't see the new flush_pte_* and flush_p4d_* macros used
anywhere and I don't think they are needed. The pte equivalent is
flush_tlb_page() (we need to make sure it's not used on a pmd in the
hugetlb context).

> So even if we know the level here, we can not pass the value to tlbi
> instructions (flush_tlb_range() is a common kernel interface and retro-fit it
> needs lots of changes), according to Peter's suggestion, I finally decide to
> pass the value of TTL by the tlb_gather_* frame.[1]

My comment was about the generic implementation using mmu_gather as you
are proposing. We don't need to change the flush_tlb_range() interface,
nor do we need to rewrite flush_p??_tlb_range().
Zhenyu Ye May 30, 2020, 10:24 a.m. UTC | #4
Hi Catalin,

Sorry for taking so long to reply to you.

On 2020/5/26 22:52, Catalin Marinas wrote:
> On Mon, May 25, 2020 at 03:19:42PM +0800, Zhenyu Ye wrote:
>>
>> tlb_flush_##_pxx##_range() is used to set tlb->cleared_*,
>> flush_##_pxx##_tlb_range() will actually flush the TLB entry.
>>
>> In arch64, tlb_flush_p?d_range() is defined as:
>>
>> 	#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
>> 	#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
> 
> Currently, flush_p??_tlb_range() are generic and defined as above. I
> think in the generic code they can remain an alias for
> flush_tlb_range().
> 
> On arm64, we can redefine them as:
> 
> #define flush_pte_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 3)
> #define flush_pmd_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 2)
> #define flush_pud_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 1)
> #define flush_p4d_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 0)
> 
> (unless the compiler optimises away all the mmu_gather stuff in your
> macro above but they don't look trivial to me)
> 

I changed generic code before considering that other structures may also
use this feature, such as Power9. And Peter may want to replace all
flush_tlb_range() by tlb_flush() in the future, see [1] for details.

If only enable this feature on aarch64, your codes are better.

[1] https://lore.kernel.org/linux-arm-kernel/20200402163849.GM20713@hirez.programming.kicks-ass.net/

> Also, I don't see the new flush_pte_* and flush_p4d_* macros used
> anywhere and I don't think they are needed. The pte equivalent is
> flush_tlb_page() (we need to make sure it's not used on a pmd in the
> hugetlb context).
> 

flush_tlb_page() is used to flush only one page.  If we add the flush_pte_tlb_range(),
then we can use it to flush a range of pages in the future.

But flush_pte_* and flush_p4d_* macros are really not used anywhere.
I will remove them in next version of series, and add them if someone needs.

>> So even if we know the level here, we can not pass the value to tlbi
>> instructions (flush_tlb_range() is a common kernel interface and retro-fit it
>> needs lots of changes), according to Peter's suggestion, I finally decide to
>> pass the value of TTL by the tlb_gather_* frame.[1]
> 
> My comment was about the generic implementation using mmu_gather as you
> are proposing. We don't need to change the flush_tlb_range() interface,
> nor do we need to rewrite flush_p??_tlb_range().
> 

Thanks,
Zhenyu
Catalin Marinas June 1, 2020, 11:56 a.m. UTC | #5
Hi Zhenyu,

On Sat, May 30, 2020 at 06:24:21PM +0800, Zhenyu Ye wrote:
> On 2020/5/26 22:52, Catalin Marinas wrote:
> > On Mon, May 25, 2020 at 03:19:42PM +0800, Zhenyu Ye wrote:
> >> tlb_flush_##_pxx##_range() is used to set tlb->cleared_*,
> >> flush_##_pxx##_tlb_range() will actually flush the TLB entry.
> >>
> >> In arch64, tlb_flush_p?d_range() is defined as:
> >>
> >> 	#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
> >> 	#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
> > 
> > Currently, flush_p??_tlb_range() are generic and defined as above. I
> > think in the generic code they can remain an alias for
> > flush_tlb_range().
> > 
> > On arm64, we can redefine them as:
> > 
> > #define flush_pte_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 3)
> > #define flush_pmd_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 2)
> > #define flush_pud_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 1)
> > #define flush_p4d_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 0)
> > 
> > (unless the compiler optimises away all the mmu_gather stuff in your
> > macro above but they don't look trivial to me)
> 
> I changed generic code before considering that other structures may also
> use this feature, such as Power9. And Peter may want to replace all
> flush_tlb_range() by tlb_flush() in the future, see [1] for details.
> 
> If only enable this feature on aarch64, your codes are better.
> 
> [1] https://lore.kernel.org/linux-arm-kernel/20200402163849.GM20713@hirez.programming.kicks-ass.net/

But we change the semantics slightly if we implement these as
mmu_gather. For example, tlb_end_vma() -> tlb_flush_mmu_tlbonly() ends
up calling mmu_notifier_invalidate_range() which it didn't before. I
think we end up invoking the notifier unnecessarily in some cases (see
the comment in __split_huge_pmd()) or we end up calling the notifier
twice (e.g. pmdp_huge_clear_flush_notify()).

> > Also, I don't see the new flush_pte_* and flush_p4d_* macros used
> > anywhere and I don't think they are needed. The pte equivalent is
> > flush_tlb_page() (we need to make sure it's not used on a pmd in the
> > hugetlb context).
> 
> flush_tlb_page() is used to flush only one page.  If we add the
> flush_pte_tlb_range(), then we can use it to flush a range of pages in
> the future.

If we know flush_tlb_page() is only called on a small page, could we add
TTL information here as well?

> But flush_pte_* and flush_p4d_* macros are really not used anywhere. I
> will remove them in next version of series, and add them if someone
> needs.

I think it makes sense.
Zhenyu Ye June 1, 2020, 1:36 p.m. UTC | #6
On 2020/6/1 19:56, Catalin Marinas wrote:
> Hi Zhenyu,
> 
> On Sat, May 30, 2020 at 06:24:21PM +0800, Zhenyu Ye wrote:
>> On 2020/5/26 22:52, Catalin Marinas wrote:
>>> On Mon, May 25, 2020 at 03:19:42PM +0800, Zhenyu Ye wrote:
>>>> tlb_flush_##_pxx##_range() is used to set tlb->cleared_*,
>>>> flush_##_pxx##_tlb_range() will actually flush the TLB entry.
>>>>
>>>> In arch64, tlb_flush_p?d_range() is defined as:
>>>>
>>>> 	#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
>>>> 	#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
>>>
>>> Currently, flush_p??_tlb_range() are generic and defined as above. I
>>> think in the generic code they can remain an alias for
>>> flush_tlb_range().
>>>
>>> On arm64, we can redefine them as:
>>>
>>> #define flush_pte_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 3)
>>> #define flush_pmd_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 2)
>>> #define flush_pud_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 1)
>>> #define flush_p4d_tlb_range(vma, addr, end)	__flush_tlb_range(vma, addr, end, 0)
>>>
>>> (unless the compiler optimises away all the mmu_gather stuff in your
>>> macro above but they don't look trivial to me)
>>
>> I changed generic code before considering that other structures may also
>> use this feature, such as Power9. And Peter may want to replace all
>> flush_tlb_range() by tlb_flush() in the future, see [1] for details.
>>
>> If only enable this feature on aarch64, your codes are better.
>>
>> [1] https://lore.kernel.org/linux-arm-kernel/20200402163849.GM20713@hirez.programming.kicks-ass.net/
> 
> But we change the semantics slightly if we implement these as
> mmu_gather. For example, tlb_end_vma() -> tlb_flush_mmu_tlbonly() ends
> up calling mmu_notifier_invalidate_range() which it didn't before. I
> think we end up invoking the notifier unnecessarily in some cases (see
> the comment in __split_huge_pmd()) or we end up calling the notifier
> twice (e.g. pmdp_huge_clear_flush_notify()).
> 

Yes, so only enable this feature on aarch64 may be better.
I will change this in V4 of this series. [the v3 only has some minor
changes and can be ignored :)]

>>> Also, I don't see the new flush_pte_* and flush_p4d_* macros used
>>> anywhere and I don't think they are needed. The pte equivalent is
>>> flush_tlb_page() (we need to make sure it's not used on a pmd in the
>>> hugetlb context).
>>
>> flush_tlb_page() is used to flush only one page.  If we add the
>> flush_pte_tlb_range(), then we can use it to flush a range of pages in
>> the future.
> 
> If we know flush_tlb_page() is only called on a small page, could we add
> TTL information here as well?
> 

Yes, we could. I will add this in flush_tlb_page().

>> But flush_pte_* and flush_p4d_* macros are really not used anywhere. I
>> will remove them in next version of series, and add them if someone
>> needs.
> 
> I think it makes sense.
> 

Thanks,
Zhenyu
diff mbox series

Patch

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 329b8c8ca703..8c92122ded9b 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1161,11 +1161,19 @@  static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
  * invalidate the entire TLB which is not desitable.
  * e.g. see arch/arc: flush_pmd_tlb_range
  */
-#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
-#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
+extern void flush_pte_tlb_range(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long end);
+extern void flush_pmd_tlb_range(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long end);
+extern void flush_pud_tlb_range(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long end);
+extern void flush_p4d_tlb_range(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long end);
 #else
+#define flush_pte_tlb_range(vma, addr, end)	BUILD_BUG()
 #define flush_pmd_tlb_range(vma, addr, end)	BUILD_BUG()
 #define flush_pud_tlb_range(vma, addr, end)	BUILD_BUG()
+#define flush_p4d_tlb_range(vma, addr, end)	BUILD_BUG()
 #endif
 #endif
 
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 3d7c01e76efc..3eff199d3507 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -101,6 +101,28 @@  pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
+#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+
+#define FLUSH_Pxx_TLB_RANGE(_pxx)					\
+void flush_##_pxx##_tlb_range(struct vm_area_struct *vma,		\
+			      unsigned long addr, unsigned long end)	\
+{									\
+		struct mmu_gather tlb;					\
+									\
+		tlb_gather_mmu(&tlb, vma->vm_mm, addr, end);		\
+		tlb_start_vma(&tlb, vma);				\
+		tlb_flush_##_pxx##_range(&tlb, addr, end - addr);	\
+		tlb_end_vma(&tlb, vma);					\
+		tlb_finish_mmu(&tlb, addr, end);			\
+}
+
+FLUSH_Pxx_TLB_RANGE(pte)
+FLUSH_Pxx_TLB_RANGE(pmd)
+FLUSH_Pxx_TLB_RANGE(pud)
+FLUSH_Pxx_TLB_RANGE(p4d)
+
+#endif /* __HAVE_ARCH_FLUSH_PMD_TLB_RANGE */
+
 #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
 int pmdp_set_access_flags(struct vm_area_struct *vma,
 			  unsigned long address, pmd_t *pmdp,