diff mbox series

[v2,3/6] tlb: mmu_gather: Introduce tlb_gather_mmu_fullmm()

Message ID 20201210121110.10094-4-will@kernel.org (mailing list archive)
State New, archived
Headers show
Series tlb: Fix (soft-)dirty bit management clean up API | expand

Commit Message

Will Deacon Dec. 10, 2020, 12:11 p.m. UTC
Passing the range '0, -1' to tlb_gather_mmu() sets the 'fullmm' flag,
which indicates that the mm_struct being operated on is going away. In
this case, some architectures (such as arm64) can elide TLB invalidation
by ensuring that the TLB tag (ASID) associated with this mm is not
immediately reclaimed. Although this behaviour is documented in
asm-generic/tlb.h, it's subtle and easily missed.

Introduce tlb_gather_mmu_fullmm() to make it clearer that this is for the
entire mm and WARN() if tlb_gather_mmu() is called with the 'fullmm'
address range.

Signed-off-by: Will Deacon <will@kernel.org>
---
 include/asm-generic/tlb.h |  6 ++++--
 include/linux/mm_types.h  |  1 +
 mm/mmap.c                 |  2 +-
 mm/mmu_gather.c           | 16 ++++++++++++++--
 4 files changed, 20 insertions(+), 5 deletions(-)

Comments

Yu Zhao Dec. 10, 2020, 11:44 p.m. UTC | #1
On Thu, Dec 10, 2020 at 12:11:07PM +0000, Will Deacon wrote:
> Passing the range '0, -1' to tlb_gather_mmu() sets the 'fullmm' flag,
> which indicates that the mm_struct being operated on is going away. In
> this case, some architectures (such as arm64) can elide TLB invalidation
> by ensuring that the TLB tag (ASID) associated with this mm is not
> immediately reclaimed. Although this behaviour is documented in
> asm-generic/tlb.h, it's subtle and easily missed.
> 
> Introduce tlb_gather_mmu_fullmm() to make it clearer that this is for the
> entire mm and WARN() if tlb_gather_mmu() is called with the 'fullmm'
> address range.
> 
> Signed-off-by: Will Deacon <will@kernel.org>
> ---
>  include/asm-generic/tlb.h |  6 ++++--
>  include/linux/mm_types.h  |  1 +
>  mm/mmap.c                 |  2 +-
>  mm/mmu_gather.c           | 16 ++++++++++++++--
>  4 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
> index 6661ee1cff47..2c68a545ffa7 100644
> --- a/include/asm-generic/tlb.h
> +++ b/include/asm-generic/tlb.h
> @@ -46,7 +46,9 @@
>   *
>   * The mmu_gather API consists of:
>   *
> - *  - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather
> + *  - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu()
> + *
> + *    start and finish a mmu_gather
>   *
>   *    Finish in particular will issue a (final) TLB invalidate and free
>   *    all (remaining) queued pages.
> @@ -91,7 +93,7 @@
>   *
>   *  - mmu_gather::fullmm
>   *
> - *    A flag set by tlb_gather_mmu() to indicate we're going to free
> + *    A flag set by tlb_gather_mmu_fullmm() to indicate we're going to free
>   *    the entire mm; this allows a number of optimizations.
>   *
>   *    - We can ignore tlb_{start,end}_vma(); because we don't
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 7b90058a62be..42231729affe 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -585,6 +585,7 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
>  struct mmu_gather;
>  extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
>  				unsigned long start, unsigned long end);
> +extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
>  extern void tlb_finish_mmu(struct mmu_gather *tlb);
>  
>  static inline void init_tlb_flush_pending(struct mm_struct *mm)
> diff --git a/mm/mmap.c b/mm/mmap.c
> index a3e5854cd01e..cdd3dae6547c 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -3214,7 +3214,7 @@ void exit_mmap(struct mm_struct *mm)
>  
>  	lru_add_drain();
>  	flush_cache_mm(mm);
> -	tlb_gather_mmu(&tlb, mm, 0, -1);
> +	tlb_gather_mmu_fullmm(&tlb, mm);
>  	/* update_hiwater_rss(mm) here? but nobody should be looking */
>  	/* Use -1 here to ensure all VMAs in the mm are unmapped */
>  	unmap_vmas(&tlb, vma, 0, -1);
> diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
> index b0be5a7aa08f..5f5e45d9eb50 100644
> --- a/mm/mmu_gather.c
> +++ b/mm/mmu_gather.c
> @@ -261,8 +261,8 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
>   * respectively when @mm is without users and we're going to destroy
>   * the full address space (exit/execve).
>   */
> -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
> -			unsigned long start, unsigned long end)
> +static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
> +			     unsigned long start, unsigned long end)
>  {
>  	tlb->mm = mm;
>  
> @@ -287,6 +287,18 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
>  	inc_tlb_flush_pending(tlb->mm);
>  }
>  
> +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
> +		    unsigned long start, unsigned long end)
> +{
> +	WARN_ON(!(start | (end + 1))); /* Use _fullmm() instead */
> +	__tlb_gather_mmu(tlb, mm, start, end);
> +}
> +
> +void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
> +{
> +	__tlb_gather_mmu(tlb, mm, 0, -1);
> +}
> +

IMO, there is no point adding the wrappers given you will remove
the WARN_ON in the next patch.

But if you prefer, they should be at least moved to the header for
inlining.

Consider this whole series

Reviewed-by: Yu Zhao <yuzhao@google.com>
diff mbox series

Patch

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 6661ee1cff47..2c68a545ffa7 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -46,7 +46,9 @@ 
  *
  * The mmu_gather API consists of:
  *
- *  - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather
+ *  - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu()
+ *
+ *    start and finish a mmu_gather
  *
  *    Finish in particular will issue a (final) TLB invalidate and free
  *    all (remaining) queued pages.
@@ -91,7 +93,7 @@ 
  *
  *  - mmu_gather::fullmm
  *
- *    A flag set by tlb_gather_mmu() to indicate we're going to free
+ *    A flag set by tlb_gather_mmu_fullmm() to indicate we're going to free
  *    the entire mm; this allows a number of optimizations.
  *
  *    - We can ignore tlb_{start,end}_vma(); because we don't
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7b90058a62be..42231729affe 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -585,6 +585,7 @@  static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 struct mmu_gather;
 extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 				unsigned long start, unsigned long end);
+extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
 extern void tlb_finish_mmu(struct mmu_gather *tlb);
 
 static inline void init_tlb_flush_pending(struct mm_struct *mm)
diff --git a/mm/mmap.c b/mm/mmap.c
index a3e5854cd01e..cdd3dae6547c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3214,7 +3214,7 @@  void exit_mmap(struct mm_struct *mm)
 
 	lru_add_drain();
 	flush_cache_mm(mm);
-	tlb_gather_mmu(&tlb, mm, 0, -1);
+	tlb_gather_mmu_fullmm(&tlb, mm);
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	unmap_vmas(&tlb, vma, 0, -1);
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index b0be5a7aa08f..5f5e45d9eb50 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -261,8 +261,8 @@  void tlb_flush_mmu(struct mmu_gather *tlb)
  * respectively when @mm is without users and we're going to destroy
  * the full address space (exit/execve).
  */
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
-			unsigned long start, unsigned long end)
+static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			     unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 
@@ -287,6 +287,18 @@  void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 	inc_tlb_flush_pending(tlb->mm);
 }
 
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+		    unsigned long start, unsigned long end)
+{
+	WARN_ON(!(start | (end + 1))); /* Use _fullmm() instead */
+	__tlb_gather_mmu(tlb, mm, start, end);
+}
+
+void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
+{
+	__tlb_gather_mmu(tlb, mm, 0, -1);
+}
+
 /**
  * tlb_finish_mmu - finish an mmu_gather structure
  * @tlb: the mmu_gather structure to finish